Esempio n. 1
0
def test_transform_stochastic_k_x():
    """Compare TransformStochasticK.transform_x on 'high' with a hand-built value.

    The reference is ``100 * (last_row - column_min) / (column_max - column_min)``
    computed on the raw fixture frame and reshaped into a one-row frame.
    """
    config = {'name': 'stochastic_k'}

    feature = FinancialFeature(
        name='high',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=10,
    )

    stochastic_k = TransformStochasticK(config)

    prices = financial_data_fixtures[feature.name]
    actual = stochastic_k.transform_x(feature, prices)

    # Reference: where the latest row sits inside each column's min-max range,
    # expressed as a percentage.
    symbol_names = prices.columns
    offset_from_low = prices.iloc[-1] - prices.min()
    full_range = prices.max() - prices.min()
    percent_k = (offset_from_low / full_range) * 100.

    # Promote the per-column series to a single-row frame before comparing.
    single_row = np.expand_dims(percent_k, axis=0)
    expected = pd.DataFrame(single_row, columns=symbol_names)

    assert_almost_equal(actual.values, expected.values, ASSERT_NDECIMALS)
Esempio n. 2
0
def test_transform_log_return_x():
    """Spot-check TransformLogReturn.transform_x on the 'close' fixture for AAPL.

    One row is compared against ``log(price[i] / price[i - 1])`` and the first
    transformed row is required to be NaN.
    """
    config = {'name': 'log-return'}

    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=5,
        resample_minutes=0,
        start_market_minute=90,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=35,
    )

    log_return = TransformLogReturn(config)

    close_prices = financial_data_fixtures[feature.name]
    all_returns = log_return.transform_x(feature, close_prices)

    ticker = 'AAPL'
    prices = close_prices[ticker]
    returns = all_returns[ticker]

    # NOTE(review): the row is drawn with random.randrange and no seed is set
    # inside this test, so the checked row may differ between runs.
    row = random.randrange(1, len(prices) - 1)

    current_price = prices.iloc[row]
    previous_price = prices.iloc[row - 1]
    expected = np.log(current_price / previous_price)
    assert_almost_equal(returns.iloc[row], expected, ASSERT_NDECIMALS)

    # The first row is expected to have no value for this non-local feature.
    assert np.isnan(returns.iloc[0])
def test_transform_ker_x():
    """Compare TransformKer.transform_x on 'high' with a pandas reference.

    The reference divides the absolute ``lag``-step difference by the rolling
    ``lag``-row sum of absolute one-step differences, dropping rows with NaN.
    """
    config = {'name': 'ker', 'lag': 20}

    feature = FinancialFeature(
        name='high',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=10,
    )

    highs = financial_data_fixtures[feature.name]

    ker = TransformKer(config)
    actual = ker.transform_x(feature, highs)

    lag = ker.lag
    # Numerator: absolute change over `lag` rows.
    net_move = highs.diff(lag).abs().dropna(axis=0)
    # Denominator: sum of absolute row-to-row changes over a `lag`-row window.
    path_length = highs.diff().abs().rolling(window=lag).sum().dropna(axis=0)

    expected = (net_move / path_length).dropna(axis=0)

    assert_almost_equal(actual.values, expected.values, ASSERT_NDECIMALS)
Esempio n. 4
0
def test_transform_log_return_y():
    """Check TransformLogReturn.transform_y against ``log(target / reference)``.

    The last fixture row is used as the target and the row just before it as
    the prediction reference.
    """
    config = {'name': 'log-return'}

    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=5,
        resample_minutes=0,
        start_market_minute=90,
        is_target=True,
        calendar=sample_market_calendar,
        local=False,
        length=35,
    )

    log_return = TransformLogReturn(config)

    close_prices = financial_data_fixtures[feature.name]

    # Second-to-last row is the last row of the "x" part of the data.
    reference_row = close_prices.iloc[-2]
    target_row = close_prices.iloc[-1]

    actual = log_return.transform_y(feature, target_row, reference_row)

    expected = np.log(target_row / reference_row)

    assert_almost_equal(actual, expected.values, ASSERT_NDECIMALS)
Esempio n. 5
0
def test_transform_volatility_x():
    """Check the output shape of TransformVolatility.transform_x on 'close'."""
    config = {'name': 'volatility', 'window': 10}

    feature = FinancialFeature(
        name='close',
        # The feature receives its own dict, equal to the transform's config.
        transformation=dict(config),
        normalization='standard',
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=10,
    )

    volatility = TransformVolatility(config)

    close_prices = financial_data_fixtures[feature.name]
    actual = volatility.transform_x(feature, close_prices)

    expected_shape = (256, 5)
    assert actual.shape == expected_shape
Esempio n. 6
0
def test_transform_ewma_x():
    """Compare TransformEWMA.transform_x on 'close' with pandas ``ewm(...).mean()``."""
    config = {'name': 'ewma', 'halflife': 20}

    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization='standard',
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=10,
    )

    ewma = TransformEWMA(config)

    close_prices = financial_data_fixtures[feature.name]
    actual = ewma.transform_x(feature, close_prices)

    # Reference: exponentially weighted mean using the transform's halflife.
    smoother = close_prices.ewm(halflife=ewma.halflife)
    expected = smoother.mean()

    assert_almost_equal(actual.values, expected.values, ASSERT_NDECIMALS)
Esempio n. 7
0
def test_transform_mtf_x():
    """Check the output shape of TransformMTF.transform_x on 'close'.

    Expected shape is ``(image_size ** 2, number of input columns)``.
    """
    config = {'name': 'mtf', 'image_size': 24}

    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization='standard',
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=10,
    )

    mtf = TransformMTF(config)

    close_prices = financial_data_fixtures[feature.name]
    actual = mtf.transform_x(feature, close_prices)

    n_symbols = close_prices.shape[1]
    expected_shape = (mtf.image_size**2, n_symbols)
    assert actual.shape == expected_shape
def test_tranform_gasf_x():
    """Run the shared ``_perform_test`` check for TransformGASF on 'close'."""
    config = {'name': 'gasf', 'image_size': 24}

    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization='standard',
        nbins=10,
        ndays=10,
        resample_minutes=0,
        start_market_minute=150,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=10,
    )

    gasf = TransformGASF(config)
    close_prices = financial_data_fixtures[feature.name]

    # The assertions themselves live in _perform_test, defined elsewhere.
    _perform_test(feature, close_prices, gasf)
Esempio n. 9
0
def test_transform_x_log_return_with_local_feature():
    """With ``local=True`` the first AAPL log-return row must not be NaN."""
    config = {'name': 'log-return'}

    feature = FinancialFeature(
        name='close',
        transformation=config,
        normalization=None,
        nbins=10,
        ndays=5,
        resample_minutes=0,
        start_market_minute=90,
        is_target=True,
        calendar=sample_market_calendar,
        local=True,
        length=35,
    )
    close_prices = financial_data_fixtures[feature.name]

    log_return = TransformLogReturn(config)
    all_returns = log_return.transform_x(feature, close_prices)
    aapl_returns = all_returns['AAPL']

    first_value = aapl_returns.iloc[0]
    assert not np.isnan(first_value)
Esempio n. 10
0
    def setUp(self):
        """Create three target features and collect them in ``self.feature_list``.

        The features are: 'open' with the value transform, 'close' with the
        log-return transform, and 'high' with the log-return transform and
        standard normalization.
        """
        # Keyword arguments that are identical for every feature built here.
        shared_settings = dict(
            resample_minutes=0,
            is_target=True,
            calendar=sample_market_calendar,
            local=False,
            classify_per_series=True,
        )

        self.feature_close_with_value_transform = FinancialFeature(
            name='open',
            transformation={'name': 'value'},
            normalization=None,
            nbins=5,
            ndays=2,
            start_market_minute=30,
            length=15,
            **shared_settings)

        self.feature_close_with_log_return_transform = FinancialFeature(
            name='close',
            transformation={'name': 'log-return'},
            normalization=None,
            nbins=10,
            ndays=5,
            start_market_minute=90,
            length=35,
            **shared_settings)

        self.feature_high_with_log_return_transform = FinancialFeature(
            name='high',
            transformation={'name': 'log-return'},
            normalization='standard',
            nbins=None,
            ndays=10,
            start_market_minute=150,
            length=69,
            **shared_settings)

        self.feature_list = [
            self.feature_close_with_value_transform,
            self.feature_close_with_log_return_transform,
            self.feature_high_with_log_return_transform,
        ]
        'resample_minutes': 60,
        'start_market_minute': 1,
        'is_target': True,
        KEY_EXCHANGE: 'NYSE',
        'local': True,
        'length': 10
    },
]
sample_fin_feature_list = FeatureList([
    FinancialFeature(name='open',
                     transformation={'name': 'value'},
                     normalization=None,
                     nbins=5,
                     ndays=2,
                     resample_minutes=60,
                     start_market_minute=1,
                     is_target=False,
                     calendar=sample_market_calendar,
                     classify_per_series=False,
                     normalise_per_series=False,
                     local=True,
                     length=10),
    FinancialFeature(name='close',
                     transformation={'name': 'log-return'},
                     normalization=None,
                     nbins=10,
                     ndays=5,
                     resample_minutes=60,
                     start_market_minute=1,
                     is_target=False,
                     calendar=sample_market_calendar,
Esempio n. 12
0
class TestFinancialFeature(TestCase):
    """Tests for FinancialFeature timestamp lookup, data selection and declassification."""

    def setUp(self):
        """Create three target features and collect them in ``self.feature_list``."""
        # Keyword arguments that are identical for every feature built here.
        shared_settings = dict(
            resample_minutes=0,
            is_target=True,
            calendar=sample_market_calendar,
            local=False,
            classify_per_series=True,
        )

        self.feature_close_with_value_transform = FinancialFeature(
            name='open',
            transformation={'name': 'value'},
            normalization=None,
            nbins=5,
            ndays=2,
            start_market_minute=30,
            length=15,
            **shared_settings)

        self.feature_close_with_log_return_transform = FinancialFeature(
            name='close',
            transformation={'name': 'log-return'},
            normalization=None,
            nbins=10,
            ndays=5,
            start_market_minute=90,
            length=35,
            **shared_settings)

        self.feature_high_with_log_return_transform = FinancialFeature(
            name='high',
            transformation={'name': 'log-return'},
            normalization='standard',
            nbins=None,
            ndays=10,
            start_market_minute=150,
            length=69,
            **shared_settings)

        self.feature_list = [
            self.feature_close_with_value_transform,
            self.feature_close_with_log_return_transform,
            self.feature_high_with_log_return_transform,
        ]

    def test_get_start_timestamp_x(self):
        """Each feature must report the expected start timestamp for one prediction time."""
        schedule = sample_market_calendar.schedule('20150101', '20150501')
        market_opens = schedule.market_open
        prediction_timestamp = market_opens[20] + timedelta(minutes=15)

        # (feature, expected start timestamp text) in the original check order.
        cases = [
            (self.feature_close_with_value_transform,
             '2015-01-29 15:00:00+0000'),
            (self.feature_close_with_log_return_transform,
             '2015-01-26 16:00:00+0000'),
            (self.feature_high_with_log_return_transform,
             '2015-01-16 17:00:00+0000'),
        ]

        for feature, expected_text in cases:
            actual = feature._get_start_timestamp_x(prediction_timestamp)
            expected = pd.Timestamp(expected_text, tz='UTC')
            assert actual == expected

    def test_select_prediction_data(self):
        """Selected x-data must be the last ``length`` rows up to the prediction time."""
        feature = self.feature_close_with_value_transform
        data_frame = financial_data_fixtures[feature.name]
        first_day = data_frame.index[0].date()
        last_day = data_frame.index[-1].date()

        schedule = sample_market_calendar.schedule(str(first_day),
                                                   str(last_day))
        market_opens = schedule.market_open
        prediction_timestamp = market_opens[20] + timedelta(minutes=15)

        selected = feature._select_prediction_data_x(data_frame,
                                                     prediction_timestamp)

        # Position just past the last row at or before the prediction time.
        rows_up_to_prediction = np.argwhere(
            data_frame.index <= prediction_timestamp)
        stop = rows_up_to_prediction[-1][0] + 1
        start = stop - feature.length
        expected = data_frame.iloc[start:stop]

        assert selected.equals(expected)

    @staticmethod
    def _run_get_prediction_data_test(feature, expected_length):
        """Check types, row count and labels of a feature's prediction x/y data.

        :param feature: feature whose ``get_prediction_features`` and
            ``get_prediction_targets`` are exercised
        :param expected_length: expected number of rows in the x-data
        """
        data_frame = financial_data_fixtures[feature.name]
        first_day = data_frame.index[0].date()
        last_day = data_frame.index[-1].date()

        schedule = sample_market_calendar.schedule(str(first_day),
                                                   str(last_day))
        market_opens = schedule.market_open
        prediction_timestamp = market_opens[20] + timedelta(minutes=30)
        target_timestamp = market_opens[21] + timedelta(minutes=90)

        features_x = feature.get_prediction_features(data_frame,
                                                     prediction_timestamp)
        targets_y = feature.get_prediction_targets(data_frame,
                                                   prediction_timestamp,
                                                   target_timestamp)

        assert isinstance(features_x, pd.DataFrame)
        assert isinstance(targets_y, pd.Series)
        assert len(features_x) == expected_length
        # The x columns and the y index must carry the same labels.
        assert_array_equal(features_x.columns, targets_y.index)

    def test_get_prediction_data(self):
        """Run the prediction-data check for every feature with its configured length."""
        expected_lengths = (15, 35, 69)
        for feature, expected_length in zip(self.feature_list,
                                            expected_lengths):
            self._run_get_prediction_data_test(feature, expected_length)

    def test_declassify_single_predict_y(self):
        """``declassify_single_predict_y`` must raise NotImplementedError for each feature."""
        train_labels = np.stack([TEST_ARRAY] * 5)
        middle_column = int(0.5 * train_labels.shape[1])
        predict_labels = {'open': train_labels[:, middle_column]}

        for feature in self.feature_list:
            if not feature.nbins:
                # Features without bins are handed the label dict itself.
                predict_y = predict_labels
            else:
                first_labels = next(iter(predict_labels.values()))
                predict_y = np.zeros_like(first_labels)
                predict_y[0] = 1

            with pytest.raises(NotImplementedError):
                feature.declassify_single_predict_y(predict_y)
Esempio n. 13
0
    def setUp(self):
        """Create five 'close' features that differ in normalization and transform.

        ``feature1``-``feature4`` share one log-return config and use min_max,
        standard, gaussian and robust normalization respectively.
        ``feature5`` uses the value transform with min_max normalization,
        five bins and per-series classification.
        """
        # Keyword arguments that are identical for every feature built here.
        shared_settings = dict(
            name='close',
            ndays=5,
            resample_minutes=0,
            start_market_minute=90,
            is_target=True,
            calendar=sample_market_calendar,
            local=False,
            length=35,
            normalise_per_series=True,
        )

        # The same dict object is handed to the first four features.
        transform_config = {'name': 'log-return'}

        self.feature1 = FinancialFeature(transformation=transform_config,
                                         normalization='min_max',
                                         nbins=10,
                                         **shared_settings)

        self.feature2 = FinancialFeature(transformation=transform_config,
                                         normalization='standard',
                                         nbins=10,
                                         **shared_settings)

        self.feature3 = FinancialFeature(transformation=transform_config,
                                         normalization='gaussian',
                                         nbins=10,
                                         **shared_settings)

        self.feature4 = FinancialFeature(transformation=transform_config,
                                         normalization='robust',
                                         nbins=10,
                                         **shared_settings)

        transform_config_2 = {'name': 'value'}

        self.feature5 = FinancialFeature(transformation=transform_config_2,
                                         normalization='min_max',
                                         nbins=5,
                                         classify_per_series=True,
                                         **shared_settings)
Esempio n. 14
0
class TestFeatureNormalization(TestCase):
    """Tests for fitting/applying normalisation and classification on FinancialFeature."""

    def setUp(self):
        """Create five 'close' features that differ in normalization and transform.

        ``feature1``-``feature4`` share one log-return config and use min_max,
        standard, gaussian and robust normalization respectively.
        ``feature5`` uses the value transform with min_max normalization,
        five bins and per-series classification.
        """
        # Keyword arguments that are identical for every feature built here.
        shared_settings = dict(
            name='close',
            ndays=5,
            resample_minutes=0,
            start_market_minute=90,
            is_target=True,
            calendar=sample_market_calendar,
            local=False,
            length=35,
            normalise_per_series=True,
        )

        # The same dict object is handed to the first four features.
        transform_config = {'name': 'log-return'}

        self.feature1 = FinancialFeature(transformation=transform_config,
                                         normalization='min_max',
                                         nbins=10,
                                         **shared_settings)

        self.feature2 = FinancialFeature(transformation=transform_config,
                                         normalization='standard',
                                         nbins=10,
                                         **shared_settings)

        self.feature3 = FinancialFeature(transformation=transform_config,
                                         normalization='gaussian',
                                         nbins=10,
                                         **shared_settings)

        self.feature4 = FinancialFeature(transformation=transform_config,
                                         normalization='robust',
                                         nbins=10,
                                         **shared_settings)

        transform_config_2 = {'name': 'value'}

        self.feature5 = FinancialFeature(transformation=transform_config_2,
                                         normalization='min_max',
                                         nbins=5,
                                         classify_per_series=True,
                                         **shared_settings)

    def test_fit_normalisation(self):
        """Fitted scaler attributes must reflect the statistics of the sample data."""
        samples = np.random.randn(10000)
        tolerance = 1e-4

        # min_max: the scaler records the data extremes.
        self.feature1.fit_normalisation(symbol_data=samples)
        min_max_scaler = self.feature1.scaler
        assert np.isclose(min_max_scaler.data_max_, samples.max(),
                          rtol=tolerance)
        assert np.isclose(min_max_scaler.data_min_, samples.min(),
                          rtol=tolerance)

        # standard: the scaler records mean and variance.
        self.feature2.fit_normalisation(symbol_data=samples)
        standard_scaler = self.feature2.scaler
        assert np.isclose(standard_scaler.mean_, samples.mean(),
                          rtol=tolerance)
        assert np.isclose(standard_scaler.var_, samples.var(),
                          rtol=tolerance)

        # gaussian: only the shapes of the fitted arrays are checked.
        self.feature3.fit_normalisation(symbol_data=samples)
        gaussian_scaler = self.feature3.scaler
        assert gaussian_scaler.references_.shape == (1000, )
        assert gaussian_scaler.quantiles_.shape == (1000, 1)

        # robust: the scaler is centred on the median.
        self.feature4.fit_normalisation(symbol_data=samples)
        assert np.isclose(self.feature4.scaler.center_,
                          np.median(samples),
                          rtol=tolerance)

    def test_apply_normalisation(self):
        """Apply each normalisation in turn to the same frame and check its statistics.

        The frame is a deep copy of the 'open' fixture; every step fits on the
        frame as left by the previous step.
        """
        data = deepcopy(financial_data_fixtures['open'])

        def fit_then_apply(feature):
            # Fit one scaler per column, then normalise the whole frame.
            for column in data.columns:
                feature.fit_normalisation(symbol_data=data[column].values,
                                          symbol=column)
            feature.apply_normalisation(data)

        ones = np.ones(5)
        zeros = np.zeros(5)

        fit_then_apply(self.feature1)
        np.testing.assert_allclose(data.max(), ones)
        np.testing.assert_allclose(data.min(), zeros)

        fit_then_apply(self.feature2)
        np.testing.assert_allclose(data.mean(), zeros, atol=1e-4)

        fit_then_apply(self.feature3)
        np.testing.assert_allclose(data.mean(), zeros, atol=1e-3)

        fit_then_apply(self.feature4)
        np.testing.assert_allclose(np.median(data, axis=0), zeros, atol=1e-3)

    def test_apply_classification(self):
        """Classify a one-row frame after fitting two of its three symbols.

        The expected frame contains columns only for 'SYM1' and 'SYM2', the
        two symbols that were fitted.
        """
        symbols = ['SYM1', 'SYM2', 'SYM3']
        feature = self.feature5
        dataframe = pd.DataFrame([[5, 5, 5]], columns=symbols)

        # 'SYM3' is deliberately left without a fitted classifier.
        training_data = (
            ('SYM1', np.linspace(0, 10, 10)),
            ('SYM2', np.linspace(0, 100, 100)),
        )
        for symbol, symbol_data in training_data:
            feature.fit_classification(symbol, symbol_data)

        classified = feature.apply_classification(dataframe)

        expected_rows = [[0., 1.], [0., 0.], [1., 0.], [0., 0.], [0., 0.]]
        expected = pd.DataFrame(expected_rows, columns=symbols[:2])

        assert classified.equals(expected)