Esempio n. 1
0
    def test_partial_fit_regression(self):
        """Check that a batch-wise partial fit matches a regular fit for a regressor.

        One MLPRegressor is fitted with ``max_iter=1`` using ``batch_size=10`` and
        ``fold_epochs=10``; a second one is fitted with ``max_iter=10`` and default
        fitting parameters. Both share ``random_state=42`` and the same
        ``naive_splitter(0.3)``. The last column of the first prediction row of
        both models must agree to 4 decimal places.
        """
        # n_informative is keyword-only in scikit-learn >= 1.0; passing it
        # positionally (as the third argument) raises a TypeError there.
        data = make_regression(n_samples=100, n_features=2, n_informative=1)
        df = pd.DataFrame(data[0])
        df["label"] = data[1]

        # model trained one iteration at a time over several fold epochs
        with df.model() as m:
            fit_partial = m.fit(
                SkModel(
                    MLPRegressor(max_iter=1, random_state=42),
                    FeaturesAndLabels(features=[0, 1], labels=['label'])
                ),
                FittingParameter(
                    naive_splitter(0.3),
                    batch_size=10,
                    fold_epochs=10
                )
            )

        # reference model trained in one go
        with df.model() as m:
            fit = m.fit(
                SkModel(
                    MLPRegressor(max_iter=10, random_state=42),
                    FeaturesAndLabels(features=[0, 1], labels=['label'])
                ),
                FittingParameter(naive_splitter(0.3))
            )

        expected = df.model.predict(fit.model).iloc[0, -1]
        actual = df.model.predict(fit_partial.model).iloc[0, -1]
        self.assertAlmostEqual(expected, actual, 4)
Esempio n. 2
0
    def test_partial_fit_classification(self):
        """Check that a batch-wise partial fit matches a regular fit for a classifier.

        One MLPClassifier is fitted with ``max_iter=1`` using ``batch_size=10`` and
        ``fold_epochs=10`` (and is given the label classes up front); a second one
        is fitted with ``max_iter=10`` and default fitting parameters. Both share
        ``random_state=42`` and ``stratified_random_splitter(0.3)``. The last
        column of the first prediction row of both models must agree to 4
        decimal places.
        """
        # n_informative and n_redundant are keyword-only in scikit-learn >= 1.0;
        # passing them positionally raises a TypeError there.
        data = make_classification(
            n_samples=100,
            n_features=2,
            n_informative=1,
            n_redundant=0,
            n_clusters_per_class=1,
        )
        df = pd.DataFrame(data[0])
        df["label"] = data[1]

        # model trained one iteration at a time over several fold epochs
        with df.model() as m:
            fit_partial = m.fit(
                SkModel(
                    MLPClassifier(max_iter=1, random_state=42),
                    FeaturesAndLabels(features=[0, 1], labels=['label']),
                    classes=np.unique(data[1])
                ),
                FittingParameter(
                    stratified_random_splitter(0.3),
                    batch_size=10,
                    fold_epochs=10,
                )
            )

        # reference model trained in one go
        with df.model() as m:
            fit = m.fit(
                SkModel(
                    MLPClassifier(max_iter=10, random_state=42),
                    FeaturesAndLabels(features=[0, 1], labels=['label'])
                ),
                FittingParameter(stratified_random_splitter(0.3))
            )

        expected = df.model.predict(fit.model).iloc[0, -1]
        actual = df.model.predict(fit_partial.model).iloc[0, -1]
        self.assertAlmostEqual(expected, actual, 4)
Esempio n. 3
0
    def provide_linear_regression_model(self):
        """Return the (model, fitting parameter) pairs for the linear regression case.

        Three pairs are built, in this order: a LinearRegression, an MLPRegressor
        with ``max_iter=9000`` fitted for one epoch, and a warm-started
        MLPRegressor with ``max_iter=1`` fitted for 9000 epochs.
        """
        from sklearn.linear_model import LinearRegression
        from sklearn.neural_network import MLPRegressor
        from pandas_ml_utils import FeaturesAndLabels, SkModel

        pairs = []

        # plain linear regression
        estimator = LinearRegression()
        model = SkModel(estimator, FeaturesAndLabels(["x"], ["y"]))
        pairs.append(
            (model, FittingParameter(epochs=1, fold_epochs=1, context="LinearRegression"))
        )

        # neural network trained in a single fit call
        estimator = MLPRegressor(10, learning_rate_init=0.01, max_iter=9000, validation_fraction=0)
        model = SkModel(estimator, FeaturesAndLabels(["x"], ["y"]))
        pairs.append(
            (model, FittingParameter(epochs=1, fold_epochs=1, context="MLPRegressor"))
        )

        # neural network trained one iteration per epoch
        estimator = MLPRegressor(
            10, learning_rate_init=0.01, max_iter=1, validation_fraction=0, warm_start=True
        )
        model = SkModel(estimator, FeaturesAndLabels(["x"], ["y"]))
        pairs.append(
            (model, FittingParameter(epochs=9000, fold_epochs=1, context="MLPRegressor partial fit"))
        )

        return pairs
Esempio n. 4
0
    def _test_hyper_parameter_for_simple_model(self):
        """Fit with a hyper-parameter search space and expect ``alpha == 0.0001``.

        The method name starts with an underscore, so name-based unittest
        discovery (which looks for a ``test`` prefix) will not collect it.
        """
        from hyperopt import hp
        """given"""
        frame = DF_TEST.copy()
        frame['label'] = frame["spy_Close"] > frame["spy_Open"]
        """when fit with find hyper parameter"""
        classifier = MLPClassifier(
            activation='tanh',
            hidden_layer_sizes=(60, 50),
            random_state=42,
        )
        columns = FeaturesAndLabels(
            features=['vix_Close'],
            labels=['label'],
            target_columns=["vix_Open"],
            loss_column="spy_Volume",
        )
        model = SkModel(classifier, columns)
        search_space = {
            'alpha': hp.choice('alpha', [0.0001, 10]),
            'early_stopping': True,
            'max_iter': 50,
            '__max_evals': 4,
            '__rstate': np.random.RandomState(42),
        }
        fit = frame.fit(
            model,
            test_size=0.4,
            test_validate_split_seed=42,
            hyper_parameter_space=search_space,
        )
        """then test best parameter"""
        best_alpha = fit.model.skit_model.get_params()['alpha']
        self.assertEqual(best_alpha, 0.0001)
Esempio n. 5
0
    def provide_regression_model(self, features_and_labels):
        """Wrap a seeded SGD MLPRegressor and ``features_and_labels`` in an SkModel.

        The regressor is configured with one hidden unit, identity activation,
        no momentum, no shuffling and ``random_state=42``.
        """
        regressor = MLPRegressor(
            1,
            learning_rate_init=0.01,
            solver='sgd',
            activation='identity',
            momentum=0,
            max_iter=1500,
            n_iter_no_change=500,
            nesterovs_momentum=False,
            shuffle=False,
            validation_fraction=0.0,
            random_state=42,
        )
        return SkModel(regressor, features_and_labels)
Esempio n. 6
0
    def test_simple_classification_model(self):
        """Fit an MLP classifier on DF_NOTES and check predict, backtest and sampling.

        Asserts that the last prediction is at least 0.68, that the backtest
        frame carries feature and label columns and the same last prediction,
        and that ``samples=2`` yields a list of two values per prediction.
        """
        notes = DF_NOTES.copy()

        with notes.model() as m:
            classifier = MLPClassifier(
                activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2
            )
            columns = FeaturesAndLabels(
                features=["variance", "skewness", "kurtosis", "entropy"],
                labels=["authentic"],
                label_type=bool,
            )
            model = SkModel(classifier, columns)
            fit = m.fit(model, FittingParameter(stratified_random_splitter()))

        print(fit)
        # rendering the html representation must not raise
        fit._repr_html_()

        prediction = notes.model.predict(fit.model)
        print(prediction)
        self.assertGreaterEqual(
            prediction[PREDICTION_COLUMN_NAME].iloc[-1].values,
            0.68,
        )

        backtest = notes.model.backtest(fit.model)
        for column in (FEATURE_COLUMN_NAME, LABEL_COLUMN_NAME):
            self.assertIn(column, backtest.df)
        np.testing.assert_array_almost_equal(
            prediction[PREDICTION_COLUMN_NAME].iloc[-1].values,
            backtest.df[PREDICTION_COLUMN_NAME].iloc[-1].values,
        )

        # drawing several samples yields a list per prediction cell
        samples = notes.model.predict(fit.model, samples=2)
        self.assertIsInstance(samples[PREDICTION_COLUMN_NAME].iloc[-1, 0], list)
        self.assertEqual(2, len(samples[PREDICTION_COLUMN_NAME].iloc[-1, 0]))
Esempio n. 7
0
    def test_simple_classification_model(self):
        """Fit an MLP classifier on DF_TEST and check predict, backtest and sampling.

        Features and the label are produced by lambdas over the frame. The test
        asserts that the last prediction is a float and that ``samples=2``
        yields a list of two values for the last prediction.
        """
        df = DF_TEST.copy()

        fit = df.model.fit(
            SkModel(
                MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42, max_iter=2),
                FeaturesAndLabels(
                    features=[
                        lambda df: df["Close"].ta.rsi().ta.rnn(28),
                        lambda df: (df["Volume"] / df["Volume"].ta.ema(14) - 1).ta.rnn(28)
                    ],
                    labels=[
                        lambda df: (df["Close"] > df["Open"]).shift(-1),
                    ]
                ),
                # kwargs
                forecasting_time_steps=7
            )
        )

        print(fit)
        # rendering the html representation must not raise
        html = fit._repr_html_()

        prediction = df.model.predict(fit.model)
        print(prediction)
        # np.float was only an alias of the builtin float and was removed in
        # NumPy 1.24; referencing it raises AttributeError on current NumPy.
        self.assertIsInstance(prediction[PREDICTION_COLUMN_NAME, 0].iloc[-1], (float, np.float32, np.float64))

        # backtesting must not raise
        backtest = df.model.backtest(fit.model)

        # test multiple samples
        samples = df.model.predict(fit.model, samples=2)
        self.assertIsInstance(samples[PREDICTION_COLUMN_NAME, 0].iloc[-1], list)
        self.assertEqual(2, len(samples[PREDICTION_COLUMN_NAME, 0].iloc[-1]))
Esempio n. 8
0
    def test_linear_model(self):
        """Fit a Lasso model on DF_TEST, then run predict and backtest.

        There are no assertions; the test passes when fitting, predicting and
        backtesting complete without raising.
        """
        df = DF_TEST.copy()

        estimator = Lasso()
        columns = FeaturesAndLabels(
            features=[
                lambda df: df["Close"].ta.rsi().ta.rnn(28),
                lambda df: (df["Volume"] / df["Volume"].ta.ema(14) - 1).ta.rnn(28),
            ],
            labels=[lambda df: (df["Close"] / df["Open"] - 1).shift(-1)],
        )
        model = SkModel(estimator, columns, summary_provider=RegressionSummary)
        fit = df.model.fit(model, NaiveSplitter())

        print(fit)

        prediction = df.model.predict(fit.model)
        print(prediction)

        # only exercised for errors, the result is not inspected
        df.model.backtest(fit.model)
Esempio n. 9
0
    def test_simple_regression_model(self):
        """Fit an MLP regressor on DF_TEST and check predict and backtest.

        Features and the label are produced by lambdas over the frame. The test
        asserts that the last prediction is a float; the html rendering and the
        backtest are only exercised for errors.
        """
        df = DF_TEST.copy()

        fit = df.model.fit(
            SkModel(
                MLPRegressor(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42, max_iter=2),
                FeaturesAndLabels(
                    features=[
                        lambda df: df["Close"].ta.rsi().ta.rnn(28),
                        lambda df: (df["Volume"] / df["Volume"].ta.ema(14) - 1).ta.rnn(28)
                    ],
                    labels=[
                        lambda df: (df["Close"] / df["Open"] - 1).shift(-1),
                    ]
                ),
                summary_provider=RegressionSummary
            ),
            NaiveSplitter()
        )

        print(fit)
        # rendering the html representation must not raise
        html = fit._repr_html_()

        prediction = df.model.predict(fit.model)
        print(prediction)
        # np.float was only an alias of the builtin float and was removed in
        # NumPy 1.24; referencing it raises AttributeError on current NumPy.
        self.assertIsInstance(prediction[PREDICTION_COLUMN_NAME, 0].iloc[-1], (float, np.float32, np.float64))

        # backtesting must not raise
        backtest = df.model.backtest(fit.model)
Esempio n. 10
0
    def test_linear_model(self):
        """Fit a Lasso model on DF_NOTES and check its first coefficient.

        After fitting, predicting and backtesting, the first entry of the fitted
        estimator's ``coef_`` must be below 1e-5.
        """
        notes = DF_NOTES.copy()

        with notes.model() as m:
            estimator = Lasso()
            columns = FeaturesAndLabels(
                features=[
                    lambda df: df["variance"],
                    lambda df: (df["skewness"] / df["kurtosis"]).rename("engineered"),
                ],
                labels=['authentic'],
            )
            fit = m.fit(SkModel(estimator, columns), FittingParameter(naive_splitter()))

        print(fit)

        prediction = notes.model.predict(fit.model)
        print(prediction)

        backtest = notes.model.backtest(fit.model)
        first_coefficient = backtest.model.sk_model.coef_[0]
        self.assertLess(first_coefficient, 1e-5)
Esempio n. 11
0
    def test_KFold(self):
        """Fit an MLP classifier on DF_TEST using KFoldBoostRareEvents cross validation.

        There are no assertions; the test passes when the fit completes without
        raising.
        """
        df = DF_TEST.copy()

        classifier = MLPClassifier(
            activation='tanh', hidden_layer_sizes=(60, 50), random_state=42, max_iter=2
        )
        features = extract_with_post_processor(
            [
                lambda df: df["Close"].ta.trix(),
                lambda df: df["Close"].ta.ppo(),
                lambda df: df["Close"].ta.apo(),
                lambda df: df["Close"].ta.macd(),
                lambda df: df.ta.adx(),
            ],
            lambda df: df.ta.rnn(range(10)),
        )
        labels = [
            lambda df: (
                df["Close"].ta.sma(period=60)
                .ta.cross(df["Close"].ta.sma(period=20))
                .ta.rnn([1, 2, 3, 4, 5])
                .abs()
                .sum(axis=1)
                .shift(-5)
                .astype(bool)
            )
        ]
        model = SkModel(classifier, FeaturesAndLabels(features=features, labels=labels))
        splits = RandomSplits(
            test_size=0.4,
            test_validate_split_seed=42,
            cross_validation=(1, KFoldBoostRareEvents(n_splits=5).split),
        )

        # only exercised for errors, the fit result is not inspected
        df.model.fit(model, splits)
Esempio n. 12
0
    def test_simple_classification_cross_validation(self):
        """Fit an MLP classifier on DF_NOTES with a shuffled 3-fold cross validation.

        Asserts that the last prediction is at least 0.65.
        """
        notes = DF_NOTES.copy()

        with notes.model() as m:
            classifier = MLPClassifier(
                activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2
            )
            columns = FeaturesAndLabels(
                features=["variance", "skewness", "kurtosis", "entropy"],
                labels=["authentic"],
                label_type=bool,
            )
            model = SkModel(classifier, columns)
            parameters = FittingParameter(
                splitter=random_splitter(),
                cross_validation=KFold(3, random_state=42, shuffle=True),
            )
            fit = m.fit(model, parameters)

        print(fit)
        # rendering the html representation must not raise
        fit._repr_html_()

        prediction = notes.model.predict(fit.model)
        print(prediction)
        last_prediction = prediction[PREDICTION_COLUMN_NAME].iloc[-1].values
        self.assertGreaterEqual(last_prediction, 0.65)
Esempio n. 13
0
    def provide_classification_model(self, features_and_labels):
        """Wrap a seeded lbfgs MLPClassifier and ``features_and_labels`` in an SkModel.

        The classifier uses logistic activation, one hidden layer of 3 units
        and ``random_state=42``.
        """
        classifier = MLPClassifier(
            activation='logistic',
            max_iter=1000,
            hidden_layer_sizes=(3,),
            alpha=0.001,
            solver='lbfgs',
            random_state=42,
        )
        return SkModel(classifier, features_and_labels)
Esempio n. 14
0
    def test_multi_model(self):
        """Fit a two-member MultiModel on toy binary data and print its output.

        There are no assertions; the test passes when fitting, rendering the
        training summary and predicting complete without raising.
        """
        # toy classification data
        df = pd.DataFrame({
            "a": [1, 0, 1, 0, 1, 0, 1, 0],
            "b": [0, 0, 1, 1, 0, 0, 1, 1],
            "c": [1, 0, 0, 1, 1, 0, 0, 1],
        })

        classifier = MLPClassifier(
            activation='logistic',
            max_iter=1000,
            hidden_layer_sizes=(3, ),
            alpha=0.001,
            solver='lbfgs',
            random_state=42,
        )
        columns = FeaturesAndLabels(
            features=["a", "b"],
            # the lambda's second parameter name matches model_index_variable below
            labels=[lambda df, i: df["c"].rename(f"c_{i}")],
            label_type=int,
        )
        base_model = SkModel(classifier, columns, summary_provider=ClassificationSummary)
        model = MultiModel(
            base_model,
            2,
            model_index_variable="i",
            summary_provider=MultiModelSummary,
        )

        fit = df.model.fit(model, NaiveSplitter(0.49), epochs=1500, verbose=True)
        summary_html = fit.training_summary._repr_html_()
        print(summary_html[:100])

        pdf = df.model.predict(fit.model, tail=2)
        print(pdf)
Esempio n. 15
0
    def provide_non_linear_regression_model(self):
        """Return the (model, fitting parameter) pairs for the non-linear regression case.

        Two pairs are built, in this order: an MLPRegressor with ``max_iter=5000``
        fitted for one epoch, and a warm-started MLPRegressor with ``max_iter=1``
        fitted for 5000 epochs.
        """
        from sklearn.neural_network import MLPRegressor
        from pandas_ml_utils import FeaturesAndLabels, SkModel

        pairs = []

        # neural network trained in a single fit call
        estimator = MLPRegressor(200, learning_rate_init=0.001, max_iter=5000, validation_fraction=0)
        model = SkModel(estimator, FeaturesAndLabels(["x"], ["y"]))
        pairs.append((model, FittingParameter(epochs=1, context="epoch 1 fit")))

        # neural network trained one iteration per epoch
        estimator = MLPRegressor(
            200, learning_rate_init=0.001, max_iter=1, validation_fraction=0, warm_start=True
        )
        model = SkModel(estimator, FeaturesAndLabels(["x"], ["y"]))
        pairs.append((model, FittingParameter(epochs=5000, context="partial fit")))

        return pairs
Esempio n. 16
0
    def test_simple_classification_model_with_all_options(self):
        """Fit with sample weights, gross loss and targets and check resulting columns.

        Verifies which top-level columns appear in the training summary, the
        test summary, the prediction frame and the backtest frame, and that the
        backtest frame has as many rows as the input frame.
        """
        notes = DF_NOTES.copy()

        with notes.model() as m:
            classifier = MLPClassifier(
                activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2
            )
            columns = FeaturesAndLabels(
                features=["variance", "skewness", "kurtosis", "entropy"],
                sample_weights=["variance"],
                gross_loss=["kurtosis"],
                targets=["entropy"],
                labels=["authentic"],
                label_type=bool,
            )
            model = SkModel(classifier, columns)
            fit = m.fit(model, FittingParameter(stratified_random_splitter()))

        # rendering the html representation must not raise
        fit._repr_html_()

        # columns of the fit summaries
        print(fit.test_summary.df)
        for column in (
            GROSS_LOSS_COLUMN_NAME,
            FEATURE_COLUMN_NAME,
            LABEL_COLUMN_NAME,
            TARGET_COLUMN_NAME,
        ):
            self.assertIn(column, fit.training_summary.df)

        for column in (FEATURE_COLUMN_NAME, LABEL_COLUMN_NAME, TARGET_COLUMN_NAME):
            self.assertIn(column, fit.test_summary.df)

        # columns of the prediction frame
        prediction = notes.model.predict(fit.model)
        print(prediction)
        for column in (FEATURE_COLUMN_NAME, TARGET_COLUMN_NAME):
            self.assertIn(column, prediction)

        # columns and length of the backtest frame
        backtest = notes.model.backtest(fit.model)
        print(backtest.df)

        self.assertEqual(len(notes), len(backtest.df))
        for column in (
            FEATURE_COLUMN_NAME,
            LABEL_COLUMN_NAME,
            TARGET_COLUMN_NAME,
            GROSS_LOSS_COLUMN_NAME,
        ):
            self.assertIn(column, backtest.df)
Esempio n. 17
0
    def test_future_bband_quantile_clasification(self):
        """Fit a classifier on one-hot encoded future bband quantile labels.

        Asserts that ``tail=3`` yields three prediction rows, that the last
        prediction cell has shape ``(3,)``, and that mapping predictions to
        targets yields nine rows.
        """
        df = DF_TEST.copy()

        classifier = MLPClassifier(
            activation='tanh',
            hidden_layer_sizes=(60, 50),
            random_state=42,
            warm_start=True,
            max_iter=2,
        )
        features = extract_with_post_processor(
            [
                lambda df: df["Close"].ta.macd()._[['macd.*', 'signal.*']],
                lambda df: df.ta.adx()._[['+DI', '-DM', '+DM']],
                lambda df: df["Close"].ta.mom(),
                lambda df: df["Close"].ta.apo(),
                lambda df: df.ta.atr(),
                lambda df: df["Close"].ta.trix(),
            ],
            lambda df: df.ta.rnn(280),
        )
        labels = [
            lambda df: df["Close"].ta.future_bband_quantile(include_mean=False).ta.one_hot_encode_discrete()
        ]
        targets = [
            lambda df: df["Close"].ta.bbands()[["lower", "upper"]]
        ]
        model = SkModel(
            classifier,
            FeaturesAndLabels(features=features, labels=labels, targets=targets),
            summary_provider=ClassificationSummary,
        )
        splits = RandomSplits(
            test_size=0.4,
            test_validate_split_seed=42,
            cross_validation=(1, KEquallyWeightEvents(n_splits=3).split),
        )
        fit = df.model.fit(model, splits)

        print(fit)
        prediction = df.model.predict(fit.model, tail=3)
        self.assertEqual(3, len(prediction))
        last_cell = np.array(prediction[PREDICTION_COLUMN_NAME].iloc[-1, -1])
        self.assertEqual((3, ), last_cell.shape)

        target_predictions = prediction.map_prediction_to_target()
        print(target_predictions)
        self.assertEqual(9, len(target_predictions))
Esempio n. 18
0
    def test_debug(self):
        """Fit a classifier via the ``q``/``ml`` accessors and map predictions to targets.

        Asserts that ``tail=3`` yields three prediction rows and that mapping
        predictions to targets yields nine rows.
        """
        df = DF_TEST.copy()

        classifier = MLPClassifier(
            activation='tanh', hidden_layer_sizes=(60, 50), random_state=42, warm_start=True, max_iter=2
        )
        features = extract_with_post_processor(
            [
                lambda df: df["Close"].q.ta_macd().ml[['macd.*', 'signal.*']],
                lambda df: df.q.ta_adx().ml[['+DI', '-DM', '+DM']],
                lambda df: df["Close"].q.ta_mom(),
                lambda df: df["Close"].q.ta_apo(),
                lambda df: df.q.ta_atr(),
                lambda df: df["Close"].q.ta_trix(),
            ],
            lambda df: df.q.ta_rnn(280),
        )
        labels = [
            lambda df: df["Close"].q.ta_future_bband_quantile().q.ta_one_hot_encode_discrete()
        ]
        targets = [
            lambda df: df["Close"].q.ta_bbands()[["lower", "upper"]]
        ]
        model = SkModel(
            classifier,
            FeaturesAndLabels(features=features, labels=labels, targets=targets),
            summary_provider=ClassificationSummary,
        )
        fit = df.model.fit(
            model,
            test_size=0.4,
            test_validate_split_seed=42,
            cross_validation=(1, KEquallyWeightEvents(n_splits=3).split),
        )

        print(fit)
        prediction = df.model.predict(fit.model, tail=3)
        self.assertEqual(3, len(prediction))

        target_predictions = prediction.map_prediction_to_target()
        print(target_predictions)
        self.assertEqual(9, len(target_predictions))
Esempio n. 19
0
    def test_multi_model_kwargs(self):
        """Fit a MultiModel indexed by ``["c1", "c2"]`` and compare both predictions.

        Asserts that the training summary holds four predictions for each of
        "c1" and "c2" and that the two prediction columns are almost equal.
        """
        # toy classification data
        df = pd.DataFrame({
            "a": [1, 0, 1, 0, 1, 0, 1, 0],
            "b": [0, 0, 1, 1, 0, 0, 1, 1],
            "c": [1, 0, 0, 1, 1, 0, 0, 1],
        })

        classifier = MLPClassifier(
            activation='logistic',
            max_iter=1000,
            hidden_layer_sizes=(3, ),
            alpha=0.001,
            solver='lbfgs',
            random_state=42,
        )
        columns = FeaturesAndLabels(
            features=["a", "b"],
            # the lambda's second parameter name matches model_index_variable below
            labels=[lambda df, index: df["c"].rename(index)],
            label_type=int,
            index=["z"],
        )
        model = MultiModel(
            SkModel(classifier, columns),
            ["c1", "c2"],
            model_index_variable="index",
        )

        fit = df.model.fit(model, NaiveSplitter(0.49), epochs=1500, verbose=True)
        print(fit.training_summary.df)

        for label in ("c1", "c2"):
            self.assertEqual(
                4, len(fit.training_summary.df[PREDICTION_COLUMN_NAME, label]))

        np.testing.assert_array_almost_equal(
            fit.training_summary.df[PREDICTION_COLUMN_NAME, "c1"],
            fit.training_summary.df[PREDICTION_COLUMN_NAME, "c2"],
        )