def test_binary_classification(self):
        """Fit, backtest and predict with a single binary label.

        The label flags the rows where ``spy_Close`` divided by its ``SMA``
        is greater than one. The test asserts the column layout of the
        training summary and of the prediction frame, plus the size of the
        training summary and the shape of the backtest summary.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data["sma"] = SMA(data["spy_Close"])
        data["is_above"] = (data["spy_Close"] / data["sma"]) > 1

        classifier = MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['vix_Close'],
            labels=["is_above"],
            targets=lambda frame: frame["sma"],
            gross_loss=lambda frame: frame["spy_Close"] - frame["sma"])
        model = pdu.SkModel(classifier, features_and_labels)

        """when"""
        fit = data.fit(model, test_size=0.4, test_validate_split_seed=42)
        fit_summary_df = fit.training_summary.df
        bt_summary_df = data.backtest(fit.model).df
        predict_df = data.predict(fit.model, tail=1)

        """then"""
        expected_fit_columns = [
            (PREDICTION_COLUMN_NAME, 'is_above'),
            (LABEL_COLUMN_NAME, 'is_above'),
            (GROSS_LOSS_COLUMN_NAME, GROSS_LOSS_COLUMN_NAME),
            (TARGET_COLUMN_NAME, 'sma'),
        ]
        expected_prediction_columns = [
            (PREDICTION_COLUMN_NAME, 'is_above'),
            (TARGET_COLUMN_NAME, 'sma'),
        ]

        self.assertListEqual(fit_summary_df.columns.tolist(), expected_fit_columns)
        self.assertEqual(len(fit_summary_df), 4023)

        self.assertEqual(bt_summary_df.shape, (6706, 15))

        self.assertListEqual(predict_df.columns.tolist(), expected_prediction_columns)
Esempio n. 2
0
    def test_binary_classification_kfold(self):
        """Fit with a custom cross-validation splitter and check it was invoked.

        The splitter delegates to ``KFold(2).split`` and records that it has
        been called; the test passes when the fit raises no exception and the
        splitter ran at least once.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data['label'] = data["spy_Close"] > data["spy_Open"]

        """and cross validation"""
        splitter_was_used = False
        k_fold = KFold(2)

        def tracking_split(x, y=None, group=None):
            # remember the invocation, then hand over to the real KFold splitter
            nonlocal splitter_was_used
            splitter_was_used = True
            return k_fold.split(x, y, group)

        """when"""
        classifier = MLPClassifier(activation='tanh',
                                   hidden_layer_sizes=(60, 50),
                                   alpha=0.001,
                                   random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['vix_Close'],
            labels=['label'],
            gross_loss=lambda df: df["spy_Close"] - df["spy_Open"])

        fit = data.fit(pdu.SkModel(classifier, features_and_labels),
                       test_size=0.4,
                       cross_validation=(1, tracking_split),
                       test_validate_split_seed=42)

        """then"""
        # no exception thrown
        self.assertTrue(splitter_was_used)
Esempio n. 3
0
 def test_invalid_multi_model(self):
     """Wrapping a MultiModel in another MultiModel is expected to fail.

     The whole construction runs inside the ``assertRaises`` context, so a
     ``ValueError`` raised by any of the nested constructors satisfies the
     test.
     """

     """expect"""
     with self.assertRaises(ValueError):
         pdu.MultiModel(
             pdu.MultiModel(
                 pdu.SkModel(MLPClassifier(), pdu.FeaturesAndLabels([], {}))))
 def test_hyper_parameter(self):
     """Fit with a hyper parameter space and check the ``alpha`` of the result.

     The space offers ``alpha`` as a hyperopt choice between ``0.0001`` and
     ``10``; the fitted scikit-learn model is expected to report ``0.0001``.
     """
     from hyperopt import hp

     """given"""
     data = pd.read_csv(TEST_FILE, index_col='Date')
     data['label'] = data["spy_Close"] > data["spy_Open"]

     """when fit with find hyper parameter"""
     classifier = MLPClassifier(activation='tanh',
                                hidden_layer_sizes=(60, 50),
                                random_state=42)
     features_and_labels = pdu.FeaturesAndLabels(features=['vix_Close'],
                                                 labels=['label'],
                                                 target_columns=["vix_Open"],
                                                 loss_column="spy_Volume")
     search_space = {
         'alpha': hp.choice('alpha', [0.0001, 10]),
         'early_stopping': True,
         'max_iter': 50,
         '__max_evals': 4,
         '__rstate': np.random.RandomState(42),
     }
     fit = data.fit(pdu.SkModel(classifier, features_and_labels),
                    test_size=0.4,
                    test_validate_split_seed=42,
                    hyper_parameter_space=search_space)

     """then test best parameter"""
     fitted_params = fit.model.skit_model.get_params()
     self.assertEqual(fitted_params['alpha'], 0.0001)
Esempio n. 5
0
 def test__inderval_index_encoder(self):
     """Fit a classifier whose label is bucketed by an ``IntervalIndexEncoder``.

     The feature is ``sma_ratio`` and the label is ``forward_sma_ratio``
     encoded into four intervals. The last value of the last prediction row
     is expected to be below ``0.1``.

     NOTE(review): ``df`` is not defined in this method, so it comes from an
     enclosing scope (presumably a module-level fixture). The two column
     assignments below therefore modify that shared frame - confirm this is
     intended.
     """

     """given features and labels"""
     df["sma_ratio"] = df["Close"].ta_future_pct_of_mean(0, 20)
     df["forward_sma_ratio"] = df["Close"].ta_future_pct_of_mean(3, 20)

     """and an IntervalIndex"""
     buckets = pd.IntervalIndex.from_breaks(
         [-float("inf"), -0.05, 0.0, 0.05,
          float("inf")])

     """and a model"""
     model = pmu.SkModel(
         MLPClassifier(activation='tanh',
                       hidden_layer_sizes=(60, 50),
                       random_state=42),
         pmu.FeaturesAndLabels(features=['sma_ratio'],
                               labels=IntervalIndexEncoder(
                                   "forward_sma_ratio", buckets)))

     """when"""
     fit = df.fit(
         model,
         test_size=0.4,
         test_validate_split_seed=42,
     )
     predicted = df.predict(fit.model)

     """then"""
     print(predicted)
     # reuse the prediction from above instead of predicting a second time
     self.assertTrue(predicted.values[-1, -1] < 0.1)
Esempio n. 6
0
    def test_pre_processor(self):
        """Fit, backtest and predict with features/labels built by a pre processor.

        The ``feature`` and ``label`` columns do not exist in the CSV; they are
        produced by a ``LazyDataFrame`` inside the ``pre_processor`` callback.
        The test asserts the column layout of the test summary, the backtest
        and the prediction, the last index value of backtest and prediction,
        and the number of predicted rows.
        """

        """given"""
        df = pd.read_csv(TEST_FILE, index_col='Date')

        """when"""
        classifier = MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), alpha=0.001, random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['feature'],
            labels=['label'],
            label_type=int,
            gross_loss=lambda df: df["spy_Close"] - df["spy_Open"],
            pre_processor=lambda _df: pdu.LazyDataFrame(
                _df,
                feature=lambda f: f["vix_Close"].rolling(2).mean(),
                label=lambda f: (f["spy_Close"].shift(1) > f["spy_Open"]).shift(-1)).to_dataframe())

        fit = df.fit(pdu.SkModel(classifier, features_and_labels),
                     test_size=0.4,
                     test_validate_split_seed=42)

        bt = df.backtest(fit.model)
        p = df.predict(fit.model, 2)

        """then fit"""
        summary_columns = [
            (PREDICTION_COLUMN_NAME, 'label'),
            (LABEL_COLUMN_NAME, 'label'),
            (GROSS_LOSS_COLUMN_NAME, GROSS_LOSS_COLUMN_NAME),
        ]
        self.assertListEqual(fit.test_summary.df.columns.tolist(), summary_columns)

        """ and backtest"""
        # the backtest additionally carries all source columns and the two generated ones
        backtest_columns = [
            (PREDICTION_COLUMN_NAME, 'label'),
            (LABEL_COLUMN_NAME, 'label'),
            (GROSS_LOSS_COLUMN_NAME, GROSS_LOSS_COLUMN_NAME),
            *[(SOURCE_COLUMN_NAME, c) for c in df.columns],
            (SOURCE_COLUMN_NAME, "feature"),
            (SOURCE_COLUMN_NAME, "label"),
        ]
        self.assertListEqual(bt.df.columns.tolist(), backtest_columns)
        self.assertEqual(bt.df.index[-1], "2019-09-13")

        """ and prediction"""
        self.assertListEqual(p.columns.tolist(), [(PREDICTION_COLUMN_NAME, 'label')])
        self.assertEqual(p.index[-1], "2019-09-16")
        self.assertEqual(len(p), 2)
Esempio n. 7
0
    def test__2d_encoding(self):
        """Fit a ``MockModel`` whose label encoder emits one array per row.

        Asserts the shape of the test summary frame and the shape obtained
        after passing its values through ``integrate_nested_arrays``.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data["label"] = data["spy_Close"] > data["spy_Open"]

        class ArrayEncoder(TargetLabelEncoder):
            """Encode each ``spy_Close`` value ``r`` as ``np.array([r, r])`` in column ``2D``."""

            def __init__(self):
                super().__init__()

            @property
            def labels_source_columns(self) -> List[str]:
                return ["spy_Close"]

            @property
            def encoded_labels_columns(self) -> List[str]:
                return ["2D"]

            def encode(self, df: pd.DataFrame, **kwargs) -> pd.DataFrame:
                encoded = pd.DataFrame({}, index=df.index)
                doubled = df["spy_Close"].apply(lambda r: np.array([r, r]))
                encoded["2D"] = doubled
                # NOTE(review): the arrays are also written back into the
                # input frame's "spy_Close" column - confirm this is intended
                df["spy_Close"] = doubled
                return encoded

        """when"""
        features_and_labels = pdu.FeaturesAndLabels(["spy_Close"],
                                                    ArrayEncoder(),
                                                    feature_lags=[0, 1, 2])
        model = MockModel(features_and_labels)
        fit = data.fit(model)

        """then"""
        print(fit.test_summary.df)
        self.assertEqual(fit.test_summary.df.shape, (2682, 2))

        nested = integrate_nested_arrays(fit.test_summary.df.values)
        self.assertEqual(nested.shape, (2682, 2, 2))
Esempio n. 8
0
    def test_extractor(self):
        """Inspect the frames exposed by ``features_and_label_extractor``.

        Asserts the columns of the features, labels, source, target and gross
        loss frames, the shapes of the first two elements of
        ``features_labels_weights_df`` and that its third element is ``None``.
        """

        """given"""
        df = pd.read_csv(TEST_FILE, index_col='Date')

        """when"""
        classifier = MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), alpha=0.001, random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['feature'],
            labels=['label'],
            gross_loss=lambda df: df["spy_Close"] - df["spy_Open"],
            targets=lambda df: df["spy_Close"],
            pre_processor=lambda _df: pdu.LazyDataFrame(
                _df,
                feature=lambda f: f["vix_Close"].rolling(2).mean(),
                label=lambda f: (f["spy_Close"].shift(1) > f["spy_Open"]).shift(-1)))

        extractor = df.features_and_label_extractor(pdu.SkModel(classifier, features_and_labels))

        fnl = extractor.features_labels_weights_df

        """then"""
        self.assertListEqual(extractor.features_df.columns.tolist(), ['feature'])
        self.assertListEqual(extractor.labels_df.columns.tolist(), ['label'])

        # source frame: every original column followed by the two generated ones
        expected_source_columns = [*df.columns.tolist(), 'feature', 'label']
        self.assertListEqual(extractor.source_df[SOURCE_COLUMN_NAME].columns.tolist(), expected_source_columns)

        self.assertListEqual(extractor.target_df.columns.tolist(),
                             [(TARGET_COLUMN_NAME, "spy_Close")])
        self.assertListEqual(extractor.gross_loss_df.columns.tolist(),
                             [(GROSS_LOSS_COLUMN_NAME, GROSS_LOSS_COLUMN_NAME)])

        self.assertEqual(fnl[0].shape, (6704, 1))
        self.assertEqual(fnl[1].shape, (6704, 1))
        self.assertIsNone(fnl[2])
    def test_target_classification(self):
        """Fit one model on two label groups given as a dict (``a`` and ``b``).

        Asserts the three level column layout of the training summary and of
        the prediction frame, and the shape of the backtest summary.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data["sma"] = SMA(data["spy_Close"])
        data["is_above_1.0"] = (data["spy_Close"] / data["sma"]) > 1
        data["is_above_1.2"] = (data["spy_Close"] / data["sma"]) > 1.2

        classifier = MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['vix_Close'],
            labels={"a": ["is_above_1.0"], "b": ["is_above_1.2"]})
        model = pdu.SkModel(classifier, features_and_labels)

        """when"""
        fit = data.fit(model, test_size=0.4, test_validate_split_seed=42)
        fit_summary_df = fit.training_summary.df
        bt_summary_df = data.backtest(fit.model).df
        predict_df = data.predict(fit.model, tail=1)

        """then"""
        prediction_columns = [('a', PREDICTION_COLUMN_NAME, 'is_above_1.0'),
                              ('b', PREDICTION_COLUMN_NAME, 'is_above_1.2')]
        label_columns = [('a', LABEL_COLUMN_NAME, 'is_above_1.0'),
                         ('b', LABEL_COLUMN_NAME, 'is_above_1.2')]

        self.assertListEqual(fit_summary_df.columns.tolist(),
                             prediction_columns + label_columns)

        self.assertListEqual(predict_df.columns.tolist(), prediction_columns)

        self.assertEqual(bt_summary_df.shape, (6706, 16))
Esempio n. 10
0
    def test_save_load_models(self):
        """Save and reload several model kinds and compare their outputs.

        Every provider is fitted on ``df``, saved, and loaded again via
        ``pmu.Model.load``. The fitted and the restored model must produce
        equal prediction and backtest frames.

        The models are written into a temporary directory instead of fixed
        ``/tmp`` paths, so the test is portable, cannot collide with a
        parallel run and leaves no files behind.

        NOTE(review): ``df`` is not defined in this method; it comes from an
        enclosing scope (presumably a module-level fixture).
        """
        import os
        import tempfile

        """given"""
        features_and_labels = pmu.FeaturesAndLabels(["a"], ["b"])

        def keras_model_provider(optimizer='adam'):
            model = Sequential()
            model.add(Dense(1, input_dim=1, activation='sigmoid'))
            model.compile(optimizer, loss='mse')
            return model

        providers = [
            pmu.SkModel(MLPClassifier(activation='tanh',
                                      hidden_layer_sizes=(1, 1),
                                      alpha=0.001,
                                      random_state=42),
                        features_and_labels,
                        foo='bar'),
            pmu.SkModel(LogisticRegression(), features_and_labels),
            pmu.SkModel(LinearSVC(), features_and_labels),
            pmu.SkModel(RandomForestClassifier(), features_and_labels),
            pmu.KerasModel(keras_model_provider, features_and_labels),
            pmu.MultiModel(
                pmu.SkModel(LogisticRegression(),
                            pmu.FeaturesAndLabels(["a"], {"b": ["b"]})))
        ]

        """when"""
        fits = [df.fit(mp) for mp in providers]

        with tempfile.TemporaryDirectory(prefix='pandas-ml-utils-unittest-') as tmp_dir:
            models = []
            for i, f in enumerate(fits):
                path = os.path.join(tmp_dir, f'test_model_{i}')
                f.save_model(path)
                models.append((f.model, pmu.Model.load(path)))

            """then"""
            # compare while the directory still exists, in case a restored
            # model keeps referring to its files
            for i, (fitted_model, restored_model) in enumerate(models):
                print(f"test model ==> {i}")
                pd.testing.assert_frame_equal(df.predict(fitted_model),
                                              df.predict(restored_model))
                pd.testing.assert_frame_equal(
                    df.backtest(fitted_model).df,
                    df.backtest(restored_model).df)
Esempio n. 11
0
    def test_binary_classification_summary(self):
        """Fit with a ``BinaryClassificationSummary`` and verify what it reports.

        Covers the confusion matrices of the training and the test summary,
        the confusion loss, ratios and metrics of the test summary and the
        figure returned by ``plot_classification``. The size checks on the
        rendered PNG and HTML only run when the ``USER`` environment variable
        equals ``kic``.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data['label'] = data["spy_Close"] > data["spy_Open"]

        """when"""
        classifier = MLPClassifier(activation='tanh',
                                   hidden_layer_sizes=(60, 50),
                                   alpha=0.001,
                                   random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['vix_Close'],
            labels=['label'],
            gross_loss=lambda df: df["spy_Close"] - df["spy_Open"])
        model = pdu.SkModel(classifier, features_and_labels, BinaryClassificationSummary)

        fit = data.fit(model, test_size=0.4, test_validate_split_seed=42)

        """then confusion matrix"""
        self.assertEqual(fit.model.features_and_labels.min_required_samples, 1)

        training_matrix = fit.training_summary.get_confusion_matrix()
        np.testing.assert_array_equal(training_matrix,
                                      np.array([[1067, 872], [1002, 1082]]))

        test_matrix = fit.test_summary.get_confusion_matrix()
        np.testing.assert_array_equal(test_matrix,
                                      np.array([[744, 586], [655, 698]]))

        """  and confusion loss"""
        confusion_loss = fit.test_summary.get_confusion_loss()
        np.testing.assert_array_almost_equal(
            confusion_loss,
            np.array([[374.90, -234.83], [561.48, -650.63]]),
            2)

        """  and ratios"""
        ratios = np.array(fit.test_summary.get_ratios())
        np.testing.assert_array_almost_equal(ratios, np.array((0.78, 0.88)), 2)

        """  and metrics"""
        metrics = np.array(list(fit.test_summary.get_metrics().values()))
        np.testing.assert_array_almost_equal(metrics, np.array([0.78, 0.88, 0.54]), 2)

        """  and plot_classification"""
        figure_reprs = {
            key: repr(figure)
            for key, figure in fit.test_summary.plot_classification().items()
        }
        self.assertDictEqual(figure_reprs,
                             {None: '<Figure size 1600x900 with 2 Axes>'})

        if os.environ.get('USER') == 'kic':
            # FIXME nowadays this test fails on github while it still passes locally. we need a better assertion
            png_sizes = {
                key: len(fig_to_png_base64(figure))
                for key, figure in fit.test_summary.plot_classification().items()
            }
            self.assertDictEqual(png_sizes, {None: 141863})

            """  and _repr_html_"""
            self.assertEqual(len(fit.test_summary._repr_html_()), 145167)
Esempio n. 12
0
    def test_multi_model_binary_classifications(self):
        """Fit a ``MultiModel`` on two binary label groups (``a`` and ``b``).

        Asserts the three level column layout of the training summary and of
        the prediction frame, and the shape of the backtest summary.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data["sma"] = SMA(data["spy_Close"])
        data["is_above_1.0"] = (data["spy_Close"] / data["sma"]) > 1
        data["is_above_1.2"] = (data["spy_Close"] / data["sma"]) > 1.2

        classifier = MLPClassifier(activation='tanh',
                                   hidden_layer_sizes=(60, 50),
                                   random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['vix_Close'],
            labels={
                "a": ["is_above_1.0"],
                "b": ["is_above_1.2"]
            },
            targets=lambda frame, t: frame["sma"].rename(f"sma {t}"),
            gross_loss=lambda frame: frame["spy_Close"] - frame["sma"])
        model = pdu.MultiModel(pdu.SkModel(classifier, features_and_labels))

        """when"""
        fit = data.fit(
            model,
            test_size=0.4,
            test_validate_split_seed=42,
        )
        fit_summary_df = fit.training_summary.df
        bt_summary_df = data.backtest(fit.model).df
        predict_df = data.predict(fit.model, tail=1)

        """then"""
        prediction_columns = [('a', PREDICTION_COLUMN_NAME, 'is_above_1.0'),
                              ('b', PREDICTION_COLUMN_NAME, 'is_above_1.2')]
        label_columns = [('a', LABEL_COLUMN_NAME, 'is_above_1.0'),
                         ('b', LABEL_COLUMN_NAME, 'is_above_1.2')]
        loss_columns = [('a', GROSS_LOSS_COLUMN_NAME, 'a'),
                        ('b', GROSS_LOSS_COLUMN_NAME, 'b')]
        target_columns = [('a', TARGET_COLUMN_NAME, 'sma a'),
                          ('b', TARGET_COLUMN_NAME, 'sma b')]

        self.assertListEqual(
            fit_summary_df.columns.tolist(),
            prediction_columns + label_columns + loss_columns + target_columns)

        self.assertListEqual(predict_df.columns.tolist(),
                             prediction_columns + target_columns)

        self.assertEqual(bt_summary_df.shape, (6706, 20))
Esempio n. 13
0
 def test_model_with_LazyDataFrame_copy(self):
     """Save and reload a model that carries a ``LazyDataFrame`` keyword argument.

     The ``ldf`` keyword argument of the original and of the restored model
     must compare equal, and the ``foo`` callable stored in either one must
     still return ``'bar'``.

     The model is written into a temporary directory instead of a fixed
     ``/tmp`` path, so the test is portable and leaves no file behind.
     """
     import os
     import tempfile

     """given"""
     model = pmu.SkModel(MLPClassifier(activation='tanh',
                                       hidden_layer_sizes=(1, 1),
                                       alpha=0.001,
                                       random_state=42),
                         pmu.FeaturesAndLabels([], []),
                         foo='bar',
                         ldf=LazyDataFrame(None, foo=lambda _f: 'bar'))

     """when"""
     with tempfile.TemporaryDirectory(prefix='pandas-ml-utils-unittest-') as tmp_dir:
         path = os.path.join(tmp_dir, 'test_model_LDF')
         model.save(path)
         model2 = pmu.Model.load(path)

         """then"""
         self.assertEqual(model.kwargs["ldf"], model2.kwargs["ldf"])
         self.assertEqual(model.kwargs["ldf"].kwargs['foo'](None), 'bar')
         self.assertEqual(model2.kwargs["ldf"].kwargs['foo'](None), 'bar')
Esempio n. 14
0
 def test_fit_and_co(self):
     """Run fit, backtest and predict directly on a ``LazyDataFrame``.

     Uses the last 100 rows of the CSV. The test summary is expected to
     hold 40 rows, backtest and prediction one row less than the input.
     """

     """given"""
     rows = 100
     source = pd.read_csv(TEST_FILE, index_col='Date').tail(rows)
     ldf = pdu.LazyDataFrame(source,
                             sma=lambda f: f["vix_Close"].rolling(2).mean(),
                             label=lambda f: f["spy_Close"] > f["spy_Open"])

     classifier = MLPClassifier(activation='tanh',
                                hidden_layer_sizes=(60, 50),
                                random_state=42)
     model = pdu.SkModel(classifier, pdu.FeaturesAndLabels(["sma"], ["label"]))

     """when"""
     fit = ldf.fit(model)
     bt = ldf.backtest(fit.model)
     p = ldf.predict(fit.model)

     """then"""
     self.assertEqual(len(fit.test_summary.df), 40)
     self.assertEqual(len(bt.df), rows - 1)
     self.assertEqual(len(p), rows - 1)
Esempio n. 15
0
    def test_discrete_encoded_classes(self):
        """Predict a three class label encoded with ``OneHotEncodedDiscrete``.

        The label is 0, 1 or 2 depending on whether ``spy_Close`` lies more
        than 2% below its ``SMA``, within that band, or more than 2% above it.
        The prediction frame must expose one column per class.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data["sma"] = SMA(data["spy_Close"])

        # relative distance of the close to its moving average
        deviation = data["spy_Close"] / data["sma"] - 1
        is_above = (deviation > 0.02).astype(int)
        is_below = (deviation < -0.02).astype(int)
        data["label"] = (is_above - is_below) + 1

        classifier = MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(features=['vix_Close'],
                                                    labels=OneHotEncodedDiscrete("label", 3))
        model = pdu.SkModel(classifier, features_and_labels)

        """when"""
        fit = data.fit(model, test_size=0.4, test_validate_split_seed=42)
        predict_df = data.predict(fit.model, tail=1)

        """then"""
        expected_columns = [(PREDICTION_COLUMN_NAME, 'label_0'),
                            (PREDICTION_COLUMN_NAME, 'label_1'),
                            (PREDICTION_COLUMN_NAME, 'label_2')]
        self.assertListEqual(predict_df.columns.tolist(), expected_columns)
Esempio n. 16
0
    def test_fit_regressor_mutiple_target(self):
        """Fit an ``MLPRegressor`` on two label groups with prefixed target columns.

        Asserts that a backtest can be produced, and checks the column layout
        and the row count of the prediction frame.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date') / 50.

        """when"""
        regressor = MLPRegressor(activation='tanh',
                                 hidden_layer_sizes=(4, 3, 2, 1, 2, 3, 4),
                                 random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['spy_Open', 'spy_High', 'spy_Low', 'spy_Close'],
            labels={
                "a": ['vix_Open'],
                "b": ['vix_High', 'vix_Low', 'vix_Close']
            },
            targets=lambda frame, t: frame[['vix_High', 'vix_Low']
                                           ].add_prefix(f"{t}_"))

        fit = data.fit(pdu.SkModel(regressor, features_and_labels),
                       test_size=0.4,
                       test_validate_split_seed=42)

        fitted_model = fit.model

        # backtest
        backtest_regression = data.backtest(fitted_model)
        self.assertIsNotNone(backtest_regression)

        # regressed
        regressed = data.predict(fitted_model)

        """then"""
        prediction_columns = [('a', 'prediction', 'vix_Open'),
                              ('b', 'prediction', 'vix_High'),
                              ('b', 'prediction', 'vix_Low'),
                              ('b', 'prediction', 'vix_Close')]
        target_columns = [('a', 'target', 'a_vix_High'),
                          ('a', 'target', 'a_vix_Low'),
                          ('b', 'target', 'b_vix_High'),
                          ('b', 'target', 'b_vix_Low')]

        self.assertListEqual(regressed.columns.tolist(),
                             prediction_columns + target_columns)

        self.assertEqual(len(regressed), 6706)
Esempio n. 17
0
    def test_multi_class_classification(self):
        """Fit a classifier on a label bucketed by ``OneHotEncodedTargets``.

        Asserts the row count and column layout of the training summary, the
        column layout of the prediction frame and the shape of the backtest
        summary.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data["sma"] = SMA(data["spy_Close"])
        data["label"] = data["spy_Close"] / data["sma"] - 1

        def make_targets(frame):
            # one target column per offset: the row's "sma" value minus the offset
            # NOTE(review): the offsets do not obviously match the column
            # names assigned below - confirm they are intended
            offsets = np.array([-1, -0.05, 0.5, 1])
            expanded = frame.apply(lambda x: x["sma"] - offsets, axis=1,
                                   result_type='expand')
            expanded.columns = ["close <0.1", "close <0.05", "close >0", "close >0.05"]
            return expanded

        classifier = MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['vix_Close'],
            labels=OneHotEncodedTargets("label", np.linspace(-0.1, 0.1, 5, endpoint=True)),
            targets=make_targets)
        model = pdu.SkModel(classifier, features_and_labels)

        """when"""
        fit = data.fit(model, test_size=0.4, test_validate_split_seed=42)
        fit_summary_df = fit.training_summary.df
        bt_summary_df = data.backtest(fit.model).df
        predict_df = data.predict(fit.model, tail=1)

        """then"""
        bucket_names = ['(-inf, -0.05]',
                        '(-0.05, 0.0]',
                        '(0.0, 0.05000000000000002]',
                        '(0.05000000000000002, inf]']
        target_names = ['close <0.1', 'close <0.05', 'close >0', 'close >0.05']

        prediction_columns = [(PREDICTION_COLUMN_NAME, bucket) for bucket in bucket_names]
        label_columns = [(LABEL_COLUMN_NAME, bucket) for bucket in bucket_names]
        target_columns = [(TARGET_COLUMN_NAME, target) for target in target_names]

        self.assertEqual(len(fit_summary_df), 4023)
        self.assertListEqual(fit_summary_df.columns.tolist(),
                             prediction_columns + label_columns + target_columns)

        self.assertListEqual(predict_df.columns.tolist(),
                             prediction_columns + target_columns)

        self.assertEqual(bt_summary_df.shape, (6706, 23))
Esempio n. 18
0
    def test_save_load_keras_custom_loss(self):
        """Save and reload a Keras model that was compiled with a custom loss.

        The model provider returns the compiled model together with the custom
        loss function. The fitted and the restored model must produce equal
        prediction and backtest frames.

        The model is written into a temporary directory instead of a fixed
        ``/tmp`` path, so the test is portable and leaves no file behind.

        NOTE(review): ``df`` is not defined in this method; it comes from an
        enclosing scope (presumably a module-level fixture).
        """
        import os
        import tempfile

        """given"""
        features_and_labels = pmu.FeaturesAndLabels(["a"], ["b"])

        def loss_provider(foo):
            def my_custom_loss(x, y):
                print(foo)
                import keras.backend as K
                return K.sum(x - y)

            return my_custom_loss

        def keras_model_provider():
            model = Sequential()
            model.add(Dense(1, input_dim=1, activation='sigmoid'))

            model.compile(optimizer='Adam', loss=loss_provider("bar"))
            return model, loss_provider("bar")

        """when"""
        fit = df.fit(
            pmu.KerasModel(keras_model_provider,
                           features_and_labels,
                           optimizer='adam',
                           verbose=0))
        fitted_model = fit.model

        with tempfile.TemporaryDirectory(prefix='pandas-ml-utils-unittest-') as tmp_dir:
            name = os.path.join(tmp_dir, 'test_model_keras_custom_loss')
            fit.save_model(name)
            restored_model = pmu.Model.load(name)

            """then"""
            # compare while the directory still exists, in case the restored
            # model keeps referring to its files
            pd.testing.assert_frame_equal(df.predict(fitted_model),
                                          df.predict(restored_model))
            pd.testing.assert_frame_equal(
                df.backtest(fitted_model).df,
                df.backtest(restored_model).df)
Esempio n. 19
0
    def test_multi_model_multi_class_classifications(self):
        """Fit a ``MultiModel`` on two one-hot encoded label groups (``1`` and ``2``).

        Asserts the three level column layout of the training summary and of
        the prediction frame, and the shape of the backtest summary.
        """

        """given"""
        data = pd.read_csv(TEST_FILE, index_col='Date')
        data["sma"] = SMA(data["spy_Close"])
        data["is_above_1.0"] = (data["spy_Close"] / data["sma"]) + 1
        data["is_above_1.2"] = (data["spy_Close"] / data["sma"]) + 2

        classifier = MLPClassifier(activation='tanh',
                                   hidden_layer_sizes=(60, 50),
                                   random_state=42)
        features_and_labels = pdu.FeaturesAndLabels(
            features=['vix_Close'],
            labels={
                "1":
                OneHotEncodedTargets(
                    "is_above_1.0",
                    np.linspace(-0.1, 0.1, 5, endpoint=True) + 1),
                "2":
                OneHotEncodedTargets(
                    "is_above_1.2",
                    np.linspace(-0.1, 0.1, 5, endpoint=True) + 2)
            },
            targets=lambda frame, t:
            (frame["sma"] + int(t)).rename(f"sma {t}"),
            gross_loss=lambda frame: frame["spy_Close"] - frame["sma"])
        model = pdu.MultiModel(pdu.SkModel(classifier, features_and_labels))

        """when"""
        fit = data.fit(
            model,
            test_size=0.4,
            test_validate_split_seed=42,
        )
        fit_summary_df = fit.training_summary.df
        bt_summary_df = data.backtest(fit.model).df
        predict_df = data.predict(fit.model, tail=1)

        """then"""
        # bucket names per label group, in the order the columns are expected
        buckets_per_group = [
            ('1', ['(-inf, 0.95]', '(0.95, 1.0]', '(1.0, 1.05]', '(1.05, inf]']),
            ('2', ['(-inf, 1.95]', '(1.95, 2.0]', '(2.0, 2.05]', '(2.05, inf]']),
        ]
        prediction_columns = [(group, 'prediction', bucket)
                              for group, buckets in buckets_per_group
                              for bucket in buckets]
        label_columns = [(group, 'label', bucket)
                         for group, buckets in buckets_per_group
                         for bucket in buckets]
        loss_columns = [('1', GROSS_LOSS_COLUMN_NAME, '1'),
                        ('2', GROSS_LOSS_COLUMN_NAME, '2')]
        target_columns = [('1', TARGET_COLUMN_NAME, 'sma 1'),
                          ('2', TARGET_COLUMN_NAME, 'sma 2')]

        print(fit_summary_df.columns.tolist())
        self.assertListEqual(
            fit_summary_df.columns.tolist(),
            prediction_columns + label_columns + loss_columns + target_columns)

        self.assertListEqual(predict_df.columns.tolist(),
                             prediction_columns + target_columns)

        self.assertEqual(bt_summary_df.shape, (6706, 32))