Example #1
    def test_partial_fit_regression(self):
        data = make_regression(n_samples=100, n_features=2, n_informative=1)
        df = pd.DataFrame(data[0])
        df["label"] = data[1]

        with df.model() as m:
            fit_partial = m.fit(
                SkModel(
                    MLPRegressor(max_iter=1, random_state=42),
                    FeaturesAndLabels(features=[0, 1], labels=['label'])
                ),
                FittingParameter(
                    naive_splitter(0.3),
                    batch_size=10,
                    fold_epochs=10
                )
            )

        with df.model() as m:
            fit = m.fit(
                SkModel(
                    MLPRegressor(max_iter=10, random_state=42),
                    FeaturesAndLabels(features=[0, 1], labels=['label'])
                ),
                FittingParameter(naive_splitter(0.3))
            )

        self.assertAlmostEqual(df.model.predict(fit.model).iloc[0, -1],
                               df.model.predict(fit_partial.model).iloc[0, -1], 4)
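The test above leans on an equivalence that also holds in plain scikit-learn: ten single-iteration incremental updates from the same random seed should land close to one ten-iteration fit. A minimal standalone sketch of that idea, using sklearn's own partial_fit rather than the library's FittingParameter machinery:

    import numpy as np
    from sklearn.datasets import make_regression
    from sklearn.neural_network import MLPRegressor

    X, y = make_regression(n_samples=100, n_features=2, n_informative=1, random_state=0)

    # one fit running ten solver iterations (ConvergenceWarnings are expected) ...
    full = MLPRegressor(max_iter=10, random_state=42)
    full.fit(X, y)

    # ... versus ten single-pass incremental updates from the same seed
    partial = MLPRegressor(max_iter=1, random_state=42)
    for _ in range(10):
        partial.partial_fit(X, y)

    # the two should agree approximately (shuffling details can differ slightly)
    print(abs(full.predict(X[:1]) - partial.predict(X[:1])))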
Example #2
    def provide_linear_regression_model(self):
        from sklearn.linear_model import LinearRegression
        from sklearn.neural_network import MLPRegressor
        from pandas_ml_utils import FeaturesAndLabels, SkModel

        return [
            (
                SkModel(LinearRegression(), FeaturesAndLabels(["x"], ["y"])),
                FittingParameter(epochs=1, fold_epochs=1, context="LinearRegression")
            ),
            (
                SkModel(
                    MLPRegressor(10, learning_rate_init=0.01, max_iter=9000, validation_fraction=0),
                    FeaturesAndLabels(["x"], ["y"])
                ),
                FittingParameter(epochs=1, fold_epochs=1, context="MLPRegressor")
            ),
            (
                SkModel(
                    MLPRegressor(10, learning_rate_init=0.01, max_iter=1, validation_fraction=0, warm_start=True),
                    FeaturesAndLabels(["x"], ["y"])
                ),
                FittingParameter(epochs=9000, fold_epochs=1, context="MLPRegressor partial fit")
            )
        ]
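The third configuration relies on scikit-learn's warm_start semantics: with warm_start=True and max_iter=1, every call to fit resumes from the previous weights, so an outer epoch loop (epochs=9000 above) emulates partial fitting. A standalone sketch of just that mechanism:

    import warnings
    import numpy as np
    from sklearn.exceptions import ConvergenceWarning
    from sklearn.neural_network import MLPRegressor

    warnings.filterwarnings("ignore", category=ConvergenceWarning)  # max_iter=1 warns on every call

    X = np.linspace(0, 1, 50).reshape(-1, 1)
    y = (X ** 2).ravel()

    reg = MLPRegressor(hidden_layer_sizes=(10,), learning_rate_init=0.01,
                       max_iter=1, validation_fraction=0, warm_start=True)
    for _ in range(100):  # stands in for FittingParameter(epochs=...)
        reg.fit(X, y)     # resumes from the previous call's weights
    print(reg.predict(X[:3]))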
Example #3
    def test_partial_fit_classification(self):
        data = make_classification(n_samples=100, n_features=2, n_informative=1,
                                   n_redundant=0, n_clusters_per_class=1)
        df = pd.DataFrame(data[0])
        df["label"] = data[1]

        with df.model() as m:
            fit_partial = m.fit(
                SkModel(
                    MLPClassifier(max_iter=1, random_state=42),
                    FeaturesAndLabels(features=[0, 1], labels=['label']),
                    classes=np.unique(data[1])
                ),
                FittingParameter(
                    stratified_random_splitter(0.3),
                    batch_size=10,
                    fold_epochs=10,
                )
            )

        with df.model() as m:
            fit = m.fit(
                SkModel(
                    MLPClassifier(max_iter=10, random_state=42),
                    FeaturesAndLabels(features=[0, 1], labels=['label'])
                ),
                FittingParameter(stratified_random_splitter(0.3))
            )

        self.assertAlmostEqual(df.model.predict(fit.model).iloc[0, -1],
                               df.model.predict(fit_partial.model).iloc[0, -1], 4)
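Note the classes=np.unique(data[1]) argument in the partial-fit variant: scikit-learn's partial_fit needs the complete label set on the first call, since a single mini-batch may not contain every class. The same requirement in plain scikit-learn:

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.neural_network import MLPClassifier

    X, y = make_classification(n_samples=100, n_features=2, n_informative=1,
                               n_redundant=0, n_clusters_per_class=1, random_state=0)

    clf = MLPClassifier(max_iter=1, random_state=42)
    for start in range(0, len(X), 10):   # batches of 10, as batch_size=10 above
        batch = slice(start, start + 10)
        clf.partial_fit(X[batch], y[batch], classes=np.unique(y))
    print(clf.predict(X[:5]))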
Example #4
    def test_simple_regression_model(self):
        df = DF_TEST.copy()

        fit = df.model.fit(
            SkModel(
                MLPRegressor(activation='tanh',
                             hidden_layer_sizes=(60, 50),
                             random_state=42,
                             max_iter=2),
                FeaturesAndLabels(
                    features=[
                        lambda df: df["Close"].ta.rsi().ta.rnn(28),
                        lambda df: (df["Volume"] / df["Volume"].ta.ema(14) - 1).ta.rnn(28)
                    ],
                    labels=[
                        lambda df: (df["Close"] / df["Open"] - 1).shift(-1)
                    ]
                ),
                summary_provider=RegressionSummary
            ),
            FittingParameter()
        )

        print(fit)
        html = fit._repr_html_()

        prediction = df.model.predict(fit.model)
        print(prediction)
        self.assertIsInstance(prediction[PREDICTION_COLUMN_NAME, 0].iloc[-1],
                              (float, np.float32, np.float64))

        backtest = df.model.backtest(fit.model)
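The feature and label entries above are ordinary callables evaluated against the source frame; in particular, the label lambda aligns the next bar's open-to-close return with the current row via shift(-1). A toy illustration of that alignment in plain pandas:

    import pandas as pd

    df = pd.DataFrame({"Open": [100.0, 101.0, 102.0],
                       "Close": [101.0, 102.0, 101.5]})
    label = (df["Close"] / df["Open"] - 1).shift(-1)  # next bar's open-to-close return
    print(label)  # the last row is NaN: there is no future bar to learn from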
Example #5
    def test_simple_classification_model(self):
        df = DF_NOTES.copy()

        with df.model() as m:
            fit = m.fit(
                SkModel(
                    MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                    FeaturesAndLabels(
                        features=["variance", "skewness", "kurtosis", "entropy"],
                        labels=["authentic"],
                        label_type=bool
                    )
                ),
                FittingParameter(stratified_random_splitter())
            )

        print(fit)
        html = fit._repr_html_()

        prediction = df.model.predict(fit.model)
        print(prediction)
        self.assertGreaterEqual(prediction[PREDICTION_COLUMN_NAME].iloc[-1].values, 0.68)

        backtest = df.model.backtest(fit.model)
        self.assertIn(FEATURE_COLUMN_NAME, backtest.df)
        self.assertIn(LABEL_COLUMN_NAME, backtest.df)
        np.testing.assert_array_almost_equal(prediction[PREDICTION_COLUMN_NAME].iloc[-1].values,
                                             backtest.df[PREDICTION_COLUMN_NAME].iloc[-1].values)

        # test multiple samples
        samples = df.model.predict(fit.model, samples=2)
        self.assertIsInstance(samples[PREDICTION_COLUMN_NAME].iloc[-1, 0], list)
        self.assertEqual(2, len(samples[PREDICTION_COLUMN_NAME].iloc[-1, 0]))
Example #6
    def test_regressor(self):
        """given some toy regression data"""
        df = pd.DataFrame({
            "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
            "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
        })
        """and a model"""
        model = self.provide_regression_model(
            FeaturesAndLabels(features=["a"], labels=["b"]))
        """when we fit the model"""
        batch_size, epochs = self.provide_batch_size_and_epoch()
        with df.model() as m:
            fit = m.fit(model,
                        FittingParameter(splitter=naive_splitter(0.3),
                                         batch_size=batch_size,
                                         epochs=epochs),
                        verbose=0)

        print(fit.training_summary.df)
        self.assertEqual(4, len(fit.training_summary.df))
        self.assertEqual(2, len(fit.test_summary.df))
        """then we can predict"""
        prediction = df.model.predict(fit.model)
        np.testing.assert_array_almost_equal(prediction.iloc[:, 0].values,
                                             df["b"].values, 1)
        """and save and load the model"""
        temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            fit.model.save(temp)
            copy = Model.load(temp)
            pd.testing.assert_frame_equal(df.model.predict(fit.model),
                                          df.model.predict(copy),
                                          check_less_precise=True)
        finally:
            os.remove(temp)
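Model.save/Model.load is the library's own persistence API; the round-trip assertion follows the same pattern you would use with plain pickle and a scikit-learn estimator, sketched here for comparison:

    import os
    import pickle
    import tempfile
    import numpy as np
    from sklearn.linear_model import LinearRegression

    X = np.arange(6, dtype=float).reshape(-1, 1)
    y = 3 * X.ravel() + 1
    model = LinearRegression().fit(X, y)

    path = os.path.join(tempfile.gettempdir(), "model.pkl")
    try:
        with open(path, "wb") as f:
            pickle.dump(model, f)       # persist the fitted estimator
        with open(path, "rb") as f:
            copy = pickle.load(f)       # restore it
        np.testing.assert_array_almost_equal(model.predict(X), copy.predict(X))
    finally:
        os.remove(path)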
Example #7
    def test_multindex_row_multi_samples(self):
        """given some toy regression data while we provide a multiindex for the rows"""
        df = pd.DataFrame(
            {
                "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
                "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0, -2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
            },
            index=pd.MultiIndex.from_product([["A", "B"], range(6)]))
        """and a model"""
        model = self.provide_regression_model(
            FeaturesAndLabels(features=["a"], labels=["b"]))
        """when we fit the model"""
        batch_size, epochs = self.provide_batch_size_and_epoch()
        with df.model() as m:
            fit = m.fit(model,
                        FittingParameter(splitter=random_splitter(
                            0.3, partition_row_multi_index=True),
                                         batch_size=batch_size,
                                         epochs=epochs),
                        verbose=0)

        self.assertEqual(8, len(fit.training_summary.df))
        self.assertEqual(4, len(fit.test_summary.df))

        prediction = df.model.predict(fit.model, samples=2)
        self.assertEqual(2, len(prediction.iloc[:, 0]._.values))
        self.assertEqual((6, 2), prediction.loc["A"].iloc[:, 0]._.values.shape)
        self.assertEqual((6, 2), prediction.loc["B"].iloc[:, 0]._.values.shape)
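random_splitter(0.3, partition_row_multi_index=True) draws the train/test split within each top-level index partition rather than across all rows, which is why both "A" and "B" contribute to the 8/4 split. A hand-rolled pandas sketch of that behavior (not the library's implementation):

    import pandas as pd

    idx = pd.MultiIndex.from_product([["A", "B"], range(6)])
    df = pd.DataFrame({"a": range(12)}, index=idx)

    # sample 30% of the rows of each top-level partition as test data
    test = df.groupby(level=0).sample(frac=0.3, random_state=0)
    train = df.drop(test.index)
    print(len(train), len(test))  # 8 / 4, matching the summary lengths asserted above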
Example #8
    def test_simple_classification_cross_validation(self):
        df = DF_NOTES.copy()

        with df.model() as m:
            fit = m.fit(
                SkModel(
                    MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                    FeaturesAndLabels(
                        features=["variance", "skewness", "kurtosis", "entropy"],
                        labels=["authentic"],
                        label_type=bool
                    )
                ),
                FittingParameter(
                    splitter=random_splitter(),
                    cross_validation=KFold(3, random_state=42, shuffle=True)
                )
            )

        print(fit)
        html = fit._repr_html_()

        prediction = df.model.predict(fit.model)
        print(prediction)
        self.assertGreaterEqual(prediction[PREDICTION_COLUMN_NAME].iloc[-1].values, 0.65)
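The cross_validation=KFold(...) argument mirrors plain scikit-learn k-fold usage; for comparison, the equivalent standalone pattern:

    from sklearn.datasets import make_classification
    from sklearn.model_selection import KFold, cross_val_score
    from sklearn.neural_network import MLPClassifier

    X, y = make_classification(n_samples=120, random_state=42)
    cv = KFold(3, shuffle=True, random_state=42)   # shuffle=True is what makes random_state apply
    scores = cross_val_score(MLPClassifier(max_iter=300, random_state=42), X, y, cv=cv)
    print(scores.mean())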
Example #9
    def test_regularized_loss(self):
        df = pd.DataFrame({
            "f": np.sin(np.linspace(0, 12, 40)),
            "l": np.sin(np.linspace(5, 17, 40))
        })

        class TestModel(PytorchNN):
            def __init__(self):
                super().__init__()
                self.net = nn.Sequential(nn.Linear(1, 3), nn.ReLU(),
                                         nn.Linear(3, 2), nn.ReLU(),
                                         nn.Linear(2, 1), nn.Sigmoid())

            def forward_training(self, x):
                return self.net(x)

            def L2(self) -> Dict[str, float]:
                return {'**/2/**/weight': 99999999999.99}

        fit = df.model.fit(
            PytorchModel(TestModel, FeaturesAndLabels(["f"], ["l"]),
                         nn.MSELoss, Adam),
            FittingParameter(epochs=1000, splitter=naive_splitter(0.5)))

        print(fit.model._current_model.net.net[2].weight.detach().numpy())
        print(
            fit.model._current_model.net.net[2].weight.norm().detach().item())
        self.assertLess(
            fit.model._current_model.net.net[2].weight.norm().detach().item(),
            0.1)
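The L2() hook returns a mapping from a parameter path pattern to a penalty weight; under the hood this amounts to adding a squared-norm term for the matched parameters to the training loss, which is why an absurdly large weight drives the layer's norm toward zero. A minimal plain-PyTorch sketch of that mechanic (indexing the layer directly instead of the library's wildcard matching):

    import torch
    from torch import nn

    net = nn.Sequential(nn.Linear(1, 3), nn.ReLU(),
                        nn.Linear(3, 2), nn.ReLU(),
                        nn.Linear(2, 1), nn.Sigmoid())
    x = torch.randn(8, 1)
    y = torch.rand(8, 1)

    # the penalty term: lambda * ||W||^2 for the parameters matched by '**/2/**/weight'
    loss = nn.MSELoss()(net(x), y) + 0.1 * net[2].weight.norm(2) ** 2
    loss.backward()
    print(net[2].weight.grad.abs().sum())  # the penalty contributes to these gradients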
Example #10
    def test_linear_model(self):
        df = DF_NOTES.copy()

        with df.model() as m:
            fit = m.fit(
                SkModel(
                    Lasso(),
                    FeaturesAndLabels(
                        features=[
                            lambda df: df["variance"],
                            lambda df: (df["skewness"] / df["kurtosis"]).rename("engineered")
                        ],
                        labels=[
                            'authentic'
                        ]
                    )
                ),
                FittingParameter(naive_splitter())
            )

        print(fit)

        prediction = df.model.predict(fit.model)
        print(prediction)

        backtest = df.model.backtest(fit.model)
        self.assertLess(backtest.model.sk_model.coef_[0], 1e-5)
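The final assertion exploits Lasso's L1 shrinkage: a feature that carries little or no signal gets its coefficient driven to (near) zero. The same effect in plain scikit-learn:

    import numpy as np
    from sklearn.linear_model import Lasso

    rng = np.random.default_rng(0)
    noise = rng.normal(size=200)     # carries no signal
    signal = rng.normal(size=200)
    y = 2 * signal
    X = np.column_stack([noise, signal])

    lasso = Lasso(alpha=0.1).fit(X, y)
    print(lasso.coef_)  # the first coefficient is shrunk to ~0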
Example #11
    def test_make_model(self):
        notebooks_path = os.path.join(PWD, '..', 'examples')
        df = pd.read_csv(os.path.join(notebooks_path, 'SPY.csv'))

        with df.model("/tmp/pijsfnwuacpa.model") as m:
            from torch import nn
            from torch.optim import SGD
            from pandas_ml_common.utils.column_lagging_utils import lag_columns

            from pandas_ml_utils import FeaturesAndLabels, RegressionSummary, FittingParameter
            from pandas_ml_utils_torch import PytorchModel
            from pandas_ml_utils_torch.merging_cross_folds import take_the_best

            def net_provider():
                from pandas_ml_utils_torch import PytorchNN

                class Net(PytorchNN):

                    def __init__(self):
                        super().__init__()
                        self.net = nn.Sequential(
                            nn.Linear(10, 4),
                            nn.Tanh(),
                            nn.Linear(4, 4),
                            nn.Tanh(),
                            nn.Linear(4, 1),
                            nn.Tanh(),
                        )

                    def L1(self):
                        # path to the parameters which should be regularized
                        # the path is constructed from self.named_parameters() and allows the use of wildcards
                        return {'net/0/**/weight': 0.02}

                    def L2(self):
                        return {
                            'net/0/**/weight': 0.02,
                            'net/2/**/weight': 0.05
                        }

                    def forward_training(self, x):
                        return self.net(x)

                return Net()

            fit = m.fit(
                PytorchModel(
                    net_provider,
                    FeaturesAndLabels(
                        [lambda df: lag_columns(df["Close"].pct_change(), range(10))],
                        [lambda df: df["Close"].pct_change().shift(-1)]),
                    nn.MSELoss,
                    lambda params: SGD(params, lr=0.01, momentum=0.0),
                    merge_cross_folds=take_the_best,
                    summary_provider=RegressionSummary
                ),
                FittingParameter(epochs=2),
                verbose=1
            )
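lag_columns(df["Close"].pct_change(), range(10)) stacks lagged copies of the return series as features; conceptually it is equivalent to concatenating shifted columns in plain pandas:

    import pandas as pd

    s = pd.Series([1.0, 2.0, 4.0, 8.0], name="Close").pct_change()
    lagged = pd.concat([s.shift(i).rename(f"lag_{i}") for i in range(3)], axis=1)
    print(lagged)  # each row holds the current and the previous returns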
Example #12
    def provide_non_linear_regression_model(self):
        from sklearn.neural_network import MLPRegressor
        from pandas_ml_utils import FeaturesAndLabels, SkModel

        return [
            (
                SkModel(
                    MLPRegressor(200, learning_rate_init=0.001, max_iter=5000, validation_fraction=0),
                    FeaturesAndLabels(["x"], ["y"])
                ),
                FittingParameter(epochs=1, context="epoch 1 fit"),
            ),
            (
                SkModel(
                    MLPRegressor(200, learning_rate_init=0.001, max_iter=1, validation_fraction=0, warm_start=True),
                    FeaturesAndLabels(["x"], ["y"])
                ),
                FittingParameter(epochs=5000, context="partial fit"),
            )
        ]
Example #13
    def provide_non_linear_regression_model(self):
        from pandas_ml_utils_torch import PytorchModel, PytorchNN
        from pandas_ml_utils import FeaturesAndLabels
        from torch.optim import Adagrad
        from torch import nn
        import torch as t

        # t.manual_seed(0)

        class Net(PytorchNN):
            def __init__(self):
                super().__init__()
                self.net = nn.Sequential(nn.Linear(1, 200), nn.ReLU(),
                                         nn.Linear(200, 200), nn.ReLU(),
                                         nn.Linear(200, 200), nn.ReLU(),
                                         nn.Linear(200, 1), nn.ReLU())

            def forward_training(self, *input) -> t.Tensor:
                return self.net(input[0])

        t.manual_seed(0)
        model = PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss,
                             Adagrad)

        return [
            (
                model,
                FittingParameter(epochs=600, batch_size=64, context="epoch fit batched"),
            ),
            (
                model,
                FittingParameter(epochs=600, context="epoch fit"),
            ),
            (
                model,
                FittingParameter(epochs=1, fold_epochs=600, context="fold epoch fit"),
            ),
        ]
Example #14
    def test_probabilistic(self):
        def create_sine_data(n=300):
            np.random.seed(32)
            x = np.linspace(0, 1 * 2 * np.pi, n)
            y1 = 3 * np.sin(x)
            y1 = np.concatenate(
                (np.zeros(60), y1 + np.random.normal(0, 0.15 * np.abs(y1), n),
                 np.zeros(60)))
            x = np.concatenate(
                (np.linspace(-3, 0, 60), np.linspace(0, 3 * 2 * np.pi, n),
                 np.linspace(3 * 2 * np.pi, 3 * 2 * np.pi + 3, 60)))
            y2 = 0.1 * x + 1
            y = y1 + y2
            return x, y

        df = pd.DataFrame(np.array(create_sine_data(300)).T,
                          columns=["x", "y"])
        with df.model() as m:
            from pandas_ml_utils import FeaturesAndLabels
            from pandas_ml_utils_torch import PytorchNN, PytorchModel
            from pandas_ml_utils_torch.loss import HeteroscedasticityLoss
            from pandas_ml_common.sampling.splitter import duplicate_data
            from torch.optim import Adam
            from torch import nn

            class Net(PytorchNN):
                def __init__(self):
                    super().__init__()
                    self.l = nn.Sequential(
                        nn.Linear(1, 20),
                        nn.ReLU(),
                        nn.Linear(20, 50),
                        nn.ReLU(),
                        nn.Linear(50, 20),
                        nn.ReLU(),
                        nn.Linear(20, 2),
                    )

                def forward_training(self, x):
                    return self.l(x)

            fit = m.fit(
                PytorchModel(Net,
                             FeaturesAndLabels(["x"], ["y"]),
                             HeteroscedasticityLoss,
                             Adam,
                             restore_best_weights=True),
                FittingParameter(batch_size=128,
                                 epochs=10,
                                 splitter=duplicate_data()))
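HeteroscedasticityLoss is used here because the network emits two outputs per sample: the criterion treats them as the mean and scale of a Normal distribution and minimizes its negative log likelihood, so the model learns input-dependent uncertainty. A minimal sketch of such a criterion in plain PyTorch (the library's exact parametrization may differ, e.g. how the scale is kept positive):

    import torch
    from torch.distributions import Normal

    def hetero_nll(pred: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        mean, log_scale = pred[..., 0], pred[..., 1]   # the two network outputs
        return -Normal(mean, log_scale.exp()).log_prob(y).mean()

    pred = torch.tensor([[0.1, -1.0], [0.3, -0.5]])
    y = torch.tensor([0.0, 0.4])
    print(hetero_nll(pred, y))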
Example #15
    def provide_linear_regression_model(self):
        from pandas_ml_utils_torch import PytorchModel, PytorchNN
        from pandas_ml_utils import FeaturesAndLabels
        from torch.optim import Adam
        from torch import nn
        import torch as t

        class Net(PytorchNN):
            def __init__(self):
                super(Net, self).__init__()
                self.net = nn.Linear(1, 1)

            def forward_training(self, *input) -> t.Tensor:
                return self.net(input[0])

        return [
            (
                PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss,
                             Adam),
                FittingParameter(epochs=5000, context="epoch fit"),
            ),
            (
                PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss,
                             Adam),
                FittingParameter(epochs=5000,
                                 batch_size=64,
                                 context="epoch fit batched"),
            ),
            (
                PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss,
                             Adam),
                FittingParameter(epochs=1,
                                 fold_epochs=5000,
                                 context="fold epoch fit"),
            ),
        ]
Example #16
    def test_simple_classification_model_with_all_options(self):
        df = DF_NOTES.copy()

        with df.model() as m:
            fit = m.fit(
                SkModel(
                    MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                    FeaturesAndLabels(
                        features=["variance", "skewness", "kurtosis", "entropy"],
                        sample_weights=["variance"],
                        gross_loss=["kurtosis"],
                        targets=["entropy"],
                        labels=["authentic"],
                        label_type=bool
                    )
                ),
                FittingParameter(stratified_random_splitter())
            )

        # should not throw an error
        html = fit._repr_html_()

        # fit resulting columns
        print(fit.test_summary.df)
        self.assertIn(GROSS_LOSS_COLUMN_NAME, fit.training_summary.df)
        self.assertIn(FEATURE_COLUMN_NAME, fit.training_summary.df)
        self.assertIn(LABEL_COLUMN_NAME, fit.training_summary.df)
        self.assertIn(TARGET_COLUMN_NAME, fit.training_summary.df)

        self.assertIn(FEATURE_COLUMN_NAME, fit.test_summary.df)
        self.assertIn(LABEL_COLUMN_NAME, fit.test_summary.df)
        self.assertIn(TARGET_COLUMN_NAME, fit.test_summary.df)

        # prediction resulting columns
        prediction = df.model.predict(fit.model)
        print(prediction)
        self.assertIn(FEATURE_COLUMN_NAME, prediction)
        self.assertIn(TARGET_COLUMN_NAME, prediction)

        # backtest resulting columns
        backtest = df.model.backtest(fit.model)
        print(backtest.df)

        self.assertEqual(len(df), len(backtest.df))
        self.assertIn(FEATURE_COLUMN_NAME, backtest.df)
        self.assertIn(LABEL_COLUMN_NAME, backtest.df)
        self.assertIn(TARGET_COLUMN_NAME, backtest.df)
        self.assertIn(GROSS_LOSS_COLUMN_NAME, backtest.df)
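sample_weights, gross_loss, and targets select extra columns that are carried through fitting, prediction, and backtests. sample_weights conceptually corresponds to the per-row sample_weight that many scikit-learn estimators accept (how the library applies it internally may differ). That underlying mechanism, standalone:

    import numpy as np
    from sklearn.linear_model import LogisticRegression

    rng = np.random.default_rng(0)
    X = rng.normal(size=(50, 2))
    y = (X[:, 0] > 0).astype(int)
    w = np.abs(X[:, 0])  # weight each row by a feature, as sample_weights=["variance"] does

    clf = LogisticRegression().fit(X, y, sample_weight=w)
    print(clf.score(X, y))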
Example #17
    def test_mult_epoch_cross_validation(self):
        df = pd.DataFrame({
            "a": [1, 0, 1, 0, 1, 0, 1, 0],
            "b": [0, 1, 0, 1, 1, 0, 1, 0],
        })

        with df.model() as m:

            class NN(PytorchNN):
                def __init__(self, *args, **kwargs):
                    super().__init__(*args, **kwargs)
                    self.nn = nn.Sequential(
                        nn.Linear(1, 2),
                        nn.ReLU(),
                        nn.Linear(2, 1),
                    )

                def forward_training(self, x):
                    return self.nn(x)

            fit = m.fit(
                PytorchModel(NN, FeaturesAndLabels(["a"], ["b"]), nn.MSELoss,
                             Adam),
                FittingParameter(splitter=naive_splitter(0.5),
                                 epochs=2,
                                 fold_epochs=10,
                                 batch_size=2))

        print(fit)
Example #18
    def test_multindex_row(self):
        """given some toy regression data while we provide a multiindex for the rows"""
        df = pd.DataFrame(
            {
                "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
                "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0, -2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
            },
            index=pd.MultiIndex.from_product([["A", "B"], range(6)]))
        """and a model"""
        model = self.provide_regression_model(
            FeaturesAndLabels(features=["a"], labels=["b"]))
        """when we fit the model"""
        batch_size, epochs = self.provide_batch_size_and_epoch()
        with df.model() as m:
            fit = m.fit(model,
                        FittingParameter(splitter=random_splitter(
                            0.3, partition_row_multi_index=True),
                                         batch_size=batch_size,
                                         epochs=epochs),
                        verbose=0)

        prediction = df.model.predict(fit.model)
        print(fit)
        # fit.training_summary.df.to_pickle('/tmp/multi_index_row_summary.df')
        # print(fit._repr_html_())
        """then we get a prediction for A and B rows"""
        self.assertEqual(8, len(fit.training_summary.df))
        self.assertEqual(4, len(fit.training_summary.df.loc["A"]))
        self.assertEqual(4, len(fit.training_summary.df.loc["B"]))

        self.assertEqual(4, len(fit.test_summary.df))
        self.assertEqual(2, len(fit.test_summary.df.loc["A"]))
        self.assertEqual(2, len(fit.test_summary.df.loc["B"]))

        self.assertEqual(6, len(prediction.loc["A"]))
        self.assertEqual(6, len(prediction.loc["B"]))
        np.testing.assert_array_almost_equal(prediction.iloc[:, 0].values,
                                             df["b"].values, 1)
Example #19
    def test_multi_objective_loss(self):
        df = pd.DataFrame(
            np.array([
                # train
                [0, 0, 0],
                [0, 1, 1],
                [1, 0, 1],
                [1, 1, 1],
                # test
                [0, 0, 0],
                [0, 1, 1],
                [1, 0, 1],
                [1, 1, 1],
            ]),
            columns=["f1", "f2", "l"])

        class XorModule(PytorchNN):
            def __init__(self):
                super().__init__()
                self.x1 = nn.Linear(2, 1)
                self.s1 = nn.Sigmoid()
                self.x2 = nn.Linear(2, 1)
                self.s2 = nn.Sigmoid()
                self.s = nn.Softmax()

            def forward_training(self, x):
                return self.s1(self.x1(x)), self.s2(self.x2(x))

            def forward_predict(self, x):
                return self.s1(self.x1(x))

        fit = df.model.fit(
            PytorchModel(
                XorModule, FeaturesAndLabels(["f1", "f2"], ["l"]),
                lambda: MultiObjectiveLoss(
                    (1, nn.MSELoss(reduction='none')),
                    (1, nn.L1Loss(reduction='none')),
                    on_epoch=lambda criterion, epoch: criterion.update_weights(
                        (0, 1.1))), Adam),
            FittingParameter(splitter=naive_splitter(0.5)))

        print(fit.test_summary.df)
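MultiObjectiveLoss blends the two per-head criteria with weights that the on_epoch callback can reschedule (here shifting all weight to the L1 term). The core of such a criterion is a weighted sum, roughly:

    import torch
    from torch import nn

    mse = nn.MSELoss(reduction='none')
    l1 = nn.L1Loss(reduction='none')
    weights = [1.0, 1.0]   # update_weights((0, 1.1)) would reschedule these per epoch

    def combined(preds, target):
        return (weights[0] * mse(preds[0], target) +
                weights[1] * l1(preds[1], target)).mean()

    preds = (torch.rand(4, 1), torch.rand(4, 1))  # the two heads of forward_training
    print(combined(preds, torch.rand(4, 1)))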
Example #20
    def test_no_test_data(self):
        """given some toy regression data"""
        df = pd.DataFrame({
            "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
            "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
        })
        """and a model"""
        model = self.provide_regression_model(
            FeaturesAndLabels(features=["a"], labels=["b"]))
        """when we fit the model"""
        batch_size, epochs = self.provide_batch_size_and_epoch()
        with df.model() as m:
            fit = m.fit(model,
                        FittingParameter(splitter=naive_splitter(0),
                                         batch_size=batch_size,
                                         epochs=epochs),
                        verbose=0)

        # print(fit.training_summary.df)
        print(fit.test_summary.df)
        """then we have an empty test data frame"""
        self.assertEqual(len(fit.training_summary.df), len(df))
        self.assertEqual(len(fit.test_summary.df), 0)
Example #21
    def test_multi_sample_regressor(self):
        """given some toy regression data"""
        df = pd.DataFrame({
            "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
            "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
        })
        """and a model"""
        model = self.provide_regression_model(
            FeaturesAndLabels(features=["a"], labels=["b"]))
        """when we fit the model"""
        batch_size, epochs = self.provide_batch_size_and_epoch()
        with df.model() as m:
            fit = m.fit(model,
                        FittingParameter(splitter=naive_splitter(0.3),
                                         batch_size=batch_size,
                                         epochs=epochs),
                        verbose=0)

        print(fit.training_summary.df)
        """then we can predict"""
        prediction = df.model.predict(fit.model, samples=2)
        np.testing.assert_array_almost_equal(
            prediction.iloc[:, 0]._.values,
            np.concatenate([df[["b"]].values, df[["b"]].values], axis=1), 1)
Example #22
    def test_auto_encoder(self):
        """given the implementation can handle auto encoders"""
        model = self.provide_auto_encoder_model(
            FeaturesAndLabels(features=["a", "b"],
                              labels=["a", "b"],
                              latent=["x"]))

        if model is None:
            return
        """and some toy classification data"""
        df = pd.DataFrame({
            "a": [1, 0] * 10,
            "b": [0, 1] * 10,
        })
        """when we fit the model"""
        batch_size, epochs = self.provide_batch_size_and_epoch()
        with df.model() as m:
            fit = m.fit(model,
                        FittingParameter(splitter=naive_splitter(0.49),
                                         batch_size=batch_size,
                                         epochs=epochs),
                        verbose=0)

        print(fit.training_summary.df)
        """then we can predict Autoencoded"""
        auto_encoded_prediction = df.model.predict(fit.model)
        self.assertEqual((20, 2), auto_encoded_prediction["prediction"].shape)
        """and we can encode"""
        encoded_prediction = df.model.predict(fit.model.as_encoder())
        print(encoded_prediction)
        self.assertEqual((20, 1), encoded_prediction["prediction"].shape)
        """and we can decode"""
        decoded_prediction = encoded_prediction["prediction"].model.predict(
            fit.model.as_decoder())
        print(decoded_prediction)
        np.testing.assert_array_almost_equal(
            decoded_prediction["prediction"].values > 0.5, df[["a",
                                                               "b"]].values)
        """and we can encoder and decode after safe and load"""
        temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        try:
            fit.model.save(temp)
            copy = Model.load(temp)

            pd.testing.assert_frame_equal(df.model.predict(
                fit.model.as_encoder()),
                                          df.model.predict(copy.as_encoder()),
                                          check_less_precise=True)

            pd.testing.assert_frame_equal(
                encoded_prediction.model.predict(fit.model.as_decoder()),
                encoded_prediction.model.predict(copy.as_decoder()),
                check_less_precise=True)
        finally:
            os.remove(temp)

        # try to save only as encoder model
        try:
            fit.model.as_encoder().save(temp)
            copy = Model.load(temp)
        finally:
            os.remove(temp)
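as_encoder()/as_decoder() expose the two halves of the fitted autoencoder as separate predictors, with the latent=["x"] column naming the bottleneck. Structurally, the split behaves like this tiny PyTorch sketch (a loose analogy, not the library's classes):

    import torch
    from torch import nn

    encoder = nn.Linear(2, 1)   # (a, b) -> latent x
    decoder = nn.Linear(1, 2)   # latent x -> (a, b)

    x = torch.tensor([[1.0, 0.0], [0.0, 1.0]])
    latent = encoder(x)                  # what predict(fit.model.as_encoder()) exposes
    reconstruction = decoder(latent)     # what the as_decoder() half computes
    print(latent.shape, reconstruction.shape)  # (2, 1) and (2, 2)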
Example #23
    def test_classifier(self):
        """given some toy classification data"""
        df = pd.DataFrame({
            "a": [1, 0, 1, 0, 1, 0, 1, 0],
            "b": [0, 0, 1, 1, 0, 0, 1, 1],
            "c": [1, 0, 0, 1, 1, 0, 0, 1]
        })
        """and a model"""
        model = self.provide_classification_model(
            FeaturesAndLabels(features=["a", "b"],
                              labels=["c"],
                              label_type=int))
        temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
        """when we fit the model"""
        batch_size, epochs = self.provide_batch_size_and_epoch()
        with df.model(temp) as m:
            fit = m.fit(model,
                        FittingParameter(splitter=naive_splitter(0.49),
                                         batch_size=batch_size,
                                         epochs=epochs),
                        verbose=0)

        print(fit.training_summary.df)
        # fit.training_summary.df.to_pickle('/tmp/classifier.df')
        # print(fit._repr_html_())
        """then we get a html summary and can predict"""
        self.assertIn('<style>', fit.training_summary._repr_html_())

        prediction = df.model.predict(fit.model)
        binary_prediction = prediction.iloc[:, 0] >= 0.5
        np.testing.assert_array_equal(
            binary_prediction,
            np.array([True, False, False, True, True, False, False, True]))
        """and load the model"""
        try:
            copy = Model.load(temp)
            pd.testing.assert_frame_equal(df.model.predict(fit.model),
                                          df.model.predict(copy),
                                          check_less_precise=True)

            # test using context manager and ForecastProvider
            pd.testing.assert_frame_equal(
                df.model(temp).predict(forecast_provider=Forecast).df,
                df.model.predict(copy),
                check_less_precise=True)
        finally:
            os.remove(temp)
Example #24
    def test_soft_dtw_loss(self):
        df = TEST_DF[["Close"]][-21:].copy()

        class LstmAutoEncoder(PytorchNN):
            def __init__(self):
                super().__init__()
                self.input_size = 1
                self.seq_size = 10
                self.hidden_size = 2
                self.num_layers = 1
                self.num_directions = 1

                self._encoder =\
                    nn.RNN(input_size=self.input_size, hidden_size=self.hidden_size, num_layers=self.num_layers,
                           batch_first=True)

                self._decoder =\
                    nn.RNN(input_size=self.hidden_size, hidden_size=self.input_size, num_layers=self.num_layers,
                           batch_first=True)

            def forward_training(self, x):
                # make sure to treat single elements as batches
                x = x.view(-1, self.seq_size, self.input_size)
                batch_size = len(x)

                hidden_encoder = nn.Parameter(
                    t.zeros(self.num_layers * self.num_directions, batch_size,
                            self.hidden_size))
                hidden_decoder = nn.Parameter(
                    t.zeros(self.num_layers * self.num_directions, batch_size,
                            self.input_size))

                x, _ = self._encoder(x, hidden_encoder)
                x = t.repeat_interleave(x[:, -2:-1], x.shape[1], dim=1)
                x, hidden = self._decoder(x, hidden_decoder)
                return x.squeeze()

            def encode(self, x):
                x = x.reshape(-1, self.seq_size, self.input_size)
                batch_size = len(x)

                with t.no_grad():
                    hidden = nn.Parameter(
                        t.zeros(self.num_layers * self.num_directions,
                                batch_size, self.hidden_size))

                    # return last element of sequence
                    return self._encoder(x, hidden)[0][:, -1]

            def decode(self, x):
                x = x.reshape(-1, self.seq_size, self.hidden_size)
                batch_size = len(x)

                with t.no_grad():
                    hidden = nn.Parameter(
                        t.zeros(self.num_layers * self.num_directions,
                                batch_size, self.input_size))
                    return self._decoder(x.float(), hidden)[0]

        model = PytorchAutoEncoderModel(
            LstmAutoEncoder,
            PostProcessedFeaturesAndLabels(
                df.columns.to_list(),
                [lambda df: lag_columns(df, 10).dropna()],
                df.columns.to_list(),
                [lambda df: lag_columns(df, 10).dropna()],
                ["condensed-a", "condensed-b"]), SoftDTW, Adam)

        with df.model() as m:
            fit = m.fit(model, FittingParameter(epochs=100))
            print(fit.test_summary.df)

            encoded = df.model.predict(fit.model.as_encoder())
            print(encoded)
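A shape-bookkeeping note for the RNN autoencoder above: nn.RNN with batch_first=True consumes (batch, seq, features) together with a hidden state of shape (num_layers * num_directions, batch, hidden_size), which is exactly what the view/reshape calls and the zero-initialized hidden parameters arrange. Standalone:

    import torch
    from torch import nn

    rnn = nn.RNN(input_size=1, hidden_size=2, num_layers=1, batch_first=True)
    x = torch.randn(4, 10, 1)      # a batch of 4 sequences of length 10
    h0 = torch.zeros(1, 4, 2)      # (num_layers * num_directions, batch, hidden_size)
    out, hn = rnn(x, h0)
    print(out.shape, hn.shape)     # (4, 10, 2) and (1, 4, 2)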
Example #25
    def test_probabilistic_model_with_callback(self):
        try:
            pandas_ml_quant_data_provider = importlib.import_module(
                "pandas_ml_quant")
            from pandas_ml_quant import PricePredictionSummary
            from pandas_ml_quant.model.summary.price_prediction_summary import PriceSampledSummary
        except ImportError:
            print("pandas_ml_quant not found, skipping!")
            return

        df = pd.DataFrame({
            "Returns":
            np.random.normal(-0.02, 0.03, 500) +
            np.random.normal(0.03, 0.02, 500)
        })

        fl = PostProcessedFeaturesAndLabels(
            features=["Returns"],
            feature_post_processor=lambda df: df.ta.rnn(20),
            labels=[
                lambda df: df["Returns"].shift(-1).rename("Future_Returns")
            ],
            targets=lambda df: (1 + df["Returns"]).cumprod().rename("Close"))

        model_factory = PytorchNNFactory.create(
            nn.Sequential(
                nn.Linear(20, 10),
                nn.Tanh(),
                nn.Linear(10, 6),
                LambdaSplitter(
                    lambda x: T.softmax(x[..., :2], dim=1),
                    lambda x: T.exp(x[..., 2:4]),
                    # enforce one mean positive and the other negative
                    lambda x: T.cat([T.exp(x[..., 4:5]), -T.exp(x[..., 5:6])], dim=1),
                )
            ),
            predictor=lambda n, i: T.cat(n(i), dim=1),
            trainer=lambda n, i: n(i))

        def dist(probs, scales, locs):
            return MixtureSameFamily(Categorical(probs=probs),
                                     Normal(loc=locs, scale=scales))

        def loss(y_pred):
            probs, scales, locs = y_pred
            return dist(probs, scales, locs)

        def cdf_cb(arg):
            probs, scales, locs = arg[..., :2], arg[..., 2:4], arg[..., 4:6]
            return dist(probs, scales, locs)

        summary_provider = PriceSampledSummary.with_reconstructor(
            sampler=wrap_applyable(
                lambda params, samples: cdf_cb(params).sample([int(samples.item())]),
                nr_args=2),
            samples=100,
            confidence=0.8)

        model = PytorchModel(module_provider=model_factory,
                             features_and_labels=fl,
                             criterion_provider=lambda: DistributionNLL(
                                 loss, penalize_toal_variance_lambda=1.1),
                             optimizer_provider=Adam,
                             summary_provider=summary_provider)

        fit = df.model.fit(
            model,
            FittingParameter(epochs=10,
                             batch_size=6,
                             splitter=naive_splitter(0.25)),
            #verbose=1,
            callbacks=[
                TestConfidenceInterval(
                    TestConfidenceInterval.CdfConfidenceInterval(
                        wrap_applyable(
                            lambda params, val: cdf_cb(params).cdf(val),
                            nr_args=2),
                        interval=0.8),
                    wrap_applyable(lambda params: cdf_cb(params).variance),
                    early_stopping=True)
            ])

        print(fit.test_summary.calc_scores())
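The dist() helper above builds a two-component Gaussian mixture with torch.distributions; the sampler and CDF callbacks then only forward to sample and cdf on that distribution. The same objects standalone:

    import torch
    from torch.distributions import Categorical, MixtureSameFamily, Normal

    probs = torch.tensor([0.7, 0.3])       # mixture weights (the softmax head)
    locs = torch.tensor([0.01, -0.02])     # one positive, one negative mean
    scales = torch.tensor([0.02, 0.03])

    mix = MixtureSameFamily(Categorical(probs=probs), Normal(loc=locs, scale=scales))
    print(mix.sample([5]))                 # what the summary's sampler does
    print(mix.cdf(torch.tensor(0.0)))      # what the confidence-interval callback uses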