def test_partial_fit_regression(self):
    data = make_regression(100, 2, 1)
    df = pd.DataFrame(data[0])
    df["label"] = data[1]

    with df.model() as m:
        fit_partial = m.fit(
            SkModel(
                MLPRegressor(max_iter=1, random_state=42),
                FeaturesAndLabels(features=[0, 1], labels=['label'])
            ),
            FittingParameter(
                naive_splitter(0.3),
                batch_size=10,
                fold_epochs=10
            )
        )

    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPRegressor(max_iter=10, random_state=42),
                FeaturesAndLabels(features=[0, 1], labels=['label'])
            ),
            FittingParameter(naive_splitter(0.3))
        )

    self.assertAlmostEqual(
        df.model.predict(fit.model).iloc[0, -1],
        df.model.predict(fit_partial.model).iloc[0, -1],
        4
    )
def provide_linear_regression_model(self):
    from sklearn.linear_model import LinearRegression
    from sklearn.neural_network import MLPRegressor
    from pandas_ml_utils import FeaturesAndLabels, SkModel

    return [
        (
            SkModel(LinearRegression(), FeaturesAndLabels(["x"], ["y"])),
            FittingParameter(epochs=1, fold_epochs=1, context="LinearRegression")
        ),
        (
            SkModel(
                MLPRegressor(10, learning_rate_init=0.01, max_iter=9000, validation_fraction=0),
                FeaturesAndLabels(["x"], ["y"])
            ),
            FittingParameter(epochs=1, fold_epochs=1, context="MLPRegressor")
        ),
        (
            SkModel(
                MLPRegressor(10, learning_rate_init=0.01, max_iter=1, validation_fraction=0, warm_start=True),
                FeaturesAndLabels(["x"], ["y"])
            ),
            FittingParameter(epochs=9000, fold_epochs=1, context="MLPRegressor partial fit")
        )
    ]
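
# A minimal standalone sketch (illustrative names, not part of the suite) of the
# equivalence the last provider entry above relies on: with warm_start=True each
# call to fit() resumes from the previous weights, so many single-iteration fits
# approximate one long fit with a high max_iter (up to learning-rate schedules
# and stopping heuristics; each 1-iteration fit also emits a ConvergenceWarning).
def _sketch_warm_start_equivalence():
    import numpy as np
    from sklearn.neural_network import MLPRegressor

    x = np.linspace(-1, 1, 100).reshape(-1, 1)
    y = 2 * x.ravel() + 1

    incremental = MLPRegressor(hidden_layer_sizes=(10,), max_iter=1,
                               warm_start=True, random_state=0)
    for _ in range(500):
        incremental.fit(x, y)  # resumes training instead of re-initializing

    one_shot = MLPRegressor(hidden_layer_sizes=(10,), max_iter=500,
                            random_state=0).fit(x, y)

    # both runs should end up with similar predictions
    print(incremental.predict(x[:3]), one_shot.predict(x[:3]))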
def test_partial_fit_classification(self):
    data = make_classification(100, 2, 1, 0, n_clusters_per_class=1)
    df = pd.DataFrame(data[0])
    df["label"] = data[1]

    with df.model() as m:
        fit_partial = m.fit(
            SkModel(
                MLPClassifier(max_iter=1, random_state=42),
                FeaturesAndLabels(features=[0, 1], labels=['label']),
                classes=np.unique(data[1])
            ),
            FittingParameter(
                stratified_random_splitter(0.3),
                batch_size=10,
                fold_epochs=10,
            )
        )

    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPClassifier(max_iter=10, random_state=42),
                FeaturesAndLabels(features=[0, 1], labels=['label'])
            ),
            FittingParameter(stratified_random_splitter(0.3))
        )

    self.assertAlmostEqual(
        df.model.predict(fit.model).iloc[0, -1],
        df.model.predict(fit_partial.model).iloc[0, -1],
        4
    )
def test_simple_regression_model(self):
    df = DF_TEST.copy()

    fit = df.model.fit(
        SkModel(
            MLPRegressor(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42, max_iter=2),
            FeaturesAndLabels(
                features=[
                    lambda df: df["Close"].ta.rsi().ta.rnn(28),
                    lambda df: (df["Volume"] / df["Volume"].ta.ema(14) - 1).ta.rnn(28)
                ],
                labels=[
                    lambda df: (df["Close"] / df["Open"] - 1).shift(-1),
                ]
            ),
            summary_provider=RegressionSummary
        ),
        FittingParameter()
    )

    print(fit)
    html = fit._repr_html_()

    prediction = df.model.predict(fit.model)
    print(prediction)
    # note: np.float was removed in newer numpy versions, the builtin float covers it
    self.assertIsInstance(prediction[PREDICTION_COLUMN_NAME, 0].iloc[-1], (float, np.float32, np.float64))

    backtest = df.model.backtest(fit.model)
def test_simple_classification_model(self):
    df = DF_NOTES.copy()

    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                FeaturesAndLabels(
                    features=["variance", "skewness", "kurtosis", "entropy"],
                    labels=["authentic"],
                    label_type=bool
                )
            ),
            FittingParameter(stratified_random_splitter())
        )

    print(fit)
    html = fit._repr_html_()

    prediction = df.model.predict(fit.model)
    print(prediction)
    self.assertGreaterEqual(prediction[PREDICTION_COLUMN_NAME].iloc[-1].values, 0.68)

    backtest = df.model.backtest(fit.model)
    self.assertIn(FEATURE_COLUMN_NAME, backtest.df)
    self.assertIn(LABEL_COLUMN_NAME, backtest.df)
    np.testing.assert_array_almost_equal(
        prediction[PREDICTION_COLUMN_NAME].iloc[-1].values,
        backtest.df[PREDICTION_COLUMN_NAME].iloc[-1].values
    )

    # test multiple samples
    samples = df.model.predict(fit.model, samples=2)
    self.assertIsInstance(samples[PREDICTION_COLUMN_NAME].iloc[-1, 0], list)
    self.assertEqual(2, len(samples[PREDICTION_COLUMN_NAME].iloc[-1, 0]))
def test_regressor(self):
    """given some toy regression data"""
    df = pd.DataFrame({
        "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
        "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
    })

    """and a model"""
    model = self.provide_regression_model(FeaturesAndLabels(features=["a"], labels=["b"]))

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model() as m:
        fit = m.fit(
            model,
            FittingParameter(splitter=naive_splitter(0.3), batch_size=batch_size, epochs=epochs),
            verbose=0
        )

    print(fit.training_summary.df)
    self.assertEqual(4, len(fit.training_summary.df))
    self.assertEqual(2, len(fit.test_summary.df))

    """then we can predict"""
    prediction = df.model.predict(fit.model)
    np.testing.assert_array_almost_equal(prediction.iloc[:, 0].values, df["b"].values, 1)

    """and save and load the model"""
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    try:
        fit.model.save(temp)
        copy = Model.load(temp)
        pd.testing.assert_frame_equal(
            df.model.predict(fit.model),
            df.model.predict(copy),
            check_less_precise=True
        )
    finally:
        os.remove(temp)
def test_multindex_row_multi_samples(self):
    """given some toy regression data while we provide a multiindex for the rows"""
    df = pd.DataFrame(
        {
            "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
            "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0, -2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
        },
        index=pd.MultiIndex.from_product([["A", "B"], range(6)])
    )

    """and a model"""
    model = self.provide_regression_model(FeaturesAndLabels(features=["a"], labels=["b"]))

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model() as m:
        fit = m.fit(
            model,
            FittingParameter(
                splitter=random_splitter(0.3, partition_row_multi_index=True),
                batch_size=batch_size,
                epochs=epochs
            ),
            verbose=0
        )

    self.assertEqual(8, len(fit.training_summary.df))
    self.assertEqual(4, len(fit.test_summary.df))

    prediction = df.model.predict(fit.model, samples=2)
    self.assertEqual(2, len(prediction.iloc[:, 0]._.values))
    self.assertEqual((6, 2), prediction.loc["A"].iloc[:, 0]._.values.shape)
    self.assertEqual((6, 2), prediction.loc["B"].iloc[:, 0]._.values.shape)
def test_simple_classification_cross_validation(self):
    df = DF_NOTES.copy()

    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                FeaturesAndLabels(
                    features=["variance", "skewness", "kurtosis", "entropy"],
                    labels=["authentic"],
                    label_type=bool
                )
            ),
            FittingParameter(
                splitter=random_splitter(),
                cross_validation=KFold(3, random_state=42, shuffle=True)
            )
        )

    print(fit)
    html = fit._repr_html_()

    prediction = df.model.predict(fit.model)
    print(prediction)
    self.assertGreaterEqual(prediction[PREDICTION_COLUMN_NAME].iloc[-1].values, 0.65)
def test_regularized_loss(self):
    df = pd.DataFrame({
        "f": np.sin(np.linspace(0, 12, 40)),
        "l": np.sin(np.linspace(5, 17, 40))
    })

    class TestModel(PytorchNN):
        def __init__(self):
            super().__init__()
            self.net = nn.Sequential(
                nn.Linear(1, 3),
                nn.ReLU(),
                nn.Linear(3, 2),
                nn.ReLU(),
                nn.Linear(2, 1),
                nn.Sigmoid()
            )

        def forward_training(self, x):
            return self.net(x)

        def L2(self) -> Dict[str, float]:
            # an extreme L2 penalty on the second linear layer forces its weights towards zero
            return {'**/2/**/weight': 99999999999.99}

    fit = df.model.fit(
        PytorchModel(TestModel, FeaturesAndLabels(["f"], ["l"]), nn.MSELoss, Adam),
        FittingParameter(epochs=1000, splitter=naive_splitter(0.5))
    )

    print(fit.model._current_model.net.net[2].weight.detach().numpy())
    print(fit.model._current_model.net.net[2].weight.norm().detach().item())
    self.assertLess(fit.model._current_model.net.net[2].weight.norm().detach().item(), 0.1)
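
# A minimal standalone sketch (an assumption, not pandas_ml_utils_torch's actual
# implementation) of how a wildcard parameter path such as '**/2/**/weight' can be
# resolved against module.named_parameters() to build an L2 penalty term. The
# pattern translation below is illustrative; the library's matching rules may differ.
def _sketch_wildcard_l2_penalty(module, penalties):
    import re

    def matches(pattern, path):
        # '**' may span '/' separators, '*' stays within one path segment,
        # and '/**/' is allowed to collapse to a single '/'
        regex = re.escape(pattern)
        regex = regex.replace(r"/\*\*/", r"/(?:.+/)?")
        regex = regex.replace(r"\*\*", r".*")
        regex = regex.replace(r"\*", r"[^/]*")
        return re.fullmatch(regex, path) is not None

    penalty = 0.0
    for name, param in module.named_parameters():
        path = name.replace(".", "/")  # e.g. 'net.2.weight' -> 'net/2/weight'
        for pattern, weight in penalties.items():
            if matches(pattern, path):
                penalty = penalty + weight * param.norm(2) ** 2
    return penalty

# hypothetical usage: total_loss = mse_loss + _sketch_wildcard_l2_penalty(net, net.L2())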
def test_linear_model(self):
    df = DF_NOTES.copy()

    with df.model() as m:
        fit = m.fit(
            SkModel(
                Lasso(),
                FeaturesAndLabels(
                    features=[
                        lambda df: df["variance"],
                        lambda df: (df["skewness"] / df["kurtosis"]).rename("engineered")
                    ],
                    labels=[
                        'authentic'
                    ]
                )
            ),
            FittingParameter(naive_splitter())
        )

    print(fit)

    prediction = df.model.predict(fit.model)
    print(prediction)

    backtest = df.model.backtest(fit.model)
    self.assertLess(backtest.model.sk_model.coef_[0], 1e-5)
def test_make_model(self):
    notebooks_path = os.path.join(PWD, '..', 'examples')
    df = pd.read_csv(os.path.join(notebooks_path, 'SPY.csv'))

    with df.model("/tmp/pijsfnwuacpa.model") as m:
        from torch import nn
        from torch.optim import SGD
        from pandas_ml_common.utils.column_lagging_utils import lag_columns
        from pandas_ml_utils import FeaturesAndLabels, RegressionSummary, FittingParameter
        from pandas_ml_utils_torch import PytorchModel
        from pandas_ml_utils_torch.merging_cross_folds import take_the_best

        def net_provider():
            from pandas_ml_utils_torch import PytorchNN

            class Net(PytorchNN):
                def __init__(self):
                    super().__init__()
                    self.net = nn.Sequential(
                        nn.Linear(10, 4),
                        nn.Tanh(),
                        nn.Linear(4, 4),
                        nn.Tanh(),
                        nn.Linear(4, 1),
                        nn.Tanh(),
                    )

                def L1(self):
                    # path to the parameters which should be regularized:
                    # the path is constructed from self.named_parameters() and allows the use of wildcards
                    return {'net/0/**/weight': 0.02}

                def L2(self):
                    return {
                        'net/0/**/weight': 0.02,
                        'net/2/**/weight': 0.05
                    }

                def forward_training(self, x):
                    return self.net(x)

            return Net()

        fit = m.fit(
            PytorchModel(
                net_provider,
                FeaturesAndLabels(
                    [lambda df: lag_columns(df["Close"].pct_change(), range(10))],
                    [lambda df: df["Close"].pct_change().shift(-1)]
                ),
                nn.MSELoss,
                lambda params: SGD(params, lr=0.01, momentum=0.0),
                merge_cross_folds=take_the_best,
                summary_provider=RegressionSummary
            ),
            FittingParameter(epochs=2),
            verbose=1
        )
def provide_non_linear_regression_model(self):
    from sklearn.neural_network import MLPRegressor
    from pandas_ml_utils import FeaturesAndLabels, SkModel

    return [
        (
            SkModel(
                MLPRegressor(200, learning_rate_init=0.001, max_iter=5000, validation_fraction=0),
                FeaturesAndLabels(["x"], ["y"])
            ),
            FittingParameter(epochs=1, context="epoch 1 fit"),
        ),
        (
            SkModel(
                MLPRegressor(200, learning_rate_init=0.001, max_iter=1, validation_fraction=0, warm_start=True),
                FeaturesAndLabels(["x"], ["y"])
            ),
            FittingParameter(epochs=5000, context="partial fit"),
        )
    ]
def provide_non_linear_regression_model(self):
    from pandas_ml_utils_torch import PytorchModel, PytorchNN
    from pandas_ml_utils import FeaturesAndLabels
    from torch.optim import Adagrad
    from torch import nn
    import torch as t

    class Net(PytorchNN):
        def __init__(self):
            super().__init__()
            self.net = nn.Sequential(
                nn.Linear(1, 200),
                nn.ReLU(),
                nn.Linear(200, 200),
                nn.ReLU(),
                nn.Linear(200, 200),
                nn.ReLU(),
                nn.Linear(200, 1),
                nn.ReLU()
            )

        def forward_training(self, *input) -> t.Tensor:
            return self.net(input[0])

    t.manual_seed(0)
    model = PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss, Adagrad)

    return [
        (
            model,
            FittingParameter(epochs=600, batch_size=64, context="epoch fit batched"),
        ),
        (
            model,
            FittingParameter(epochs=600, context="epoch fit"),
        ),
        (
            model,
            FittingParameter(epochs=1, fold_epochs=600, context="fold epoch fit"),
        )
    ]
def test_probabilistic(self):
    def create_sine_data(n=300):
        np.random.seed(32)
        x = np.linspace(0, 1 * 2 * np.pi, n)
        y1 = 3 * np.sin(x)
        y1 = np.concatenate((
            np.zeros(60),
            y1 + np.random.normal(0, 0.15 * np.abs(y1), n),
            np.zeros(60)
        ))
        x = np.concatenate((
            np.linspace(-3, 0, 60),
            np.linspace(0, 3 * 2 * np.pi, n),
            np.linspace(3 * 2 * np.pi, 3 * 2 * np.pi + 3, 60)
        ))
        y2 = 0.1 * x + 1
        y = y1 + y2
        return x, y

    df = pd.DataFrame(np.array(create_sine_data(300)).T, columns=["x", "y"])

    with df.model() as m:
        from pandas_ml_utils import FeaturesAndLabels
        from pandas_ml_utils_torch import PytorchNN, PytorchModel
        from pandas_ml_utils_torch.loss import HeteroscedasticityLoss
        from pandas_ml_common.sampling.splitter import duplicate_data
        from torch.optim import Adam
        from torch import nn

        class Net(PytorchNN):
            def __init__(self):
                super().__init__()
                self.l = nn.Sequential(
                    nn.Linear(1, 20),
                    nn.ReLU(),
                    nn.Linear(20, 50),
                    nn.ReLU(),
                    nn.Linear(50, 20),
                    nn.ReLU(),
                    nn.Linear(20, 2),
                )

            def forward_training(self, x):
                return self.l(x)

        fit = m.fit(
            PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]),
                         HeteroscedasticityLoss, Adam,
                         restore_best_weights=True),
            FittingParameter(batch_size=128, epochs=10, splitter=duplicate_data())
        )
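
# A minimal sketch (an assumption, not pandas_ml_utils_torch's actual
# HeteroscedasticityLoss) of a heteroscedastic Gaussian negative log likelihood:
# the net above emits two outputs per sample, which such a loss can read as mean
# and log standard deviation, trading a larger predicted variance against the
# squared-error term at a log(sigma) cost.
def _sketch_heteroscedastic_nll(y_pred, y_true):
    # y_pred[..., 0] -> mean, y_pred[..., 1] -> log(sigma); y_true of shape (N, 1)
    mu, log_sigma = y_pred[..., 0], y_pred[..., 1]
    nll = 0.5 * ((y_true.squeeze(-1) - mu) / log_sigma.exp()) ** 2 + log_sigma
    return nll.mean()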
def provide_linear_regression_model(self):
    from pandas_ml_utils_torch import PytorchModel, PytorchNN
    from pandas_ml_utils import FeaturesAndLabels
    from torch.optim import Adam
    from torch import nn
    import torch as t

    class Net(PytorchNN):
        def __init__(self):
            super().__init__()
            self.net = nn.Linear(1, 1)

        def forward_training(self, *input) -> t.Tensor:
            return self.net(input[0])

    return [
        (
            PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss, Adam),
            FittingParameter(epochs=5000, context="epoch fit"),
        ),
        (
            PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss, Adam),
            FittingParameter(epochs=5000, batch_size=64, context="epoch fit batched"),
        ),
        (
            PytorchModel(Net, FeaturesAndLabels(["x"], ["y"]), nn.MSELoss, Adam),
            FittingParameter(epochs=1, fold_epochs=5000, context="fold epoch fit"),
        ),
    ]
def test_simple_classification_model_with_all_options(self):
    df = DF_NOTES.copy()

    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                FeaturesAndLabels(
                    features=["variance", "skewness", "kurtosis", "entropy"],
                    sample_weights=["variance"],
                    gross_loss=["kurtosis"],
                    targets=["entropy"],
                    labels=["authentic"],
                    label_type=bool
                )
            ),
            FittingParameter(stratified_random_splitter())
        )

    # should not throw an error
    html = fit._repr_html_()

    # fit resulting columns
    print(fit.test_summary.df)
    self.assertIn(GROSS_LOSS_COLUMN_NAME, fit.training_summary.df)
    self.assertIn(FEATURE_COLUMN_NAME, fit.training_summary.df)
    self.assertIn(LABEL_COLUMN_NAME, fit.training_summary.df)
    self.assertIn(TARGET_COLUMN_NAME, fit.training_summary.df)
    self.assertIn(FEATURE_COLUMN_NAME, fit.test_summary.df)
    self.assertIn(LABEL_COLUMN_NAME, fit.test_summary.df)
    self.assertIn(TARGET_COLUMN_NAME, fit.test_summary.df)

    # prediction resulting columns
    prediction = df.model.predict(fit.model)
    print(prediction)
    self.assertIn(FEATURE_COLUMN_NAME, prediction)
    self.assertIn(TARGET_COLUMN_NAME, prediction)

    # backtest resulting columns
    backtest = df.model.backtest(fit.model)
    print(backtest.df)
    self.assertEqual(len(df), len(backtest.df))
    self.assertIn(FEATURE_COLUMN_NAME, backtest.df)
    self.assertIn(LABEL_COLUMN_NAME, backtest.df)
    self.assertIn(TARGET_COLUMN_NAME, backtest.df)
    self.assertIn(GROSS_LOSS_COLUMN_NAME, backtest.df)
def test_mult_epoch_cross_validation(self):
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [0, 1, 0, 1, 1, 0, 1, 0],
    })

    with df.model() as m:
        class NN(PytorchNN):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)
                self.nn = nn.Sequential(
                    nn.Linear(1, 2),
                    nn.ReLU(),
                    nn.Linear(2, 1),
                )

            def forward_training(self, x):
                return self.nn(x)

        fit = m.fit(
            PytorchModel(NN, FeaturesAndLabels(["a"], ["b"]), nn.MSELoss, Adam),
            FittingParameter(splitter=naive_splitter(0.5), epochs=2, fold_epochs=10, batch_size=2)
        )

    print(fit)
def test_multindex_row(self):
    """given some toy regression data while we provide a multiindex for the rows"""
    df = pd.DataFrame(
        {
            "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0, -1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
            "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0, -2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
        },
        index=pd.MultiIndex.from_product([["A", "B"], range(6)])
    )

    """and a model"""
    model = self.provide_regression_model(FeaturesAndLabels(features=["a"], labels=["b"]))

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model() as m:
        fit = m.fit(
            model,
            FittingParameter(
                splitter=random_splitter(0.3, partition_row_multi_index=True),
                batch_size=batch_size,
                epochs=epochs
            ),
            verbose=0
        )

    prediction = df.model.predict(fit.model)
    print(fit)
    # fit.training_summary.df.to_pickle('/tmp/multi_index_row_summary.df')
    # print(fit._repr_html_())

    """then we get a prediction for A and B rows"""
    self.assertEqual(8, len(fit.training_summary.df))
    self.assertEqual(4, len(fit.training_summary.df.loc["A"]))
    self.assertEqual(4, len(fit.training_summary.df.loc["B"]))
    self.assertEqual(4, len(fit.test_summary.df))
    self.assertEqual(2, len(fit.test_summary.df.loc["A"]))
    self.assertEqual(2, len(fit.test_summary.df.loc["B"]))
    self.assertEqual(6, len(prediction.loc["A"]))
    self.assertEqual(6, len(prediction.loc["B"]))
    np.testing.assert_array_almost_equal(prediction.iloc[:, 0].values, df["b"].values, 1)
def test_multi_objective_loss(self):
    df = pd.DataFrame(
        np.array([
            # train
            [0, 0, 0],
            [0, 1, 1],
            [1, 0, 1],
            [1, 1, 1],
            # test
            [0, 0, 0],
            [0, 1, 1],
            [1, 0, 1],
            [1, 1, 1],
        ]),
        columns=["f1", "f2", "l"]
    )

    class XorModule(PytorchNN):
        def __init__(self):
            super().__init__()
            self.x1 = nn.Linear(2, 1)
            self.s1 = nn.Sigmoid()
            self.x2 = nn.Linear(2, 1)
            self.s2 = nn.Sigmoid()
            self.s = nn.Softmax()

        def forward_training(self, x):
            return self.s1(self.x1(x)), self.s2(self.x2(x))

        def forward_predict(self, x):
            return self.s1(self.x1(x))

    fit = df.model.fit(
        PytorchModel(
            XorModule,
            FeaturesAndLabels(["f1", "f2"], ["l"]),
            lambda: MultiObjectiveLoss(
                (1, nn.MSELoss(reduction='none')),
                (1, nn.L1Loss(reduction='none')),
                on_epoch=lambda criterion, epoch: criterion.update_weights((0, 1.1))
            ),
            Adam
        ),
        FittingParameter(splitter=naive_splitter(0.5))
    )

    print(fit.test_summary.df)
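
# A minimal sketch (an assumption, not the library's MultiObjectiveLoss) of the
# weighted multi-objective criterion wired up above: each sub-criterion scores its
# own network head, the weighted sum is reduced to a scalar, and update_weights
# lets the on_epoch callback re-balance the objectives over time. All names and
# signatures below are illustrative; it assumes `from torch import nn` at module level.
class _SketchMultiObjectiveLoss(nn.Module):
    def __init__(self, *weighted_criteria):
        super().__init__()
        self.weights = [w for w, _ in weighted_criteria]
        self.criteria = nn.ModuleList([c for _, c in weighted_criteria])

    def update_weights(self, *index_weight_pairs):
        # e.g. update_weights((0, 1.1)) re-weights the first objective
        for i, w in index_weight_pairs:
            self.weights[i] = w

    def forward(self, y_preds, y_true):
        weighted = [w * c(y, y_true) for w, c, y in zip(self.weights, self.criteria, y_preds)]
        return sum(weighted).mean()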
def test_no_test_data(self):
    """given some toy regression data"""
    df = pd.DataFrame({
        "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
        "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
    })

    """and a model"""
    model = self.provide_regression_model(FeaturesAndLabels(features=["a"], labels=["b"]))

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model() as m:
        fit = m.fit(
            model,
            FittingParameter(splitter=naive_splitter(0), batch_size=batch_size, epochs=epochs),
            verbose=0
        )

    # print(fit.training_summary.df)
    print(fit.test_summary.df)

    """then we have an empty test data frame"""
    self.assertEqual(len(fit.training_summary.df), len(df))
    self.assertEqual(len(fit.test_summary.df), 0)
def test_multi_sample_regressor(self):
    """given some toy regression data"""
    df = pd.DataFrame({
        "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
        "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
    })

    """and a model"""
    model = self.provide_regression_model(FeaturesAndLabels(features=["a"], labels=["b"]))

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model() as m:
        fit = m.fit(
            model,
            FittingParameter(splitter=naive_splitter(0.3), batch_size=batch_size, epochs=epochs),
            verbose=0
        )

    print(fit.training_summary.df)

    """then we can predict"""
    prediction = df.model.predict(fit.model, samples=2)
    np.testing.assert_array_almost_equal(
        prediction.iloc[:, 0]._.values,
        np.concatenate([df[["b"]].values, df[["b"]].values], axis=1),
        1
    )
def test_auto_encoder(self):
    """given the implementation can handle auto encoders"""
    model = self.provide_auto_encoder_model(
        FeaturesAndLabels(features=["a", "b"], labels=["a", "b"], latent=["x"])
    )

    if model is None:
        return

    """and some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0] * 10,
        "b": [0, 1] * 10,
    })

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model() as m:
        fit = m.fit(
            model,
            FittingParameter(splitter=naive_splitter(0.49), batch_size=batch_size, epochs=epochs),
            verbose=0
        )

    print(fit.training_summary.df)

    """then we can predict Autoencoded"""
    auto_encoded_prediction = df.model.predict(fit.model)
    self.assertEqual((20, 2), auto_encoded_prediction["prediction"].shape)

    """and we can encode"""
    encoded_prediction = df.model.predict(fit.model.as_encoder())
    print(encoded_prediction)
    self.assertEqual((20, 1), encoded_prediction["prediction"].shape)

    """and we can decode"""
    decoded_prediction = encoded_prediction["prediction"].model.predict(fit.model.as_decoder())
    print(decoded_prediction)
    np.testing.assert_array_almost_equal(decoded_prediction["prediction"].values > 0.5, df[["a", "b"]].values)

    """and we can encode and decode after save and load"""
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    try:
        fit.model.save(temp)
        copy = Model.load(temp)

        pd.testing.assert_frame_equal(
            df.model.predict(fit.model.as_encoder()),
            df.model.predict(copy.as_encoder()),
            check_less_precise=True
        )

        pd.testing.assert_frame_equal(
            encoded_prediction.model.predict(fit.model.as_decoder()),
            encoded_prediction.model.predict(copy.as_decoder()),
            check_less_precise=True
        )
    finally:
        os.remove(temp)

    # try to save only as encoder model
    try:
        fit.model.as_encoder().save(temp)
        copy = Model.load(temp)
    finally:
        os.remove(temp)
def test_classifier(self):
    """given some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [0, 0, 1, 1, 0, 0, 1, 1],
        "c": [1, 0, 0, 1, 1, 0, 0, 1]
    })

    """and a model"""
    model = self.provide_classification_model(
        FeaturesAndLabels(features=["a", "b"], labels=["c"], label_type=int)
    )
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model(temp) as m:
        fit = m.fit(
            model,
            FittingParameter(splitter=naive_splitter(0.49), batch_size=batch_size, epochs=epochs),
            verbose=0
        )

    print(fit.training_summary.df)
    # fit.training_summary.df.to_pickle('/tmp/classifier.df')
    # print(fit._repr_html_())

    """then we get a html summary and can predict"""
    self.assertIn('<style>', fit.training_summary._repr_html_())

    prediction = df.model.predict(fit.model)
    binary_prediction = prediction.iloc[:, 0] >= 0.5
    np.testing.assert_array_equal(
        binary_prediction,
        np.array([True, False, False, True, True, False, False, True])
    )

    """and load the model"""
    try:
        copy = Model.load(temp)
        pd.testing.assert_frame_equal(
            df.model.predict(fit.model),
            df.model.predict(copy),
            check_less_precise=True
        )

        # test using context manager and ForecastProvider
        pd.testing.assert_frame_equal(
            df.model(temp).predict(forecast_provider=Forecast).df,
            df.model.predict(copy),
            check_less_precise=True
        )
    finally:
        os.remove(temp)
def test_soft_dtw_loss(self):
    df = TEST_DF[["Close"]][-21:].copy()

    class LstmAutoEncoder(PytorchNN):
        def __init__(self):
            super().__init__()
            self.input_size = 1
            self.seq_size = 10
            self.hidden_size = 2
            self.num_layers = 1
            self.num_directions = 1

            self._encoder = nn.RNN(
                input_size=self.input_size,
                hidden_size=self.hidden_size,
                num_layers=self.num_layers,
                batch_first=True
            )
            self._decoder = nn.RNN(
                input_size=self.hidden_size,
                hidden_size=self.input_size,
                num_layers=self.num_layers,
                batch_first=True
            )

        def forward_training(self, x):
            # make sure to treat single elements as batches
            x = x.view(-1, self.seq_size, self.input_size)
            batch_size = len(x)

            hidden_encoder = nn.Parameter(t.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size))
            hidden_decoder = nn.Parameter(t.zeros(self.num_layers * self.num_directions, batch_size, self.input_size))

            x, _ = self._encoder(x, hidden_encoder)
            x = t.repeat_interleave(x[:, -2:-1], x.shape[1], dim=1)
            x, hidden = self._decoder(x, hidden_decoder)
            return x.squeeze()

        def encode(self, x):
            x = x.reshape(-1, self.seq_size, self.input_size)
            batch_size = len(x)

            with t.no_grad():
                hidden = nn.Parameter(t.zeros(self.num_layers * self.num_directions, batch_size, self.hidden_size))
                # return last element of sequence
                return self._encoder(x, hidden)[0][:, -1]

        def decode(self, x):
            x = x.reshape(-1, self.seq_size, self.hidden_size)
            batch_size = len(x)

            with t.no_grad():
                hidden = nn.Parameter(t.zeros(self.num_layers * self.num_directions, batch_size, self.input_size))
                return self._decoder(x.float(), hidden)[0]

    model = PytorchAutoEncoderModel(
        LstmAutoEncoder,
        PostProcessedFeaturesAndLabels(
            df.columns.to_list(),
            [lambda df: lag_columns(df, 10).dropna()],
            df.columns.to_list(),
            [lambda df: lag_columns(df, 10).dropna()],
            ["condensed-a", "condensed-b"]
        ),
        SoftDTW,
        Adam
    )

    with df.model() as m:
        fit = m.fit(model, FittingParameter(epochs=100))

    print(fit.test_summary.df)

    encoded = df.model.predict(fit.model.as_encoder())
    print(encoded)
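
# A minimal numpy sketch of the soft-DTW recursion behind the SoftDTW criterion
# used above (the library's criterion is the differentiable torch version; this
# illustration and its names are assumptions): classic DTW's hard min over the
# three predecessor cells is replaced by a smooth soft-min with temperature gamma.
def _sketch_soft_dtw(x, y, gamma=1.0):
    import numpy as np

    n, m = len(x), len(y)
    r = np.full((n + 1, m + 1), np.inf)
    r[0, 0] = 0.0

    for i in range(1, n + 1):
        for j in range(1, m + 1):
            cost = (x[i - 1] - y[j - 1]) ** 2
            # soft-min over the three DTW predecessors, computed stably:
            # softmin_gamma(a) = -gamma * log(sum(exp(-a / gamma)))
            z = -np.array([r[i - 1, j], r[i, j - 1], r[i - 1, j - 1]]) / gamma
            softmin = -gamma * np.log(np.exp(z - z.max()).sum()) - gamma * z.max()
            r[i, j] = cost + softmin

    return r[n, m]

# hypothetical usage:
#   _sketch_soft_dtw(np.sin(np.linspace(0, 3, 20)), np.cos(np.linspace(0, 3, 20)))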
def test_probabilistic_model_with_callback(self):
    try:
        pandas_ml_quant_data_provider = importlib.import_module("pandas_ml_quant")

        from pandas_ml_quant import PricePredictionSummary
        from pandas_ml_quant.model.summary.price_prediction_summary import PriceSampledSummary
    except ImportError:
        print("pandas_ml_quant not found, skipping!")
        return

    df = pd.DataFrame({
        "Returns": np.random.normal(-0.02, 0.03, 500) + np.random.normal(0.03, 0.02, 500)
    })

    fl = PostProcessedFeaturesAndLabels(
        features=["Returns"],
        feature_post_processor=lambda df: df.ta.rnn(20),
        labels=[lambda df: df["Returns"].shift(-1).rename("Future_Returns")],
        targets=lambda df: (1 + df["Returns"]).cumprod().rename("Close")
    )

    model_factory = PytorchNNFactory.create(
        nn.Sequential(
            nn.Linear(20, 10),
            nn.Tanh(),
            nn.Linear(10, 6),
            LambdaSplitter(
                lambda x: T.softmax(x[..., :2], dim=1),
                lambda x: T.exp(x[..., 2:4]),
                # enforce one mean positive and the other negative
                lambda x: T.cat([T.exp(x[..., 4:5]), -T.exp(x[..., 5:6])], dim=1),
            )
        ),
        predictor=lambda n, i: T.cat(n(i), dim=1),
        trainer=lambda n, i: n(i)
    )

    def dist(probs, scales, locs):
        return MixtureSameFamily(Categorical(probs=probs), Normal(loc=locs, scale=scales))

    def loss(y_pred):
        probs, scales, locs = y_pred
        return dist(probs, scales, locs)

    def cdf_cb(arg):
        probs, scales, locs = arg[..., :2], arg[..., 2:4], arg[..., 4:6]
        return dist(probs, scales, locs)

    summary_provider = PriceSampledSummary.with_reconstructor(
        sampler=wrap_applyable(lambda params, samples: cdf_cb(params).sample([int(samples.item())]), nr_args=2),
        samples=100,
        confidence=0.8
    )

    model = PytorchModel(
        module_provider=model_factory,
        features_and_labels=fl,
        criterion_provider=lambda: DistributionNLL(loss, penalize_toal_variance_lambda=1.1),
        optimizer_provider=Adam,
        summary_provider=summary_provider
    )

    fit = df.model.fit(
        model,
        FittingParameter(epochs=10, batch_size=6, splitter=naive_splitter(0.25)),
        # verbose=1,
        callbacks=[
            TestConfidenceInterval(
                TestConfidenceInterval.CdfConfidenceInterval(
                    wrap_applyable(lambda params, val: cdf_cb(params).cdf(val), nr_args=2),
                    interval=0.8
                ),
                wrap_applyable(lambda params: cdf_cb(params).variance),
                early_stopping=True
            )
        ]
    )

    print(fit.test_summary.calc_scores())
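
# A minimal standalone sketch of the mixture density construction wired up above:
# a two-component Gaussian mixture built from torch.distributions, whose negative
# log likelihood can serve as a training criterion. The concrete numbers and
# shapes are illustrative assumptions; only the distribution API is as in torch.
def _sketch_mixture_nll():
    import torch as T
    from torch.distributions import Categorical, MixtureSameFamily, Normal

    probs = T.tensor([[0.7, 0.3]])     # mixture weights, one row per sample
    locs = T.tensor([[0.03, -0.02]])   # component means
    scales = T.tensor([[0.02, 0.03]])  # component standard deviations
    dist = MixtureSameFamily(Categorical(probs=probs), Normal(loc=locs, scale=scales))

    y = T.tensor([0.01])
    nll = -dist.log_prob(y)            # negative log likelihood of the label
    print(nll.item())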