def test_regressor(self):
    """given some toy regression data"""
    df = pd.DataFrame({
        "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
        "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
    })

    """and a model"""
    model = self.provide_regression_model(
        FeaturesAndLabels(features=["a"], labels=["b"]))

    """when we fit the model"""
    fit = df.model.fit(model, RandomSplits(0.3), verbose=0, epochs=500)
    print(fit.training_summary.df)

    """then we can predict"""
    prediction = df.model.predict(fit.model)
    np.testing.assert_array_almost_equal(prediction.iloc[:, 0].values,
                                         df["b"].values, 1)

    """and save and load the model"""
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    try:
        fit.model.save(temp)
        copy = Model.load(temp)
        pd.testing.assert_frame_equal(df.model.predict(fit.model),
                                      df.model.predict(copy),
                                      check_less_precise=True)
    finally:
        os.remove(temp)
def test_save_and_load_model_from_context(self):
    """given a clean workspace"""
    notebook_file = os.path.join(PWD, f'{__file__}.ipynb')
    out_notebook_file = os.path.join(PWD, f'{__file__}.py_all_output.ipynb')

    if os.path.exists(MODEL_FILE): os.remove(MODEL_FILE)
    if os.path.exists(out_notebook_file): os.remove(out_notebook_file)

    """when executing the notebook with a model context"""
    nb, errors = run_notebook(notebook_file, PWD,
                              kernel=os.getenv("TOX_KERNEL") or "python3")

    """then we have no errors and a saved model"""
    self.assertEqual(errors, [])
    self.assertTrue(os.path.exists(MODEL_FILE))

    """and when we load the saved model"""
    model = Model.load(MODEL_FILE)

    """then we can execute it"""
    self.assertEqual(2, len(DF_NOTES.model.predict(model, tail=2)))

    """and we can load the model in a clean namespace"""
    out = test_model_import(MODEL_FILE)
    print(out)
    self.assertIn(MODEL_FILE, out)
    self.assertIn("authentic", out)
def test_custom_objects(self):
    df = pd.DataFrame({
        "a": [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0],
        "b": [-2.0, 1.0, 4.0, 7.0, 10.0, 13.0]
    })

    def model_provider():
        model = Sequential([Dense(units=1, input_shape=(1, ))])
        model.compile(optimizer='sgd', loss=custom_loss_function)
        return model, custom_loss_function

    model = KerasModel(
        model_provider,
        FeaturesAndLabels(["a"], ["b"]),
    )

    fit = df.model.fit(model)
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))

    try:
        fit.model.save(temp)
        copy = Model.load(temp)
        pd.testing.assert_frame_equal(df.model.predict(fit.model),
                                      df.model.predict(copy),
                                      check_less_precise=True)
    finally:
        os.remove(temp)
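# Hedged sketch (assumption, not part of the original test module): the test above
# references a module level custom_loss_function whose definition is not shown here.
# Any Keras compatible loss with the (y_true, y_pred) signature would do; returning
# it from model_provider presumably tells KerasModel which custom objects to restore
# when the saved model is loaded again. A hand written mean squared error, for
# illustration only, could look like this:
#
#   import tensorflow as tf
#
#   def custom_loss_function(y_true, y_pred):
#       # plain mean squared error, illustration only
#       return tf.reduce_mean(tf.square(y_true - y_pred))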
def test_load_only(self):
    from pandas_ml_common.utils.column_lagging_utils import lag_columns

    notebooks_path = os.path.join(PWD, '..', 'examples')
    df = pd.read_csv(os.path.join(notebooks_path, 'SPY.csv'))

    # model = Model.load("/tmp/pijsfnwuacpa.model")
    model = Model.load('/tmp/regression_with_regularization.model')
    prediction = df.model.predict(model, tail=1)
    print(prediction)

    self.assertEqual(len(prediction), 1)
def test_auto_encoder(self):
    """given the implementation can handle auto encoders"""
    model = self.provide_auto_encoder_model(
        FeaturesAndLabels(["a", "b"], ["a", "b"]))

    if model is None:
        return

    """and some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, ],
        "b": [0, 1, 0, 1, ],
    })

    """when we fit the model"""
    fit = df.model.fit(model, NaiveSplitter(0.49), verbose=0, epochs=500)
    print(fit.training_summary.df)

    """then we can encode"""
    encoded_prediction = df.model.predict(fit.model.as_encoder())
    print(encoded_prediction)

    """and we can decode"""
    decoder_features = encoded_prediction.columns.to_list()[0:1]
    decoded_prediction = encoded_prediction.model.predict(
        fit.model.as_decoder(decoder_features))
    print(decoded_prediction)
    np.testing.assert_array_almost_equal(
        decoded_prediction["prediction"].values > 0.5,
        df[["a", "b"]].values)

    """and we can encode and decode after save and load"""
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    try:
        fit.model.save(temp)
        copy = Model.load(temp)

        pd.testing.assert_frame_equal(df.model.predict(fit.model.as_encoder()),
                                      df.model.predict(copy.as_encoder()),
                                      check_less_precise=True)

        pd.testing.assert_frame_equal(
            encoded_prediction.model.predict(fit.model.as_decoder(decoder_features)),
            encoded_prediction.model.predict(copy.as_decoder(decoder_features)),
            check_less_precise=True)
    finally:
        os.remove(temp)
def ta_model(df: _t.PatchedDataFrame, model: _Union[Model, str], post_processors=[]) -> _t.PatchedDataFrame:
    if isinstance(model, str):
        model = Model.load(model)

    p = df.model.predict(model)[PREDICTION_COLUMN_NAME]

    # apply post processors
    for pc in post_processors:
        p = pc(p)

    return p
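# Hedged usage sketch (illustration only; the post processor below is made up):
# ta_model accepts either an already loaded Model or a path to a saved model file,
# predicts on the given DataFrame and pipes the prediction frame through the
# optional post processors in order.
#
#   df = pd.read_csv("SPY.csv")
#   prediction = ta_model(df, "/tmp/regression_with_regularization.model",
#                         post_processors=[lambda p: p.round(2)])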
def test_save_load_nb_model(self):
    notebooks_path = os.path.join(PWD, '..', 'examples')
    notebook_file = os.path.join(notebooks_path, 'regression_with_regularization.ipynb')

    out, err = run_notebook(notebook_file, notebooks_path)
    self.assertEqual(err, [])

    df = pd.read_csv(os.path.join(notebooks_path, 'SPY.csv'))
    model = Model.load('/tmp/regression_with_regularization.model')
    prediction = df.model.predict(model, tail=1)["prediction", "Close"].item()
    print(prediction)

    self.assertEqual(float(out["cells"][-1]["outputs"][-1]["data"]["text/plain"]),
                     prediction)
def test_classifier(self):
    """given some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0, ],
        "b": [0, 0, 1, 1, 0, 0, 1, 1, ],
        "c": [1, 0, 0, 1, 1, 0, 0, 1, ]
    })

    """and a model"""
    model = self.provide_classification_model(
        FeaturesAndLabels(features=["a", "b"], labels=["c"], label_type=int))

    """when we fit the model"""
    fit = df.model.fit(model, NaiveSplitter(0.49), verbose=0, epochs=1500)
    print(fit.training_summary.df)

    prediction = df.model.predict(fit.model)
    binary_prediction = prediction.iloc[:, 0] >= 0.5
    np.testing.assert_array_equal(
        binary_prediction,
        np.array([True, False, False, True, True, False, False, True, ]))

    """and save and load the model"""
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    try:
        fit.model.save(temp)
        copy = Model.load(temp)
        pd.testing.assert_frame_equal(df.model.predict(fit.model),
                                      df.model.predict(copy),
                                      check_less_precise=True)
    finally:
        os.remove(temp)
def test_stacked_models(self):
    """given some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0, ],
        "b": [0, 0, 1, 1, 0, 0, 1, 1, ],
        "c": [1, 0, 0, 1, 1, 0, 0, 1, ]
    })

    """and a model"""
    model = self.provide_classification_model(
        FeaturesAndLabels(
            features=[
                "a",
                SubModelFeature(
                    "b",
                    self.provide_classification_model(
                        FeaturesAndLabels(features=["a", "b"], labels=["c"],
                                          label_type=int)))
            ],
            labels=["c"],
            label_type=int))

    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))

    with self.assertLogs(level='INFO') as cm:
        with df.model(temp) as m:
            fit = m.fit(model)

        self.assertIn(
            "INFO:pandas_ml_utils.ml.model.base_model:fitting submodel: b",
            cm.output[0])
        self.assertIn(
            "INFO:pandas_ml_utils.ml.model.base_model:fitted submodel",
            [s for s in cm.output
             if s.startswith("INFO:pandas_ml_utils.ml.model.base_model:fitted")][0])

    prediction = df.model.predict(fit.model)
    prediction2 = df.model.predict(Model.load(temp))
    pd.testing.assert_frame_equal(prediction, prediction2)

    os.remove(temp)
def test_auto_encoder(self):
    """given the implementation can handle auto encoders"""
    model = self.provide_auto_encoder_model(
        FeaturesAndLabels(features=["a", "b"], labels=["a", "b"], latent=["x"]))

    if model is None:
        return

    """and some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0] * 10,
        "b": [0, 1] * 10,
    })

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model() as m:
        fit = m.fit(model,
                    FittingParameter(splitter=naive_splitter(0.49),
                                     batch_size=batch_size,
                                     epochs=epochs),
                    verbose=0)

    print(fit.training_summary.df)

    """then we can predict the auto encoded data"""
    auto_encoded_prediction = df.model.predict(fit.model)
    self.assertEqual((20, 2), auto_encoded_prediction["prediction"].shape)

    """and we can encode"""
    encoded_prediction = df.model.predict(fit.model.as_encoder())
    print(encoded_prediction)
    self.assertEqual((20, 1), encoded_prediction["prediction"].shape)

    """and we can decode"""
    decoded_prediction = encoded_prediction["prediction"].model.predict(
        fit.model.as_decoder())
    print(decoded_prediction)
    np.testing.assert_array_almost_equal(
        decoded_prediction["prediction"].values > 0.5,
        df[["a", "b"]].values)

    """and we can encode and decode after save and load"""
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))
    try:
        fit.model.save(temp)
        copy = Model.load(temp)

        pd.testing.assert_frame_equal(df.model.predict(fit.model.as_encoder()),
                                      df.model.predict(copy.as_encoder()),
                                      check_less_precise=True)

        pd.testing.assert_frame_equal(
            encoded_prediction.model.predict(fit.model.as_decoder()),
            encoded_prediction.model.predict(copy.as_decoder()),
            check_less_precise=True)
    finally:
        os.remove(temp)

    # try to save only as encoder model
    try:
        fit.model.as_encoder().save(temp)
        copy = Model.load(temp)
    finally:
        os.remove(temp)
def test_classifier(self):
    """given some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0, ],
        "b": [0, 0, 1, 1, 0, 0, 1, 1, ],
        "c": [1, 0, 0, 1, 1, 0, 0, 1, ]
    })

    """and a model"""
    model = self.provide_classification_model(
        FeaturesAndLabels(features=["a", "b"], labels=["c"], label_type=int))
    temp = os.path.join(tempfile.gettempdir(), str(uuid.uuid4()))

    """when we fit the model"""
    batch_size, epochs = self.provide_batch_size_and_epoch()
    with df.model(temp) as m:
        fit = m.fit(model,
                    FittingParameter(splitter=naive_splitter(0.49),
                                     batch_size=batch_size,
                                     epochs=epochs),
                    verbose=0)

    print(fit.training_summary.df)
    # fit.training_summary.df.to_pickle('/tmp/classifier.df')
    # print(fit._repr_html_())

    """then we get a html summary and can predict"""
    self.assertIn('<style>', fit.training_summary._repr_html_())

    prediction = df.model.predict(fit.model)
    binary_prediction = prediction.iloc[:, 0] >= 0.5
    np.testing.assert_array_equal(
        binary_prediction,
        np.array([True, False, False, True, True, False, False, True, ]))

    """and load the model"""
    try:
        copy = Model.load(temp)
        pd.testing.assert_frame_equal(df.model.predict(fit.model),
                                      df.model.predict(copy),
                                      check_less_precise=True)

        # test using context manager and ForecastProvider
        pd.testing.assert_frame_equal(
            df.model(temp).predict(forecast_provider=Forecast).df,
            df.model.predict(copy),
            check_less_precise=True)
    finally:
        os.remove(temp)
def test_reassembling(self):
    df = pd.read_pickle(os.path.join(TEST_DATA_PATH, "btc_hourly.df"))
    model = Model.load(os.path.join(TEST_DATA_PATH, "MultiModel-Predictive-VAE--BTC.model"))

    prediction = df.model.predict(model, tail=2)
    print(prediction)