def test_partial_fit_regression(self):
    data = make_regression(100, 2, 1)
    df = pd.DataFrame(data[0])
    df["label"] = data[1]

    # model trained incrementally in small batches over several fold epochs
    with df.model() as m:
        fit_partial = m.fit(
            SkModel(
                MLPRegressor(max_iter=1, random_state=42),
                FeaturesAndLabels(features=[0, 1], labels=['label'])
            ),
            FittingParameter(
                naive_splitter(0.3),
                batch_size=10,
                fold_epochs=10
            )
        )

    # reference model trained in one go
    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPRegressor(max_iter=10, random_state=42),
                FeaturesAndLabels(features=[0, 1], labels=['label'])
            ),
            FittingParameter(naive_splitter(0.3))
        )

    # both training schedules should converge to (almost) the same prediction
    self.assertAlmostEqual(
        df.model.predict(fit.model).iloc[0, -1],
        df.model.predict(fit_partial.model).iloc[0, -1],
        4
    )
def test_partial_fit_classification(self):
    data = make_classification(100, 2, 1, 0, n_clusters_per_class=1)
    df = pd.DataFrame(data[0])
    df["label"] = data[1]

    # model trained incrementally; classes are passed up front so the classifier
    # can be fitted batch by batch
    with df.model() as m:
        fit_partial = m.fit(
            SkModel(
                MLPClassifier(max_iter=1, random_state=42),
                FeaturesAndLabels(features=[0, 1], labels=['label']),
                classes=np.unique(data[1])
            ),
            FittingParameter(
                stratified_random_splitter(0.3),
                batch_size=10,
                fold_epochs=10,
            )
        )

    # reference model trained in one go
    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPClassifier(max_iter=10, random_state=42),
                FeaturesAndLabels(features=[0, 1], labels=['label'])
            ),
            FittingParameter(stratified_random_splitter(0.3))
        )

    # both training schedules should converge to (almost) the same prediction
    self.assertAlmostEqual(
        df.model.predict(fit.model).iloc[0, -1],
        df.model.predict(fit_partial.model).iloc[0, -1],
        4
    )
def provide_linear_regression_model(self):
    from sklearn.linear_model import LinearRegression
    from sklearn.neural_network import MLPRegressor
    from pandas_ml_utils import FeaturesAndLabels, SkModel

    return [
        (
            SkModel(LinearRegression(), FeaturesAndLabels(["x"], ["y"])),
            FittingParameter(epochs=1, fold_epochs=1, context="LinearRegression")
        ),
        (
            SkModel(
                MLPRegressor(10, learning_rate_init=0.01, max_iter=9000, validation_fraction=0),
                FeaturesAndLabels(["x"], ["y"])
            ),
            FittingParameter(epochs=1, fold_epochs=1, context="MLPRegressor")
        ),
        (
            SkModel(
                MLPRegressor(10, learning_rate_init=0.01, max_iter=1, validation_fraction=0, warm_start=True),
                FeaturesAndLabels(["x"], ["y"])
            ),
            FittingParameter(epochs=9000, fold_epochs=1, context="MLPRegressor partial fit")
        )
    ]
def _test_hyper_parameter_for_simple_model(self):
    from hyperopt import hp

    """given"""
    df = DF_TEST.copy()
    df['label'] = df["spy_Close"] > df["spy_Open"]

    """when fitting with hyper parameter search"""
    fit = df.fit(
        SkModel(
            MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42),
            FeaturesAndLabels(
                features=['vix_Close'],
                labels=['label'],
                target_columns=["vix_Open"],
                loss_column="spy_Volume"
            )
        ),
        test_size=0.4,
        test_validate_split_seed=42,
        hyper_parameter_space={
            'alpha': hp.choice('alpha', [0.0001, 10]),
            'early_stopping': True,
            'max_iter': 50,
            '__max_evals': 4,
            '__rstate': np.random.RandomState(42)
        }
    )

    """then test best parameter"""
    self.assertEqual(fit.model.sk_model.get_params()['alpha'], 0.0001)
def provide_regression_model(self, features_and_labels):
    model = SkModel(
        MLPRegressor(1, learning_rate_init=0.01, solver='sgd', activation='identity', momentum=0,
                     max_iter=1500, n_iter_no_change=500, nesterovs_momentum=False, shuffle=False,
                     validation_fraction=0.0, random_state=42),
        features_and_labels,
    )

    return model
def test_simple_classification_model(self):
    df = DF_NOTES.copy()

    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                FeaturesAndLabels(
                    features=["variance", "skewness", "kurtosis", "entropy"],
                    labels=["authentic"],
                    label_type=bool
                )
            ),
            FittingParameter(stratified_random_splitter())
        )

    print(fit)
    html = fit._repr_html_()

    prediction = df.model.predict(fit.model)
    print(prediction)
    self.assertGreaterEqual(prediction[PREDICTION_COLUMN_NAME].iloc[-1].values, 0.68)

    backtest = df.model.backtest(fit.model)
    self.assertIn(FEATURE_COLUMN_NAME, backtest.df)
    self.assertIn(LABEL_COLUMN_NAME, backtest.df)
    np.testing.assert_array_almost_equal(prediction[PREDICTION_COLUMN_NAME].iloc[-1].values,
                                         backtest.df[PREDICTION_COLUMN_NAME].iloc[-1].values)

    # test multiple samples
    samples = df.model.predict(fit.model, samples=2)
    self.assertIsInstance(samples[PREDICTION_COLUMN_NAME].iloc[-1, 0], list)
    self.assertEqual(2, len(samples[PREDICTION_COLUMN_NAME].iloc[-1, 0]))
def test_simple_classification_model(self):
    df = DF_TEST.copy()

    fit = df.model.fit(
        SkModel(
            MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42, max_iter=2),
            FeaturesAndLabels(
                features=[
                    lambda df: df["Close"].ta.rsi().ta.rnn(28),
                    lambda df: (df["Volume"] / df["Volume"].ta.ema(14) - 1).ta.rnn(28)
                ],
                labels=[
                    lambda df: (df["Close"] > df["Open"]).shift(-1),
                ]
            ),
            # kwargs
            forecasting_time_steps=7
        )
    )

    print(fit)
    html = fit._repr_html_()

    prediction = df.model.predict(fit.model)
    print(prediction)
    self.assertIsInstance(prediction[PREDICTION_COLUMN_NAME, 0].iloc[-1], (float, np.float32, np.float64))

    backtest = df.model.backtest(fit.model)

    # test multiple samples
    samples = df.model.predict(fit.model, samples=2)
    self.assertIsInstance(samples[PREDICTION_COLUMN_NAME, 0].iloc[-1], list)
    self.assertEqual(2, len(samples[PREDICTION_COLUMN_NAME, 0].iloc[-1]))
def test_linear_model(self):
    df = DF_TEST.copy()

    fit = df.model.fit(
        SkModel(
            Lasso(),
            FeaturesAndLabels(
                features=[
                    lambda df: df["Close"].ta.rsi().ta.rnn(28),
                    lambda df: (df["Volume"] / df["Volume"].ta.ema(14) - 1).ta.rnn(28)
                ],
                labels=[
                    lambda df: (df["Close"] / df["Open"] - 1).shift(-1),
                ]
            ),
            summary_provider=RegressionSummary
        ),
        NaiveSplitter()
    )

    print(fit)

    prediction = df.model.predict(fit.model)
    print(prediction)

    backtest = df.model.backtest(fit.model)
def test_simple_regression_model(self):
    df = DF_TEST.copy()

    fit = df.model.fit(
        SkModel(
            MLPRegressor(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42, max_iter=2),
            FeaturesAndLabels(
                features=[
                    lambda df: df["Close"].ta.rsi().ta.rnn(28),
                    lambda df: (df["Volume"] / df["Volume"].ta.ema(14) - 1).ta.rnn(28)
                ],
                labels=[
                    lambda df: (df["Close"] / df["Open"] - 1).shift(-1),
                ]
            ),
            summary_provider=RegressionSummary
        ),
        NaiveSplitter()
    )

    print(fit)
    html = fit._repr_html_()

    prediction = df.model.predict(fit.model)
    print(prediction)
    self.assertIsInstance(prediction[PREDICTION_COLUMN_NAME, 0].iloc[-1], (float, np.float32, np.float64))

    backtest = df.model.backtest(fit.model)
def test_linear_model(self):
    df = DF_NOTES.copy()

    with df.model() as m:
        fit = m.fit(
            SkModel(
                Lasso(),
                FeaturesAndLabels(
                    features=[
                        lambda df: df["variance"],
                        lambda df: (df["skewness"] / df["kurtosis"]).rename("engineered")
                    ],
                    labels=[
                        'authentic'
                    ]
                )
            ),
            FittingParameter(naive_splitter())
        )

    print(fit)

    prediction = df.model.predict(fit.model)
    print(prediction)

    backtest = df.model.backtest(fit.model)
    self.assertLess(backtest.model.sk_model.coef_[0], 1e-5)
def test_KFold(self):
    df = DF_TEST.copy()

    fit = df.model.fit(
        SkModel(
            MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42, max_iter=2),
            FeaturesAndLabels(
                features=extract_with_post_processor(
                    [
                        lambda df: df["Close"].ta.trix(),
                        lambda df: df["Close"].ta.ppo(),
                        lambda df: df["Close"].ta.apo(),
                        lambda df: df["Close"].ta.macd(),
                        lambda df: df.ta.adx(),
                    ],
                    lambda df: df.ta.rnn(range(10))
                ),
                labels=[
                    lambda df: df["Close"].ta.sma(period=60)
                        .ta.cross(df["Close"].ta.sma(period=20))
                        .ta.rnn([1, 2, 3, 4, 5])
                        .abs()
                        .sum(axis=1)
                        .shift(-5)
                        .astype(bool)
                ]
            )
        ),
        RandomSplits(
            test_size=0.4,
            test_validate_split_seed=42,
            cross_validation=(1, KFoldBoostRareEvents(n_splits=5).split)
        )
    )
def test_simple_classification_cross_validation(self):
    df = DF_NOTES.copy()

    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                FeaturesAndLabels(
                    features=["variance", "skewness", "kurtosis", "entropy"],
                    labels=["authentic"],
                    label_type=bool
                )
            ),
            FittingParameter(
                splitter=random_splitter(),
                cross_validation=KFold(3, random_state=42, shuffle=True)
            )
        )

    print(fit)
    html = fit._repr_html_()

    prediction = df.model.predict(fit.model)
    print(prediction)
    self.assertGreaterEqual(prediction[PREDICTION_COLUMN_NAME].iloc[-1].values, 0.65)
def provide_classification_model(self, features_and_labels):
    model = SkModel(
        MLPClassifier(activation='logistic', max_iter=1000, hidden_layer_sizes=(3,), alpha=0.001,
                      solver='lbfgs', random_state=42),
        features_and_labels,
    )

    return model
def test_multi_model(self):
    """given some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [0, 0, 1, 1, 0, 0, 1, 1],
        "c": [1, 0, 0, 1, 1, 0, 0, 1],
    })

    model = MultiModel(
        SkModel(
            MLPClassifier(activation='logistic', max_iter=1000, hidden_layer_sizes=(3,), alpha=0.001,
                          solver='lbfgs', random_state=42),
            FeaturesAndLabels(
                features=["a", "b"],
                labels=[lambda df, i: df["c"].rename(f"c_{i}")],
                label_type=int
            ),
            summary_provider=ClassificationSummary
        ),
        2,
        model_index_variable="i",
        summary_provider=MultiModelSummary
    )

    fit = df.model.fit(model, NaiveSplitter(0.49), epochs=1500, verbose=True)
    print(fit.training_summary._repr_html_()[:100])

    pdf = df.model.predict(fit.model, tail=2)
    print(pdf)
def provide_non_linear_regression_model(self):
    from sklearn.neural_network import MLPRegressor
    from pandas_ml_utils import FeaturesAndLabels, SkModel

    return [
        (
            SkModel(
                MLPRegressor(200, learning_rate_init=0.001, max_iter=5000, validation_fraction=0),
                FeaturesAndLabels(["x"], ["y"])
            ),
            FittingParameter(epochs=1, context="epoch 1 fit"),
        ),
        (
            SkModel(
                MLPRegressor(200, learning_rate_init=0.001, max_iter=1, validation_fraction=0, warm_start=True),
                FeaturesAndLabels(["x"], ["y"])
            ),
            FittingParameter(epochs=5000, context="partial fit"),
        )
    ]
def test_simple_classification_model_with_all_options(self):
    df = DF_NOTES.copy()

    with df.model() as m:
        fit = m.fit(
            SkModel(
                MLPClassifier(activation='tanh', hidden_layer_sizes=(20, 12), random_state=42, max_iter=2),
                FeaturesAndLabels(
                    features=["variance", "skewness", "kurtosis", "entropy"],
                    sample_weights=["variance"],
                    gross_loss=["kurtosis"],
                    targets=["entropy"],
                    labels=["authentic"],
                    label_type=bool
                )
            ),
            FittingParameter(stratified_random_splitter())
        )

    # should not throw an error
    html = fit._repr_html_()

    # fit resulting columns
    print(fit.test_summary.df)
    self.assertIn(GROSS_LOSS_COLUMN_NAME, fit.training_summary.df)
    self.assertIn(FEATURE_COLUMN_NAME, fit.training_summary.df)
    self.assertIn(LABEL_COLUMN_NAME, fit.training_summary.df)
    self.assertIn(TARGET_COLUMN_NAME, fit.training_summary.df)
    self.assertIn(FEATURE_COLUMN_NAME, fit.test_summary.df)
    self.assertIn(LABEL_COLUMN_NAME, fit.test_summary.df)
    self.assertIn(TARGET_COLUMN_NAME, fit.test_summary.df)

    # prediction resulting columns
    prediction = df.model.predict(fit.model)
    print(prediction)
    self.assertIn(FEATURE_COLUMN_NAME, prediction)
    self.assertIn(TARGET_COLUMN_NAME, prediction)

    # backtest resulting columns
    backtest = df.model.backtest(fit.model)
    print(backtest.df)
    self.assertEqual(len(df), len(backtest.df))
    self.assertIn(FEATURE_COLUMN_NAME, backtest.df)
    self.assertIn(LABEL_COLUMN_NAME, backtest.df)
    self.assertIn(TARGET_COLUMN_NAME, backtest.df)
    self.assertIn(GROSS_LOSS_COLUMN_NAME, backtest.df)
def test_future_bband_quantile_clasification(self):
    df = DF_TEST.copy()

    fit = df.model.fit(
        SkModel(
            MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42,
                          warm_start=True, max_iter=2),
            FeaturesAndLabels(
                features=extract_with_post_processor(
                    [
                        lambda df: df["Close"].ta.macd()._[['macd.*', 'signal.*']],
                        lambda df: df.ta.adx()._[['+DI', '-DM', '+DM']],
                        lambda df: df["Close"].ta.mom(),
                        lambda df: df["Close"].ta.apo(),
                        lambda df: df.ta.atr(),
                        lambda df: df["Close"].ta.trix(),
                    ],
                    lambda df: df.ta.rnn(280)
                ),
                labels=[
                    lambda df: df["Close"].ta.future_bband_quantile(include_mean=False).ta.one_hot_encode_discrete()
                ],
                targets=[
                    lambda df: df["Close"].ta.bbands()[["lower", "upper"]]
                ]
            ),
            summary_provider=ClassificationSummary,
        ),
        RandomSplits(
            test_size=0.4,
            test_validate_split_seed=42,
            cross_validation=(1, KEquallyWeightEvents(n_splits=3).split)
        )
    )

    print(fit)

    prediction = df.model.predict(fit.model, tail=3)
    self.assertEqual(3, len(prediction))
    self.assertEqual((3,), np.array(prediction[PREDICTION_COLUMN_NAME].iloc[-1, -1]).shape)

    target_predictions = prediction.map_prediction_to_target()
    print(target_predictions)
    self.assertEqual(9, len(target_predictions))
def test_debug(self):
    df = DF_TEST.copy()

    fit = df.model.fit(
        SkModel(
            MLPClassifier(activation='tanh', hidden_layer_sizes=(60, 50), random_state=42,
                          warm_start=True, max_iter=2),
            FeaturesAndLabels(
                features=extract_with_post_processor(
                    [
                        lambda df: df["Close"].q.ta_macd().ml[['macd.*', 'signal.*']],
                        lambda df: df.q.ta_adx().ml[['+DI', '-DM', '+DM']],
                        lambda df: df["Close"].q.ta_mom(),
                        lambda df: df["Close"].q.ta_apo(),
                        lambda df: df.q.ta_atr(),
                        lambda df: df["Close"].q.ta_trix(),
                    ],
                    lambda df: df.q.ta_rnn(280)
                ),
                labels=[
                    lambda df: df["Close"].q.ta_future_bband_quantile().q.ta_one_hot_encode_discrete()
                ],
                targets=[
                    lambda df: df["Close"].q.ta_bbands()[["lower", "upper"]]
                ]
            ),
            summary_provider=ClassificationSummary,
        ),
        test_size=0.4,
        test_validate_split_seed=42,
        cross_validation=(1, KEquallyWeightEvents(n_splits=3).split),
    )

    print(fit)

    prediction = df.model.predict(fit.model, tail=3)
    self.assertEqual(3, len(prediction))

    target_predictions = prediction.map_prediction_to_target()
    print(target_predictions)
    self.assertEqual(9, len(target_predictions))
def test_multi_model_kwargs(self):
    """given some toy classification data"""
    df = pd.DataFrame({
        "a": [1, 0, 1, 0, 1, 0, 1, 0],
        "b": [0, 0, 1, 1, 0, 0, 1, 1],
        "c": [1, 0, 0, 1, 1, 0, 0, 1],
    })

    model = MultiModel(
        SkModel(
            MLPClassifier(activation='logistic', max_iter=1000, hidden_layer_sizes=(3,), alpha=0.001,
                          solver='lbfgs', random_state=42),
            FeaturesAndLabels(
                features=["a", "b"],
                labels=[lambda df, index: df["c"].rename(index)],
                label_type=int,
                index=["z"]
            ),
        ),
        ["c1", "c2"],
        model_index_variable="index"
    )

    fit = df.model.fit(model, NaiveSplitter(0.49), epochs=1500, verbose=True)
    print(fit.training_summary.df)

    self.assertEqual(4, len(fit.training_summary.df[PREDICTION_COLUMN_NAME, "c1"]))
    self.assertEqual(4, len(fit.training_summary.df[PREDICTION_COLUMN_NAME, "c2"]))
    np.testing.assert_array_almost_equal(
        fit.training_summary.df[PREDICTION_COLUMN_NAME, "c1"],
        fit.training_summary.df[PREDICTION_COLUMN_NAME, "c2"]
    )