def test_fit_predict_predictor_regression(mock_show):
    """Run the regression pipeline end to end and check the saved output.

    Steps: split the train/test CSV files on the ``SalePrice`` target, pass
    the split through a ``Drift_thresholder``, evaluate and optimise with a
    custom MAPE scorer, then call ``Predictor.fit_predict`` and inspect
    ``save/SalePrice_predictions.csv``.

    ``mock_show`` is never read in the body; it is presumably injected by a
    patch decorator or fixture declared outside this function (TODO confirm).
    """
    csv_paths = [
        "data_for_tests/train_regression.csv",
        "data_for_tests/test_regression.csv",
    ]
    reader = Reader(sep=',')
    raw_split = reader.train_test_split(Lpath=csv_paths,
                                        target_name="SalePrice")
    data = Drift_thresholder().fit_transform(raw_split)

    # Mean absolute percentage error, declared as a loss
    # (greater_is_better=False). The lambda is kept inline on purpose.
    mape_scorer = make_scorer(
        lambda y_true, y_pred: 100 * np.sum(
            np.abs(y_true - y_pred) / y_true) / len(y_true),
        greater_is_better=False,
        needs_proba=False,
    )

    optimiser = Optimiser(scoring=mape_scorer, n_folds=3)
    optimiser.evaluate(None, data)

    search_space = {
        'ne__numerical_strategy': {"search": "choice", "space": [0]},
        'ce__strategy': {
            "search": "choice",
            "space": ["label_encoding",
                      "random_projection",
                      "entity_embedding"],
        },
        'fs__threshold': {"search": "uniform", "space": [0.01, 0.3]},
        'est__max_depth': {"search": "choice", "space": [3, 4, 5, 6, 7]},
    }
    best_params = optimiser.optimise(search_space, data, 1)

    predictor = Predictor(verbose=True)
    predictor.fit_predict(best_params, data)

    # The predictor is expected to have written its predictions to disk.
    predictions = pd.read_csv("save/SalePrice_predictions.csv")
    expected_columns = ['Unnamed: 0', 'SalePrice_predicted']
    assert list(predictions.columns) == expected_columns
    assert predictions.shape == (1459, 2)
def test_set_params_predictor():
    """Check that ``Predictor.set_params`` updates attributes and warns.

    Known keys (``to_path``, ``verbose``) must be reflected on the instance;
    an unknown key must raise exactly one ``UserWarning``.
    """
    prd = Predictor()

    # Known keys are stored on the instance.
    prd.set_params(to_path="name")
    assert prd.to_path == "name"

    prd.set_params(verbose=False)
    assert not prd.verbose

    # An unknown key is reported through a single UserWarning.
    with pytest.warns(UserWarning) as caught_warnings:
        prd.set_params(wrong_key=3)
    assert len(caught_warnings) == 1
def test_fit_predict_predictor_classification():
    """Test fit_predict method of Predictor class for classification.

    Splits the train/test CSV files on the ``Survived`` target, runs the
    drift thresholder, optimises a small hyper-parameter space with the
    ``accuracy`` scoring, then calls ``Predictor.fit_predict`` and checks
    the columns and shape of ``save/Survived_predictions.csv``.
    """
    reader = Reader(sep=",")
    # Named ``data`` rather than ``dict`` so the builtin is not shadowed.
    data = reader.train_test_split(
        Lpath=["data_for_tests/train.csv", "data_for_tests/test.csv"],
        target_name="Survived")

    # NOTE(review): the value returned by fit_transform was never used by
    # this test; the optimiser and predictor below receive ``data`` itself.
    # That behaviour is kept as-is -- confirm it is intended.
    Drift_thresholder().fit_transform(data)

    # Building the optimiser with this scoring is expected to emit exactly
    # one UserWarning.
    with pytest.warns(UserWarning) as record:
        opt = Optimiser(scoring='accuracy', n_folds=3)
    assert len(record) == 1

    space = {
        'ne__numerical_strategy': {
            "search": "choice",
            "space": [0],
        },
        'ce__strategy': {
            "search": "choice",
            "space": ["label_encoding",
                      "random_projection",
                      "entity_embedding"],
        },
        'fs__threshold': {
            "search": "uniform",
            "space": [0.01, 0.3],
        },
        'est__max_depth': {
            "search": "choice",
            "space": [3, 4, 5, 6, 7],
        },
    }

    optimal_hyper_parameters = opt.optimise(space, data, 1)

    predictor = Predictor(verbose=False)
    predictor.fit_predict(optimal_hyper_parameters, data)

    pred_df = pd.read_csv("save/Survived_predictions.csv")
    # Comparing two plain lists already yields a single bool, so no
    # element-wise reduction is needed.
    assert list(pred_df.columns) == [
        'Unnamed: 0', '0.0', '1.0', 'Survived_predicted']
    assert np.shape(pred_df) == (418, 4)
# Script fragment: drift-threshold the data, evaluate, optimise a
# hyper-parameter space and fit/predict.
# ``dft`` and ``df`` are defined before this excerpt.

# Rebind ``df`` to whatever the thresholder returns.
df = dft.fit_transform(df)

opt = Optimiser()
# Ignore DeprecationWarning for everything executed after this call.
warnings.filterwarnings('ignore', category=DeprecationWarning)
# Evaluation with no hyper-parameters supplied (None); ``score`` is not read
# again within this excerpt.
score = opt.evaluate(None, df)

# Search space handed to ``opt.optimise``: each key maps to a "search" kind
# ("choice" or "uniform") and the "space" of candidate values or bounds.
space = {
    'ne__numerical_strategy':{"search":"choice", "space":[0, "mean"]},
    'ce__strategy':{"search":"choice", "space":["label_encoding", "random_projection", "entity_embedding"]},
    'fs__threshold':{"search":"uniform", "space":[0.001, 0.2]},
    'est__strategy':{"search":"choice", "space":["RandomForest", "ExtraTrees", "LightGBM"]},
    'est__max_depth':{"search":"choice", "space":[8, 9, 10, 11, 12, 13]}
}

# The bare string below is a no-op expression statement; it only keeps
# alternative snippets around for reference and is never executed as code.
""" #Clf_feature_selector(strategy='l1', threshold=0.3) #Categorical_encoder(strategy = "") StackingClassifier([Classifier(strategy="AdaBoost"), Classifier(strategy="AdaBoost"),Classifier(strategy="AdaBoost")]) space = { 'fs__strategy':{"search":"choice","space":["variance","rf_feature_importance"]},'est__colsample_bytree':{"search":"uniform", "space":[0.3,0.7]}} """

# Third argument (15) is presumably the number of optimisation rounds --
# TODO confirm against the Optimiser API.
params = opt.optimise(space, df, 15)
# Optional re-scoring with the tuned parameters (disabled):
# print(opt.evaluate(params, df))

prd = Predictor()
# ``y_predproba`` is not read again within this excerpt.
y_predproba = prd.fit_predict(params, df)

# Bare string kept as a note to the user; it has no effect at runtime.
""" download file from left hand panel """
# Script fragment: read the data, evaluate, optimise a hyper-parameter space
# and fit/predict for the "class" target.
# ``paths`` is defined before this excerpt.

target_name = "class"

rd = Reader(sep=",")
df = rd.train_test_split(paths, target_name)
# The result of head() is discarded here; presumably a notebook-style
# preview of the training split.
df["train"].head()

# NOTE(review): the thresholder output is bound to ``dft`` but every later
# step keeps using ``df`` -- confirm this is intended.
dft = Drift_thresholder().fit_transform(df)

opt = Optimiser()
# Ignore DeprecationWarning for everything executed after this call.
warnings.filterwarnings('ignore', category=DeprecationWarning)
# Evaluation with no hyper-parameters supplied (None); ``score`` is not read
# again within this excerpt.
score = opt.evaluate(None, df)

# Search space handed to ``opt.optimise``: each key maps to a "search" kind
# ("choice" or "uniform") and the "space" of candidate values or bounds.
# NOTE(review): -1 among the max_depth choices presumably means "no limit"
# for some estimators -- verify it is valid for every 'est__strategy' option.
space = {
    'ne__numerical_strategy':{"search":"choice", "space":[0, "mean"]},
    'ce__strategy':{"search":"choice", "space":["label_encoding", "random_projection", "entity_embedding"]},
    'fs__threshold':{"search":"uniform", "space":[0.001, 0.2]},
    'est__strategy':{"search":"choice", "space":["RandomForest", "ExtraTrees", "LightGBM"]},
    'est__max_depth':{"search":"choice", "space":[-1, 9, 10, 11, 12, 13]}
}

# Third argument (50) is presumably the number of optimisation rounds --
# TODO confirm against the Optimiser API.
params = opt.optimise(space, df, 50)
# Re-evaluate with the tuned parameters; the return value is discarded.
opt.evaluate(params, df)

prd = Predictor()
prd.fit_predict(params, df)
def test_get_params_predictor():
    """Test get_params method of Predictor class.

    A default-constructed ``Predictor`` must report ``to_path="save"`` and
    ``verbose=True`` through ``get_params``.
    """
    predictor = Predictor()
    # Named ``expected_params`` rather than ``dict`` so the builtin is not
    # shadowed inside the test.
    expected_params = {'to_path': "save", 'verbose': True}
    assert predictor.get_params() == expected_params
def test_init_predictor():
    """Check the attributes of a ``Predictor`` built with no arguments.

    ``to_path`` must equal ``"save"`` and ``verbose`` must be truthy.
    """
    default_predictor = Predictor()

    assert default_predictor.to_path == "save"
    assert default_predictor.verbose