def test_scoring_logreg_tune_correct(data_fixture, request):
    """Compare a default 'logit' model with a fine-tuned one on the test split.

    The fixture named by ``data_fixture`` must yield a ``(train, test)`` pair.
    The rounded ROC AUC of the tuned model must be at least the rounded
    ROC AUC of the default model, which in turn must exceed the threshold.
    """
    train_data, test_data = request.getfixturevalue(data_fixture)

    # Each split is scaled by a Scaling instance fitted on that same split.
    fitted_on_train = Scaling().fit(train_data.features)
    train_data.features = fitted_on_train.apply(train_data.features)
    fitted_on_test = Scaling().fit(test_data.features)
    test_data.features = fitted_on_test.apply(test_data.features)

    # Baseline: plain fit followed by prediction on the test split.
    baseline = Model(model_type='logit')
    baseline_fitted, _ = baseline.fit(train_data)
    baseline_prediction = baseline.predict(fitted_model=baseline_fitted,
                                           data=test_data)
    baseline_roc_auc = roc_auc(y_true=test_data.target,
                               y_score=baseline_prediction)

    # Candidate: a separate model instance that goes through fine_tune.
    tuned = Model(model_type='logit')
    tuned_fitted, _ = tuned.fine_tune(
        train_data,
        iterations=50,
        max_lead_time=timedelta(minutes=0.1),
    )
    tuned_prediction = tuned.predict(fitted_model=tuned_fitted,
                                     data=test_data)
    tuned_roc_auc = roc_auc(y_true=test_data.target,
                            y_score=tuned_prediction)

    min_roc_auc = 0.6
    # Rounding to two decimals tolerates tiny score differences.
    assert round(tuned_roc_auc, 2) >= round(baseline_roc_auc, 2) > min_roc_auc
def test_log_clustering_fit_correct(data_fixture, request):
    """Check that the k-means model labels the train split with exactly 0 and 1.

    The fixture named by ``data_fixture`` is scaled, split with
    ``train_test_data_setup`` and the train part is fitted; the distinct
    values of the train prediction must be ``[0, 1]``.
    """
    data = request.getfixturevalue(data_fixture)
    data.features = Scaling().fit(data.features).apply(data.features)
    # Only the train part is needed here.
    train_data, _ = train_test_data_setup(data=data)

    kmeans = Model(model_type=ModelTypesIdsEnum.kmeans)
    _, train_predicted = kmeans.fit(data=train_data)

    # np.array_equal returns a plain False when the number of distinct labels
    # is not two, so the assertion fails cleanly instead of relying on an
    # elementwise `==` between sequences of different lengths.
    assert np.array_equal(np.unique(train_predicted), [0, 1])
def test_qda_fit_correct(data_fixture, request):
    """Check that the QDA model reaches a ROC AUC of at least 0.95 on train data."""
    dataset = request.getfixturevalue(data_fixture)
    fitted_scaling = Scaling().fit(dataset.features)
    dataset.features = fitted_scaling.apply(dataset.features)

    # The test part of the split is not used by this check.
    train_part, _ = train_test_data_setup(data=dataset)

    qda_model = Model(model_type=ModelTypesIdsEnum.qda)
    _, prediction_on_train = qda_model.fit(data=train_part)

    train_roc = roc_auc(y_true=train_part.target,
                        y_score=prediction_on_train)
    min_expected_roc = 0.95
    assert train_roc >= min_expected_roc
def test_log_regression_fit_correct(classification_dataset):
    """Check that the logit model reaches a ROC AUC of at least 0.95 on train data."""
    # NOTE(review): this file defines a second function with this exact name
    # further down; the later definition replaces this one at import time, so
    # this body is presumably never collected - confirm which one to keep.
    dataset = classification_dataset
    fitted_scaling = Scaling().fit(dataset.features)
    dataset.features = fitted_scaling.apply(dataset.features)

    # The test part of the split is not used by this check.
    train_part, _ = train_test_data_setup(data=dataset)

    logit_model = Model(model_type=ModelTypesIdsEnum.logit)
    _, prediction_on_train = logit_model.fit(data=train_part)

    train_roc = roc_auc(y_true=train_part.target,
                        y_score=prediction_on_train)
    min_expected_roc = 0.95
    assert train_roc >= min_expected_roc
def test_lda_fit_correct(data_fixture, request):
    """Check that the 'lda' model scores at least 0.95 on the train split.

    The score is whatever ``get_roc_auc`` returns for the train data and the
    prediction produced by ``fit``.
    """
    dataset = request.getfixturevalue(data_fixture)
    fitted_scaling = Scaling().fit(dataset.features)
    dataset.features = fitted_scaling.apply(dataset.features)

    # The test part of the split is not used by this check.
    train_part, _ = train_test_data_setup(data=dataset)

    lda_model = Model(model_type='lda')
    _, prediction_on_train = lda_model.fit(data=train_part)

    train_roc = get_roc_auc(train_part, prediction_on_train)
    min_expected_roc = 0.95
    assert train_roc >= min_expected_roc
def test_log_regression_fit_correct(classification_dataset):
    """Check that the 'logit' model scores at least 0.95 on the train split.

    The score is whatever ``get_roc_auc`` returns for the train data and the
    prediction produced by ``fit``.
    """
    # NOTE(review): an earlier function in this file has this exact name; this
    # later definition replaces it at import time - confirm that both are not
    # meant to run.
    dataset = classification_dataset
    fitted_scaling = Scaling().fit(dataset.features)
    dataset.features = fitted_scaling.apply(dataset.features)

    # The test part of the split is not used by this check.
    train_part, _ = train_test_data_setup(data=dataset)

    logit_model = Model(model_type='logit')
    _, prediction_on_train = logit_model.fit(data=train_part)

    train_roc = get_roc_auc(train_part, prediction_on_train)
    min_expected_roc = 0.95
    assert train_roc >= min_expected_roc
def test_classification_manual_tuning_correct(data_fixture, request):
    """Check that manually set 'knn' params change the test-split prediction.

    One model is fitted as created; a second one gets
    ``params = {'n_neighbors': 1}`` before fitting. Their predictions on the
    test split must not be equal.
    """
    dataset = request.getfixturevalue(data_fixture)
    fitted_scaling = Scaling().fit(dataset.features)
    dataset.features = fitted_scaling.apply(dataset.features)
    train_part, test_part = train_test_data_setup(data=dataset)

    # Reference model with untouched params.
    default_knn = Model(model_type='knn')
    default_fitted, _ = default_knn.fit(data=train_part)
    default_prediction = default_knn.predict(fitted_model=default_fitted,
                                             data=test_part)

    # Second model with params assigned by hand before fitting.
    manual_knn = Model(model_type='knn')
    manual_knn.params = {'n_neighbors': 1}
    manual_fitted, _ = manual_knn.fit(data=train_part)
    manual_prediction = manual_knn.predict(fitted_model=manual_fitted,
                                           data=test_part)

    predictions_match = np.array_equal(default_prediction, manual_prediction)
    assert not predictions_match
def test_knn_classification_tune_correct(data_fixture, request):
    """Check that a fine-tuned 'knn' model beats the default one on test data.

    The ROC AUC of the tuned model on the test split must be strictly greater
    than that of the default model, which must itself exceed the threshold.
    """
    data = request.getfixturevalue(data_fixture)
    data.features = Scaling().fit(data.features).apply(data.features)
    train_data, test_data = train_test_data_setup(data=data)

    # Baseline: plain fit followed by prediction on the test split.
    knn = Model(model_type='knn')
    model, _ = knn.fit(data=train_data)
    test_predicted = knn.predict(fitted_model=model, data=test_data)
    roc_on_test = roc_auc(y_true=test_data.target,
                          y_score=test_predicted)

    # Candidate: a separate model instance that goes through fine_tune.
    knn_for_tune = Model(model_type='knn')
    tuned_model, _ = knn_for_tune.fine_tune(data=train_data,
                                            iterations=10,
                                            max_lead_time=timedelta(minutes=1))
    # Predict through the same wrapper that produced the tuned model, not
    # through the baseline `knn` wrapper, matching the other tuning tests in
    # this file.
    test_predicted_tuned = knn_for_tune.predict(fitted_model=tuned_model,
                                                data=test_data)
    roc_on_test_tuned = roc_auc(y_true=test_data.target,
                                y_score=test_predicted_tuned)

    roc_threshold = 0.6
    assert roc_on_test_tuned > roc_on_test > roc_threshold
def test_max_lead_time_in_tune_process(data_fixture, request):
    """Check that 'knn' fine-tuning honours ``max_lead_time``.

    The whole number of seconds elapsed from just before the model is created
    until the tuned model has been scored on the test split must equal the
    configured lead time, and the tuned ROC AUC must exceed the threshold.
    """
    data = request.getfixturevalue(data_fixture)
    data.features = Scaling().fit(data.features).apply(data.features)
    train_data, test_data = train_test_data_setup(data=data)

    # Single source of truth for both the tuner limit and the expected time.
    max_lead_time = timedelta(minutes=0.05)

    start = datetime.now()

    knn_for_tune = Model(model_type='knn')
    model, _ = knn_for_tune.fine_tune(data=train_data,
                                      max_lead_time=max_lead_time,
                                      iterations=100)
    test_predicted_tuned = knn_for_tune.predict(fitted_model=model,
                                                data=test_data)
    roc_on_test_tuned = roc_auc(y_true=test_data.target,
                                y_score=test_predicted_tuned)
    roc_threshold = 0.6

    # total_seconds() covers the full duration, whereas `.seconds` is only the
    # seconds component of the timedelta; truncate to whole seconds as before.
    spent_time = int((datetime.now() - start).total_seconds())
    expected_time = int(max_lead_time.total_seconds())

    assert roc_on_test_tuned > roc_threshold
    assert spent_time == expected_time
def test_rf_class_tune_correct(data_fixture, request):
    """Check that fine-tuning the 'rf' model changes its test-split ROC AUC.

    The same model instance is first fitted and then fine-tuned. The tuned
    score must differ from the plain-fit score and exceed the threshold.
    """
    dataset = request.getfixturevalue(data_fixture)
    fitted_scaling = Scaling().fit(dataset.features)
    dataset.features = fitted_scaling.apply(dataset.features)
    train_part, test_part = train_test_data_setup(data=dataset)

    forest = Model(model_type='rf')

    # Plain fit and scoring on the test split.
    plain_fitted, _ = forest.fit(train_part)
    plain_prediction = forest.predict(fitted_model=plain_fitted,
                                      data=test_part)
    plain_roc_auc = roc_auc(y_true=test_part.target,
                            y_score=plain_prediction)

    # Fine-tuning on the same instance, then scoring again.
    tuned_fitted, _ = forest.fine_tune(
        data=train_part,
        iterations=12,
        max_lead_time=timedelta(minutes=0.1),
    )
    tuned_prediction = forest.predict(fitted_model=tuned_fitted,
                                      data=test_part)
    tuned_roc_auc = roc_auc(y_true=test_part.target,
                            y_score=tuned_prediction)

    min_roc_auc = 0.7
    assert tuned_roc_auc != plain_roc_auc
    assert tuned_roc_auc > min_roc_auc
def test_pca_manual_tuning_correct(data_fixture, request):
    """Check that manually set 'pca_data_model' params change the test output.

    One model is fitted as created; a second one gets an explicit ``params``
    dict before fitting. Their predictions on the test split must not be equal.
    """
    dataset = request.getfixturevalue(data_fixture)
    fitted_scaling = Scaling().fit(dataset.features)
    dataset.features = fitted_scaling.apply(dataset.features)
    train_part, test_part = train_test_data_setup(data=dataset)

    # Reference model with untouched params.
    default_pca = Model(model_type='pca_data_model')
    default_fitted, _ = default_pca.fit(data=train_part)
    default_output = default_pca.predict(fitted_model=default_fitted,
                                         data=test_part)

    # Second model with params assigned by hand before fitting.
    manual_params = {
        'svd_solver': 'randomized',
        'iterated_power': 'auto',
        'dim_reduction_expl_thr': 0.7,
        'dim_reduction_min_expl': 0.001
    }
    manual_pca = Model(model_type='pca_data_model')
    manual_pca.params = manual_params
    manual_fitted, _ = manual_pca.fit(data=train_part)
    manual_output = manual_pca.predict(fitted_model=manual_fitted,
                                       data=test_part)

    outputs_match = np.array_equal(default_output, manual_output)
    assert not outputs_match