import datetime
import warnings

import utils  # test helper module providing the dataset loaders
from auto_ml import Predictor


def test_unmarked_categorical_column_throws_warning():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        # This is the column we are "forgetting" to mark as categorical
        # , 'sex': 'categorical'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)

    with warnings.catch_warnings(record=True) as caught_w:
        ml_predictor.train(df_titanic_train)
        print('we should be throwing a warning for the user to give them useful feedback on the unlabeled categorical column')
        assert len(caught_w) == 1

    ml_predictor.predict(df_titanic_test)

    # We want to make sure the above does not throw an error
    assert True
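# A minimal, self-contained sketch of the warnings.catch_warnings(record=True)
# pattern the test above relies on. `emit_column_warning` is hypothetical and
# stands in for Predictor.train() flagging the unmarked categorical column.
# One caveat worth noting: without warnings.simplefilter('always'), a warning
# already raised once in the process may be deduplicated and not recorded,
# which can make a strict len(caught) == 1 assertion flaky.
def _sketch_catch_warnings():
    import warnings

    def emit_column_warning():
        # Hypothetical stand-in for the library's unlabeled-column check.
        warnings.warn('column "sex" looks categorical but is not marked as such', UserWarning)

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')  # record every warning, even repeats
        emit_column_warning()

    assert len(caught) == 1
    assert issubclass(caught[-1].category, UserWarning)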
def test_unexpected_datetime_column_handled_without_errors():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train)

    test_dict = df_titanic_test.sample(frac=0.1).to_dict('records')[0]
    test_dict['unexpected_column'] = datetime.date.today()
    test_dict['another_unexpected_column'] = datetime.datetime.today()

    ml_predictor.predict(test_dict)

    # We want to make sure the above does not throw an error
    assert True
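# A minimal sketch of the robustness the test above checks: dropping keys the
# model never saw during training before calling predict(). `TRAINED_COLUMNS`
# and `sanitize_row` are hypothetical helpers, not part of auto_ml's API.
TRAINED_COLUMNS = {'sex', 'embarked', 'pclass'}  # assumed training schema

def sanitize_row(row, trained_columns=TRAINED_COLUMNS):
    # Keep only the columns the model was trained on, discarding unexpected
    # extras such as stray date/datetime values.
    return {k: v for k, v in row.items() if k in trained_columns}

# Example: the unexpected datetime key is silently dropped.
# sanitize_row({'sex': 'female', 'pclass': 1, 'unexpected_column': datetime.date.today()})
# -> {'sex': 'female', 'pclass': 1}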
        'LGBMClassifier', 'CatBoostClassifier', 'XGBClassifier',
    ],  # choose all classifiers
    # optimize_final_model=True,  # fails with an error :(
    # feature_learning=True,
    # fl_data=X_test.copy(),
    ml_for_analytics=False,
    verbose=False,
)

print(f'PREDICT ON FOLD {count}')
predictions = automl.predict_proba(X_test)
print('AUC: ', roc_auc_score(y_test, predictions[:, 1]))

y_test_predict_proba = predictions[:, 1]
y_test_predict = automl.predict(X_test)

END_EXPERIMENT = time.time()

# preds = pd.DataFrame(predictions)
# preds['Y'] = y_test.reset_index(drop=True)
# preds.to_csv(f'./result/predicts/{DATASET_NAME}_{MODEL_NAME}_predict_proba_exp_{EXPERIMENT}.csv', index=False)

metrics.append({
    'AUC': round(roc_auc_score(y_test, y_test_predict_proba), 4),
    'log_loss': round(log_loss(y_test, y_test_predict_proba), 4),
    'Accuracy': round(accuracy_score(y_test, y_test_predict), 4),
    'Time_min': (END_EXPERIMENT - START_EXPERIMENT) // 60,