def test_predict_uncertainty_returns_dict_for_one_value():
    """Check that predict_uncertainty() on a single record returns a dict.

    Half of the frame returned by ``utils.get_boston_regression_dataset()``
    is held out as ``uncertainty_data``. The predictor is trained with
    feature selection and an uncertainty model, then every test row is
    passed to ``predict_uncertainty`` one record at a time.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()
    # Split the training rows in half: one half trains the model, the other
    # is handed to train() as uncertainty_data.
    train_df, holdout_df = train_test_split(train_df, test_size=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(
        train_df,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=holdout_df
    )

    for record in test_df.to_dict('records'):
        assert isinstance(predictor.predict_uncertainty(record), dict)
def test_select_from_multiple_regression_models_using_X_test_and_y_test():
    """Train several candidate regressors while passing X_test / y_test.

    The test frame is handed to ``train()`` as ``X_test`` / ``y_test`` and is
    then also used for the final ``score()`` call. No random seed is set in
    this test.
    """
    train_df, test_df = utils.get_boston_regression_dataset()

    candidate_models = [
        'LinearRegression',
        'RandomForestRegressor',
        'Ridge',
        'GradientBoostingRegressor',
        'ExtraTreesRegressor',
        'AdaBoostRegressor',
        'SGDRegressor',
        'PassiveAggressiveRegressor',
    ]

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(
        train_df,
        model_names=candidate_models,
        X_test=test_df,
        y_test=test_df.MEDV
    )

    test_score = predictor.score(test_df, test_df.MEDV)
    print('test_score')
    print(test_score)

    # Original author's rationale for the bounds: on this small test set the
    # model search may occasionally prefer ExtraTreesRegressor over
    # GradientBoostingRegressor by a very small margin. ExtraTrees generalizes
    # slightly worse (roughly -3.20), which narrowly missed the earlier
    # cutoff. Since this only matters on tiny toy datasets, the bound on the
    # error metric was loosened slightly.
    assert -3.25 < test_score < -2.8
def test_perform_feature_selection_true_regression():
    """Train a DeepLearningRegressor with feature selection enabled.

    Seeds numpy with 42, trains on the regression training frame with
    ``perform_feature_selection=True`` and checks the test score falls
    inside a wide window.
    """
    np.random.seed(42)

    train_df, test_df = utils.get_boston_regression_dataset()

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(
        train_df,
        perform_feature_selection=True,
        model_names=['DeepLearningRegressor']
    )

    test_score = predictor.score(test_df, test_df.MEDV)
    print('test_score')
    print(test_score)

    # Original author's note: the window was widened because the score drops
    # once features are removed by selection.
    worst_allowed, best_allowed = -24, -2.8
    assert worst_allowed < test_score < best_allowed
def test_perform_feature_scaling_true_classification():
    """Train an LGBMClassifier with feature scaling turned on.

    Uses the titanic binary-classification frames from ``utils`` and asserts
    the resulting test score lies between -0.215 and -0.17.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(
        train_df,
        perform_feature_scaling=True,
        model_names=['LGBMClassifier']
    )

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_nans_in_output_column():
    """Train a default classifier on the titanic frame and check its score.

    NOTE(review): despite the test name, nothing in this body inserts NaN
    values into the 'survived' column. Presumably the fixture returned by
    ``utils.get_titanic_binary_classification_dataset()`` already contains
    them -- confirm, or inject NaNs explicitly.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(train_df)

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_compare_all_models_regression():
    """Train with ``compare_all_models=True`` and bound the test score."""
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(train_df, compare_all_models=True)

    test_score = predictor.score(test_df, test_df.MEDV)
    print('test_score')
    print(test_score)

    # Original author's note: ExtraTrees skews the result here as well, hence
    # the looser lower bound.
    assert -3.6 < test_score < -2.8
def test_throws_warning_when_fl_data_equals_df_train():
    """Expect exactly one warning when fl_data is the training frame itself.

    ``train()`` is called with ``feature_learning=True`` and the same frame
    passed both as training data and as ``fl_data``. All warnings raised
    during the call are recorded and the test asserts exactly one was seen.
    """
    train_df, _test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )

    with warnings.catch_warnings(record=True) as recorded:
        try:
            predictor.train(train_df, feature_learning=True, fl_data=train_df)
        except KeyError:
            # A KeyError from train() is tolerated on purpose so the recorded
            # warnings can still be inspected below. (The original author's
            # note says train() is expected to raise here.)
            pass

    for entry in recorded:
        print(entry)

    assert len(recorded) == 1
def test_all_algos_regression():
    """Train every supported regression algorithm and bound the test score.

    All names in ``model_names`` are handed to a single ``train()`` call;
    the resulting predictor is scored on the held-out test frame.
    """
    # Original author's note: with a random seed of 42, ExtraTreesRegressor
    # gets the best CV score, and that model doesn't generalize as well as
    # GradientBoostingRegressor.
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical',
    }

    ml_predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions=column_descriptions
    )

    # Fix: this list previously contained 'XGBClassifier', a classifier, in a
    # regression test. 'XGBRegressor' is the regression counterpart.
    # NOTE(review): 'MiniBatchKMeans' is kept as-is; presumably the library
    # maps it to a usable estimator for regression -- confirm.
    model_names = [
        'LinearRegression',
        'RandomForestRegressor',
        'Ridge',
        'GradientBoostingRegressor',
        'AdaBoostRegressor',
        'SGDRegressor',
        'PassiveAggressiveRegressor',
        'Lasso',
        'LassoLars',
        'ElasticNet',
        'OrthogonalMatchingPursuit',
        'BayesianRidge',
        'ARDRegression',
        'MiniBatchKMeans',
        'DeepLearningRegressor',
        'LGBMRegressor',
        'XGBRegressor',
        'LinearSVR',
        'CatBoostRegressor',
    ]
    ml_predictor.train(df_boston_train, model_names=model_names)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)
    print('test_score')
    print(test_score)

    assert -3.4 < test_score < -2.8
def test_binary_classification():
    """Train a classifier with a custom ``scoring`` callable.

    ``always_return_ten_thousand`` is defined outside this block and passed
    to ``train()`` as ``scoring``. The test asserts that ``score()`` then
    returns exactly -10000.
    NOTE(review): presumably the scorer's constant is negated by score() --
    confirm against the scorer's definition.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(train_df, scoring=always_return_ten_thousand)

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    assert test_score == -10000
def test_bad_val_in_column_descriptions():
    """Expect exactly one warning for an unknown column description value.

    'fare' is mapped to 'this_is_a_bad_value'; constructing the Predictor
    under a recording warnings context should capture a single warning.
    """
    np.random.seed(0)

    # The frames are loaded as in the original test but not used afterwards.
    _train_df, _test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
        'fare': 'this_is_a_bad_value',
    }

    with warnings.catch_warnings(record=True) as recorded:
        # Only construction is exercised; the instance itself is not used.
        _predictor = Predictor(
            type_of_estimator='classifier',
            column_descriptions=descriptions
        )

    print('we should be throwing a warning for the user to give them useful feedback')
    assert len(recorded) == 1
def test_perform_feature_selection_false_classification():
    """Train a classifier with feature selection disabled and check its score.

    No random seed is set in this test.
    """
    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(train_df, perform_feature_selection=False)

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_optimize_final_model_classification():
    """Train a classifier with ``optimize_final_model=True`` and check its score.

    No random seed is set in this test.
    """
    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(train_df, optimize_final_model=True)

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_select_from_multiple_classification_models_using_X_test_and_y_test():
    """Train several candidate classifiers while passing X_test / y_test.

    The test frame is given to ``train()`` as ``X_test`` / ``y_test`` and is
    reused for the final ``score()`` call.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    candidate_models = [
        'LGBMClassifier',
        'LogisticRegression',
        'RandomForestClassifier',
        'RidgeClassifier',
        'GradientBoostingClassifier',
        'ExtraTreesClassifier',
        'AdaBoostClassifier',
        'SGDClassifier',
        'Perceptron',
        'PassiveAggressiveClassifier',
    ]
    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }

    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(
        train_df,
        model_names=candidate_models,
        X_test=test_df,
        y_test=test_df.survived
    )

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_prediction_intervals_lets_the_user_specify_number_of_intervals():
    """Check that a single requested interval yields two values per row.

    ``train()`` is called with ``prediction_intervals=[.2]``; the first row
    of the list-typed result is asserted to have length 2.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()
    # Only the first half of the split is used; the second half is discarded.
    train_df, _discarded = train_test_split(train_df, test_size=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(train_df, predict_intervals=True, prediction_intervals=[.2])

    interval_rows = predictor.predict_intervals(test_df, return_type='list')
    first_row = interval_rows[0]
    assert len(first_row) == 2
def test_user_input_func_classification():
    """Train with a ``user_input_func`` that adds an 'age_bucket' column."""
    np.random.seed(0)

    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    def age_bucketing(df):
        """Add an 'age_bucket' label column derived from ``df.age``; return df."""

        def bucket_label(age):
            # Checked in ascending order; any age that fails every comparison
            # falls through to 'over_60'.
            if age <= 17:
                return 'youth'
            if age <= 40:
                return 'adult'
            if age <= 60:
                return 'adult2'
            return 'over_60'

        df['age_bucket'] = df.age.apply(bucket_label)
        return df

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(
        train_df,
        perform_feature_scaling=False,
        user_input_func=age_bucketing
    )

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def optimize_final_model_classification(model_name=None):
    """Train ``model_name`` with ``optimize_final_model=True`` and bound the score.

    Args:
        model_name: value forwarded to ``train()`` as ``model_names``. It
            also selects the lower bound used in the final assertion.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    # The aim is only to confirm these run, not that they are highly accurate
    # (which would take longer and depends on the dataset), so train on a
    # random half of the rows.
    train_df = train_df.sample(frac=0.5)

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(train_df, optimize_final_model=True, model_names=model_name)

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    # Small sample sizes make the score noisy, so some models get a looser
    # lower bound than the default.
    if model_name == 'DeepLearningClassifier':
        lower_bound = -0.255
    elif model_name == 'LGBMClassifier':
        lower_bound = -0.221
    elif model_name == 'GradientBoostingClassifier':
        lower_bound = -0.225
    elif model_name == 'CatBoostClassifier':
        lower_bound = -0.221
    else:
        lower_bound = -0.18

    assert lower_bound < test_score < -0.135
def test_predict_uncertainty_true():
    """Exercise ``predict_intervals`` for frames, single rows and return types.

    Checks the default (list of lists, four values per row), the dict result
    for a single record, the list result for a single record, and the
    DataFrame result when ``return_type='df'``.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()
    # Only the first half of the split is used for training.
    train_df, _discarded = train_test_split(train_df, test_size=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(train_df, predict_intervals=True)

    # Whole frame, default return type.
    batch_result = predictor.predict_intervals(test_df)
    assert isinstance(batch_result, list)
    assert isinstance(batch_result[0], list)
    assert len(batch_result[0]) == 4
    assert len(batch_result) == len(test_df)

    sample_rows = test_df.head().to_dict('records')

    # Single record, default return type: a dict with these keys.
    expected_keys = (
        'prediction',
        'prediction_lower',
        'prediction_upper',
        'prediction_median',
    )
    for record in sample_rows:
        as_dict = predictor.predict_intervals(record)
        assert isinstance(as_dict, dict)
        for key in expected_keys:
            assert key in as_dict

    # Single record, list return type.
    for record in sample_rows:
        as_list = predictor.predict_intervals(record, return_type='list')
        assert isinstance(as_list, list)
        assert len(as_list) == 4

    # Whole frame, DataFrame return type.
    as_frame = predictor.predict_intervals(test_df, return_type='df')
    assert isinstance(as_frame, pd.DataFrame)
def test_score_uncertainty():
    """Train an uncertainty model and require its score to exceed -0.2."""
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()
    # One half trains the model; the other half is passed as uncertainty_data.
    train_df, holdout_df = train_test_split(train_df, test_size=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(
        train_df,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=holdout_df
    )

    uncertainty_score = predictor.score_uncertainty(test_df, test_df.MEDV)
    print('uncertainty_score')
    print(uncertainty_score)

    assert uncertainty_score > -0.2
def test_predict_uncertainty_returns_pandas_DataFrame_for_more_than_one_value():
    """Check that predict_uncertainty() on a whole frame returns a DataFrame."""
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()
    # One half trains the model; the other half is passed as uncertainty_data.
    train_df, holdout_df = train_test_split(train_df, test_size=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(
        train_df,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=holdout_df
    )

    result = predictor.predict_uncertainty(test_df)
    assert isinstance(result, pd.DataFrame)
def test_unexpected_datetime_column_handled_without_errors():
    """Predict on a record carrying two extra date/datetime-valued keys.

    The record is taken from a 10% random sample of the test frame and two
    keys that were not present at training time are added. The test passes
    as long as ``predict()`` does not raise.
    """
    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(train_df)

    sampled_records = test_df.sample(frac=0.1).to_dict('records')
    record = sampled_records[0]
    record['unexpected_column'] = datetime.date.today()
    record['anoter_unexpected_column'] = datetime.datetime.today()

    # Reaching the final assert means predict() did not raise.
    predictor.predict(record)

    assert True
def test_perform_feature_scaling_false_regression(model_name=None):
    """Train a regressor with feature scaling disabled and bound the score.

    Args:
        model_name: forwarded to ``train()`` as ``model_names``.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(train_df, perform_feature_scaling=False, model_names=model_name)

    test_score = predictor.score(test_df, test_df.MEDV)
    print('test_score')
    print(test_score)

    lower_bound, upper_bound = -3.0, -2.7
    assert lower_bound < test_score < upper_bound
def test_X_test_and_y_test_regression():
    """Train a DeepLearningRegressor while passing X_test / y_test.

    The test frame is printed before and after ``train()`` and then used for
    the final ``score()`` call.
    """
    np.random.seed(42)

    train_df, test_df = utils.get_boston_regression_dataset()

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )

    print(test_df)
    predictor.train(
        train_df,
        X_test=test_df,
        y_test=test_df.MEDV,
        model_names=['DeepLearningRegressor']
    )
    print(test_df)

    test_score = predictor.score(test_df, test_df.MEDV)
    print('test_score')
    print(test_score)

    worst_allowed, best_allowed = -24, -2.8
    assert worst_allowed < test_score < best_allowed
def test_model_uses_user_provided_training_params(model_name=None):
    """Expect a ValueError when ``training_params`` holds an invalid key.

    Args:
        model_name: accepted but not used in this body; the model is fixed to
            'RidgeClassifier'.
    """
    np.random.seed(0)

    train_df, _test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )

    try:
        predictor.train(
            train_df,
            model_names='RidgeClassifier',
            training_params={'this_param_is_not_valid': True}
        )
    except ValueError:
        # Expected path: the invalid parameter was rejected.
        assert True
    else:
        # train() returned normally, so the invalid parameter was accepted.
        assert False
def train_basic_binary_classifier():
    """Train a basic LGBMClassifier on the titanic training frame.

    Feature scaling and feature selection are disabled, ``verbose`` is off
    and the 'name' column is marked 'ignore'.

    Returns:
        A ``(ml_predictor, df_titanic_test)`` tuple: the trained predictor
        and the untouched test frame.
    """
    np.random.seed(0)

    train_df, test_df = get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'name': 'ignore',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(
        train_df,
        verbose=False,
        perform_feature_scaling=False,
        model_names=['LGBMClassifier'],
        perform_feature_selection=False
    )

    return predictor, test_df
def test_all_algos_classification(model_name=None):
    """Train every listed classification algorithm and bound the test score.

    Args:
        model_name: accepted but not used in this body; the full list of
            models below is always trained.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    all_classifiers = [
        'LogisticRegression',
        'RandomForestClassifier',
        'RidgeClassifier',
        'GradientBoostingClassifier',
        'ExtraTreesClassifier',
        'AdaBoostClassifier',
        'SGDClassifier',
        'Perceptron',
        'PassiveAggressiveClassifier',
        'DeepLearningClassifier',
        'XGBClassifier',
        'LGBMClassifier',
        'LinearSVC',
    ]
    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }

    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    predictor.train(train_df, model_names=all_classifiers)

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    # Original author's note: linear models don't do especially well on this
    # dataset.
    assert -0.215 < test_score < -0.131
def test_pass_in_list_of_dictionaries_predict_classification(model_name=None):
    """Score a classifier using a list of dictionaries as the input rows.

    Training uses the DataFrame; only the scoring call receives the test
    data as a list of record dictionaries.

    Args:
        model_name: forwarded to ``train()`` as ``model_names``.
    """
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=column_descriptions
    )

    # The training frame used to be converted to a list of dicts here, but
    # the result was never used; that dead conversion has been removed.
    ml_predictor.train(df_titanic_train, model_names=model_name)

    # This is the behavior under test: score() is given a list of dicts.
    test_records = df_titanic_test.to_dict('records')
    test_score = ml_predictor.score(test_records, df_titanic_test.survived)
    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_input_df_unmodified():
    """Check that ``train()`` leaves the shape of the input frame unchanged.

    The frame's shape is captured before and after training and compared
    dimension by dimension; the test score is then bounded as well.
    """
    np.random.seed(42)

    train_df, test_df = utils.get_boston_regression_dataset()

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )

    shape_before = train_df.shape
    predictor.train(train_df)
    shape_after = train_df.shape

    # Row count first, then column count.
    assert shape_after[0] == shape_before[0]
    assert shape_after[1] == shape_before[1]

    test_score = predictor.score(test_df, test_df.MEDV)
    print('test_score')
    print(test_score)

    assert -3.35 < test_score < -2.8
def test_list_of_single_model_name_classification():
    """Pass ``model_names`` as a one-element list and bound the test score."""
    np.random.seed(0)

    model_name = 'GradientBoostingClassifier'

    train_df, test_df = utils.get_titanic_binary_classification_dataset()

    descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical',
    }
    predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=descriptions
    )
    # The single name is deliberately wrapped in a list.
    predictor.train(train_df, model_names=[model_name])

    test_score = predictor.score(test_df, test_df.survived)
    print('test_score')
    print(test_score)

    assert -0.16 < test_score < -0.135
def test_verify_features_finds_missing_training_features():
    """Check verify_features() reports a column missing from training data.

    The 'sibsp' column is dropped from the training frame only. The trained
    predictor is saved, reloaded with dill, and the reloaded pipeline's
    'final_model' step is asked to verify the (complete) test frame. Exactly
    one feature should be reported under 'prediction_not_training' and none
    under 'training_not_prediction'.
    """
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
        'sex': 'categorical',
    }

    # Remove the "sibsp" column from our training data
    df_titanic_train = df_titanic_train.drop('sibsp', axis=1)

    ml_predictor = Predictor(
        type_of_estimator='classifier',
        column_descriptions=column_descriptions
    )
    ml_predictor.train(df_titanic_train, verify_features=True)

    # A random file name is passed to save(); the path it returns is the one
    # reloaded and removed below.
    file_name = ml_predictor.save(str(random.random()))

    # Fix: remove the saved file even when loading fails, so a failing run
    # does not leave the serialized model behind on disk.
    try:
        with open(file_name, 'rb') as read_file:
            saved_ml_pipeline = dill.load(read_file)
    finally:
        os.remove(file_name)

    final_model = saved_ml_pipeline.named_steps['final_model']
    missing_features = final_model.verify_features(df_titanic_test)

    print('missing_features')
    print(missing_features)

    print("len(missing_features['prediction_not_training'])")
    print(len(missing_features['prediction_not_training']))
    print("len(missing_features['training_not_prediction'])")
    print(len(missing_features['training_not_prediction']))

    assert len(missing_features['prediction_not_training']) == 1
    assert len(missing_features['training_not_prediction']) == 0
def optimize_final_model_regression(model_name=None):
    """Train ``model_name`` with ``optimize_final_model=True`` and bound the score.

    Args:
        model_name: value forwarded to ``train()`` as ``model_names``. It
            also selects the lower bound used in the final assertion.
    """
    np.random.seed(0)

    train_df, test_df = utils.get_boston_regression_dataset()

    # The aim is only to confirm these run, not that they are highly accurate
    # (which would take longer and depends on the dataset), so train on a
    # random half of the rows.
    train_df = train_df.sample(frac=0.5)

    predictor = Predictor(
        type_of_estimator='regressor',
        column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'}
    )
    predictor.train(train_df, optimize_final_model=True, model_names=model_name)

    test_score = predictor.score(test_df, test_df.MEDV)
    print('test_score')
    print(test_score)

    # Original author's notes: this random seed scores -3.21 on python 3.5,
    # and small sample sizes make the results very noisy, so several models
    # get a looser lower bound than the default.
    if model_name == 'DeepLearningRegressor':
        lower_bound = -24
    elif model_name == 'LGBMRegressor':
        lower_bound = -9.5
    elif model_name == 'GradientBoostingRegressor':
        lower_bound = -5.1
    elif model_name == 'CatBoostRegressor':
        lower_bound = -4.5
    elif model_name == 'XGBRegressor':
        lower_bound = -4.8
    else:
        lower_bound = -3.4

    assert lower_bound < test_score < -2.75