# Imports reconstructed from how they are used in the tests below. The
# project-local helper modules (utils, utils_models) are assumed from the
# call sites; adjust paths to match the actual test layout.
import datetime
import os
import random

import dill
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from auto_ml import Predictor, utils_models

import utils


def test_perform_feature_scaling_false_regression():
    np.random.seed(42)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, perform_feature_scaling=False, model_names=['DeepLearningRegressor'])

    file_name = ml_predictor.save(str(random.random()))

    saved_ml_pipeline = utils_models.load_keras_model(file_name)
    # with open(file_name, 'rb') as read_file:
    #     saved_ml_pipeline = dill.load(read_file)

    os.remove(file_name)

    test_score = saved_ml_pipeline.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -24 < test_score < -2.8

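# All of these tests pull data from utils.get_boston_regression_dataset. A
# minimal sketch of that helper, assuming the classic scikit-learn Boston
# housing loader and a simple train/test split (hypothetical -- load_boston
# was removed in scikit-learn 1.2, so the real helper may source the data
# differently):
def _get_boston_regression_dataset_sketch():
    from sklearn.datasets import load_boston

    boston = load_boston()
    df_boston = pd.DataFrame(boston.data, columns=boston.feature_names)
    df_boston['MEDV'] = boston.target
    return train_test_split(df_boston, test_size=0.33, random_state=42)
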
def test_perform_feature_scaling_false_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, perform_feature_scaling=False, model_names=model_name)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    lower_bound = -3.2
    if model_name == 'DeepLearningRegressor':
        lower_bound = -8.8
    if model_name == 'LGBMRegressor':
        lower_bound = -4.95

    assert lower_bound < test_score < -2.8

def test_is_backwards_compatible_with_models_trained_using_1_9_6():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    with open(os.path.join('tests', 'trained_ml_model_v_1_9_6.dill'), 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    df_boston_test_dictionaries = df_boston_test.to_dict('records')

    # 1. make sure the accuracy is the same
    predictions = []
    for row in df_boston_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict(row))

    print('predictions')
    print(predictions)
    print('predictions[0]')
    print(predictions[0])
    print('type(predictions)')
    print(type(predictions))

    first_score = utils.calculate_rmse(df_boston_test.MEDV, predictions)
    print('first_score')
    print(first_score)
    # Make sure our score is good, but not unreasonably good
    assert -2.8 < first_score < -2.1

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_boston_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        row_num = idx % data_length
        saved_ml_pipeline.predict(df_boston_test_dictionaries[row_num])
    end_time = datetime.datetime.now()
    duration = end_time - start_time
    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom-of-the-line 15" MacBook Pro, this runs in about 0.8 seconds
    # for 1000 predictions. That's about 1 millisecond per prediction.
    # Assuming we might be running on a test box that's pretty weak, the upper bound
    # here is generous. Also make sure we're not running unreasonably quickly.
    assert 0.1 < duration.total_seconds() < 15

    # 3. make sure we're not modifying the dictionaries
    # (the score is the same after running a few experiments as it is the first time)
    predictions = []
    for row in df_boston_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict(row))

    second_score = utils.calculate_rmse(df_boston_test.MEDV, predictions)
    print('second_score')
    print(second_score)
    assert -2.8 < second_score < -2.1

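# The score assertions throughout this file compare against negative bounds
# because the scoring helpers follow sklearn's greater-is-better convention and
# return negative RMSE (so -2.9 is a better score than -3.2). A minimal sketch
# of such a helper, assuming plain numpy (hypothetical -- not necessarily the
# actual utils.calculate_rmse implementation):
def _calculate_neg_rmse_sketch(actuals, predictions):
    # Root-mean-squared error, negated so that higher scores are better.
    actuals = np.asarray(actuals, dtype=float)
    predictions = np.asarray(predictions, dtype=float)
    return -np.sqrt(np.mean((actuals - predictions) ** 2))
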
def test_input_df_unmodified():
    np.random.seed(42)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)

    df_shape = df_boston_train.shape
    ml_predictor.train(df_boston_train)

    training_shape = df_boston_train.shape
    assert training_shape[0] == df_shape[0]
    assert training_shape[1] == df_shape[1]

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.35 < test_score < -2.8

def categorical_ensembling_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train_categorical_ensemble(
        df_boston_train,
        perform_feature_selection=True,
        model_names=model_name,
        categorical_column='CHAS')

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Bumping this up since without these features our score drops
    lower_bound = -4.0
    if model_name == 'DeepLearningRegressor':
        lower_bound = -19
    if model_name == 'LGBMRegressor':
        lower_bound = -4.95

    assert lower_bound < test_score < -2.8

def ensemble_regressor_basic_test():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ensemble_config = [
        {'model_name': 'LGBMRegressor'},
        {'model_name': 'RandomForestRegressor'}
    ]

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    # Pass the config defined above. The original called train with
    # ensemble_config=None, which left ensemble_config unused and meant the
    # test never actually exercised ensembling.
    ml_predictor.train(df_boston_train, ensemble_config=ensemble_config)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.0 < test_score < -2.8

def test_prediction_intervals_actually_work():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, predict_intervals=[0.05, 0.95])

    df_boston_test = df_boston_test.reset_index(drop=True)
    intervals = ml_predictor.predict_intervals(df_boston_test)
    actuals = df_boston_test.MEDV

    count_under = 0
    count_over = 0
    # print(intervals)
    for idx, row in intervals.iterrows():
        actual = actuals.iloc[idx]

        if actual < row['interval_0.05']:
            count_under += 1
        if actual > row['interval_0.95']:
            count_over += 1

    len_intervals = len(intervals)

    pct_under = count_under * 1.0 / len_intervals
    pct_over = count_over * 1.0 / len_intervals
    # There's a decent bit of noise since this is such a small dataset
    assert pct_under < 0.15
    assert pct_over < 0.1

def test_select_from_multiple_regression_models_using_X_test_and_y_test():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(
        df_boston_train,
        model_names=[
            'LinearRegression', 'RandomForestRegressor', 'Ridge', 'GradientBoostingRegressor',
            'ExtraTreesRegressor', 'AdaBoostRegressor', 'SGDRegressor', 'PassiveAggressiveRegressor'
        ],
        X_test=df_boston_test,
        y_test=df_boston_test.MEDV)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Due to the small sample size of this test set, GSCV may sometimes pick
    # ExtraTreesRegressor as the best model, just very slightly beating out
    # GradientBoostingRegressor. ExtraTrees doesn't generalize as well, however,
    # scoring a mere -3.20-something and narrowly missing our cutoff from above.
    # Given that this is only an issue when running on tiny toy datasets, I'm not
    # concerned for the use cases I intend to support, and thus am bumping up the
    # bound on our error metric ever so slightly.
    assert -3.25 < test_score < -2.8

def test_score_uncertainty():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    df_boston_train, uncertainty_data = train_test_split(df_boston_train, test_size=0.5)

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(
        df_boston_train,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=uncertainty_data)

    uncertainty_score = ml_predictor.score_uncertainty(df_boston_test, df_boston_test.MEDV)

    print('uncertainty_score')
    print(uncertainty_score)

    assert uncertainty_score > -0.2

def optimize_final_model_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, optimize_final_model=True, model_names=model_name)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # the random seed gets a score of -3.21 on python 3.5
    # There's a ton of noise here, due to small sample sizes
    lower_bound = -3.4
    if model_name == 'DeepLearningRegressor':
        lower_bound = -20
    if model_name == 'LGBMRegressor':
        lower_bound = -5.5
    if model_name == 'GradientBoostingRegressor':
        lower_bound = -3.5

    assert lower_bound < test_score < -2.8

def test_prediction_intervals_actually_work():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    df_boston_train, uncertainty_data = train_test_split(df_boston_train, test_size=0.5)

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, predict_intervals=True)

    df_boston_test = df_boston_test.reset_index(drop=True)
    # Each row comes back as [prediction, lower_bound, upper_bound], so compare
    # the actuals against the interval bounds. The original iterated the raw
    # DataFrame and indexed row[3], which is out of range for a 3-item row.
    intervals = ml_predictor.predict_intervals(df_boston_test, return_type='list')
    actuals = df_boston_test.MEDV

    count_under = 0
    count_over = 0
    for idx, row in enumerate(intervals):
        actual = actuals.iloc[idx]

        if actual < row[1]:
            count_under += 1
        if actual > row[2]:
            count_over += 1

    len_intervals = len(intervals)

    pct_under = count_under * 1.0 / len_intervals
    pct_over = count_over * 1.0 / len_intervals
    # There's a decent bit of noise since this is such a small dataset
    assert pct_under < 0.1
    assert pct_over < 0.1

def test_all_algos_regression():
    # a random seed of 42 has ExtraTreesRegressor getting the best CV score,
    # and that model doesn't generalize as well as GradientBoostingRegressor.
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    # Note: the original list included 'XGBClassifier', which is a classifier;
    # for a regression test this should be 'XGBRegressor'.
    ml_predictor.train(
        df_boston_train,
        model_names=[
            'LinearRegression', 'RandomForestRegressor', 'Ridge', 'GradientBoostingRegressor',
            'AdaBoostRegressor', 'SGDRegressor', 'PassiveAggressiveRegressor', 'Lasso',
            'LassoLars', 'ElasticNet', 'OrthogonalMatchingPursuit', 'BayesianRidge',
            'ARDRegression', 'MiniBatchKMeans', 'DeepLearningRegressor', 'LGBMRegressor',
            'XGBRegressor', 'LinearSVR', 'CatBoostRegressor'
        ])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.4 < test_score < -2.8

def test_calibrate_uncertainty():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    df_boston_train, uncertainty_data = train_test_split(df_boston_train, test_size=0.5)
    uncertainty_data, uncertainty_calibration_data = train_test_split(uncertainty_data, test_size=0.5)

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)

    uncertainty_calibration_settings = {
        'num_buckets': 3,
        'percentiles': [25, 50, 75]
    }
    ml_predictor.train(
        df_boston_train,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=uncertainty_data,
        calibrate_uncertainty=True,
        uncertainty_calibration_settings=uncertainty_calibration_settings,
        uncertainty_calibration_data=uncertainty_calibration_data)

    uncertainty_score = ml_predictor.predict_uncertainty(df_boston_test)

    assert 'percentile_25_delta' in list(uncertainty_score.columns)
    assert 'percentile_50_delta' in list(uncertainty_score.columns)
    assert 'percentile_75_delta' in list(uncertainty_score.columns)
    assert 'bucket_num' in list(uncertainty_score.columns)

def test_all_algos_regression():
    # a random seed of 42 has ExtraTreesRegressor getting the best CV score,
    # and that model doesn't generalize as well as GradientBoostingRegressor.
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(
        df_boston_train,
        model_names=[
            'LinearRegression', 'RandomForestRegressor', 'Ridge', 'GradientBoostingRegressor',
            'ExtraTreesRegressor', 'AdaBoostRegressor', 'SGDRegressor', 'PassiveAggressiveRegressor'
        ])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.25 < test_score < -2.8

def test_regression():
    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    ml_predictor = utils.train_basic_regressor(df_boston_train)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV, verbose=0)

    # Currently, we expect to get a score of -3.09
    # Make sure our score is good, but not unreasonably good
    assert -3.2 < test_score < -2.8

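# test_regression above leans on a shared helper from the tests' utils module.
# A minimal sketch of what utils.train_basic_regressor is assumed to do, based
# on how the other tests in this file construct their predictors (hypothetical;
# the real helper may set additional training options):
def _train_basic_regressor_sketch(df_train):
    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }
    predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    predictor.train(df_train)
    return predictor
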
def test_getting_single_predictions_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    ml_predictor = utils.train_basic_regressor(df_boston_train)

    file_name = ml_predictor.save(str(random.random()))

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    os.remove(file_name)

    df_boston_test_dictionaries = df_boston_test.to_dict('records')

    # 1. make sure the accuracy is the same
    predictions = []
    for row in df_boston_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict(row))

    first_score = utils.calculate_rmse(df_boston_test.MEDV, predictions)
    print('first_score')
    print(first_score)
    # Make sure our score is good, but not unreasonably good
    assert -3.2 < first_score < -2.8

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_boston_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        row_num = idx % data_length
        saved_ml_pipeline.predict(df_boston_test_dictionaries[row_num])
    end_time = datetime.datetime.now()
    duration = end_time - start_time
    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom-of-the-line 15" MacBook Pro, this runs in about 0.8 seconds
    # for 1000 predictions. That's about 1 millisecond per prediction.
    # Assuming we might be running on a test box that's pretty weak, multiply by 3.
    # Also make sure we're not running unreasonably quickly.
    assert 0.2 < duration.total_seconds() < 3

    # 3. make sure we're not modifying the dictionaries
    # (the score is the same after running a few experiments as it is the first time)
    predictions = []
    for row in df_boston_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict(row))

    second_score = utils.calculate_rmse(df_boston_test.MEDV, predictions)
    print('second_score')
    print(second_score)
    # Make sure our score is good, but not unreasonably good
    assert -3.2 < second_score < -2.8

def test_saving_trained_pipeline_regression():
    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    ml_predictor = utils.train_basic_regressor(df_boston_train)

    file_name = ml_predictor.save()

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    # Clean up the saved pipeline file, matching the other save/load tests
    os.remove(file_name)

    test_score = saved_ml_pipeline.score(df_boston_test, df_boston_test.MEDV)

    # Make sure our score is good, but not unreasonably good
    assert -3.2 < test_score < -2.8

def test_predict_uncertainty_true():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    df_boston_train, uncertainty_data = train_test_split(df_boston_train, test_size=0.5)

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, predict_intervals=True)

    intervals = ml_predictor.predict_intervals(df_boston_test)

    assert isinstance(intervals, pd.DataFrame)
    assert intervals.shape[0] == df_boston_test.shape[0]

    result_list = ml_predictor.predict_intervals(df_boston_test, return_type='list')
    assert isinstance(result_list, list)
    assert len(result_list) == df_boston_test.shape[0]
    for idx, row in enumerate(result_list):
        assert isinstance(row, list)
        assert len(row) == 3

    singles = df_boston_test.head().to_dict('records')

    for row in singles:
        result = ml_predictor.predict_intervals(row)
        assert isinstance(result, dict)
        assert 'prediction' in result
        assert 'interval_0.05' in result
        assert 'interval_0.95' in result

    for row in singles:
        result = ml_predictor.predict_intervals(row, return_type='list')
        assert isinstance(result, list)
        assert len(result) == 3

    df_intervals = ml_predictor.predict_intervals(df_boston_test, return_type='df')
    assert isinstance(df_intervals, pd.DataFrame)

    try:
        ml_predictor.predict_intervals(df_boston_test, return_type='this will not work')
        assert False
    except ValueError:
        assert True

def test_predict_intervals_should_fail_if_not_trained():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train)

    try:
        ml_predictor.predict_intervals(df_boston_test)
        assert False
    except ValueError:
        assert True

def test_prediction_intervals_lets_the_user_specify_number_of_intervals():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, predict_intervals=True, prediction_intervals=[.2])

    intervals = ml_predictor.predict_intervals(df_boston_test, return_type='list')

    assert len(intervals[0]) == 2

def test_all_algos_regression():
    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(
        df_boston_train,
        model_names=[
            'LinearRegression', 'RandomForestRegressor', 'Ridge', 'GradientBoostingRegressor',
            'ExtraTreesRegressor', 'AdaBoostRegressor', 'SGDRegressor', 'PassiveAggressiveRegressor'
        ])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.25 < test_score < -2.8

def test_compute_power_1_regression():
    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, compute_power=1)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.2 < test_score < -2.8

def test_compare_all_models_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, compare_all_models=True)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.2 < test_score < -2.8

def test_perform_feature_selection_false_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, perform_feature_selection=False)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.2 < test_score < -2.8

def test_predict_uncertainty_returns_pandas_DataFrame_for_more_than_one_value():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    df_boston_train, uncertainty_data = train_test_split(df_boston_train, test_size=0.5)

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(
        df_boston_train,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=uncertainty_data)

    uncertainties = ml_predictor.predict_uncertainty(df_boston_test)

    assert isinstance(uncertainties, pd.DataFrame)

def test_perform_feature_selection_true_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, perform_feature_selection=True)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Bumping this up since without these features our score drops
    assert -4.0 < test_score < -2.8

def test_optimize_final_model_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, optimize_final_model=True)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # the random seed gets a score of -3.21 on python 3.5
    assert -3.25 < test_score < -2.8

def test_compute_power_1_regression():
    np.random.seed(42)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, compute_power=1, model_names=['LGBMRegressor'])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -9.5 < test_score < -2.8

def test_perform_feature_scaling_true_regression():
    np.random.seed(42)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, perform_feature_scaling=True, model_names=['DeepLearningRegressor'])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -24 < test_score < -2.8

def test_predict_uncertainty_returns_dict_for_one_value():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output',
        'CHAS': 'categorical'
    }

    df_boston_train, uncertainty_data = train_test_split(df_boston_train, test_size=0.5)

    ml_predictor = Predictor(type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(
        df_boston_train,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=uncertainty_data)

    test_list = df_boston_test.to_dict('records')

    for item in test_list:
        prediction = ml_predictor.predict_uncertainty(item)
        assert isinstance(prediction, dict)