def test_calibrate_uncertainty():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output'
        , 'CHAS': 'categorical'
    }

    df_boston_train, uncertainty_data = train_test_split(df_boston_train, test_size=0.5)
    uncertainty_data, uncertainty_calibration_data = train_test_split(
        uncertainty_data, test_size=0.5)

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    uncertainty_calibration_settings = {
        'num_buckets': 3
        , 'percentiles': [25, 50, 75]
    }

    ml_predictor.train(
        df_boston_train,
        perform_feature_selection=True,
        train_uncertainty_model=True,
        uncertainty_data=uncertainty_data,
        calibrate_uncertainty=True,
        uncertainty_calibration_settings=uncertainty_calibration_settings,
        uncertainty_calibration_data=uncertainty_calibration_data)

    uncertainty_score = ml_predictor.predict_uncertainty(df_boston_test)

    assert 'percentile_25_delta' in list(uncertainty_score.columns)
    assert 'percentile_50_delta' in list(uncertainty_score.columns)
    assert 'percentile_75_delta' in list(uncertainty_score.columns)
    assert 'bucket_num' in list(uncertainty_score.columns)
def test_perform_feature_scaling_false_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(
        df_titanic_train, perform_feature_scaling=False, model_names=model_name)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    lower_bound = -0.215
    if model_name == 'DeepLearningClassifier':
        lower_bound = -0.235

    assert lower_bound < test_score < -0.17
def test_include_bad_y_vals_predict_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    # Inject bad y values into the test set (.loc replaces the long-deprecated .ix indexer)
    df_titanic_test.loc[1, 'survived'] = float('nan')
    df_titanic_test.loc[8, 'survived'] = float('inf')
    df_titanic_test.loc[26, 'survived'] = None

    ml_predictor.train(df_titanic_train, model_names=model_name)

    test_score = ml_predictor.score(
        df_titanic_test.to_dict('records'), df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_perform_feature_selection_true_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output'
        , 'CHAS': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(
        df_boston_train, perform_feature_selection=True, model_names=model_name)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Bumping this up since without these features our score drops
    lower_bound = -4.0
    if model_name == 'DeepLearningRegressor':
        lower_bound = -14.5
    if model_name == 'LGBMRegressor':
        lower_bound = -4.95

    assert lower_bound < test_score < -2.8
def test_select_from_multiple_classification_models_using_X_test_and_y_test():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(
        df_titanic_train,
        model_names=[
            'LogisticRegression', 'RandomForestClassifier', 'RidgeClassifier',
            'GradientBoostingClassifier', 'ExtraTreesClassifier', 'AdaBoostClassifier',
            'SGDClassifier', 'Perceptron', 'PassiveAggressiveClassifier'
        ],
        X_test=df_titanic_test,
        y_test=df_titanic_test.survived)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_all_algos_regression():
    # A random seed of 42 has ExtraTreesRegressor getting the best CV score, and that model
    # doesn't generalize as well as GradientBoostingRegressor.
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(
        df_boston_train,
        model_names=[
            'LinearRegression', 'RandomForestRegressor', 'Ridge',
            'GradientBoostingRegressor', 'ExtraTreesRegressor', 'AdaBoostRegressor',
            'SGDRegressor', 'PassiveAggressiveRegressor', 'Lasso', 'LassoLars',
            'ElasticNet', 'OrthogonalMatchingPursuit', 'BayesianRidge', 'ARDRegression',
            'MiniBatchKMeans', 'DeepLearningRegressor'
        ])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.35 < test_score < -2.8
def test_unmarked_date_column():
    np.random.seed(0)

    df_twitter_train, df_twitter_test = utils.get_twitter_sentiment_multilabel_classification_dataset()

    column_descriptions = {
        'airline_sentiment': 'output',
        'airline': 'categorical',
        'text': 'nlp',
        'tweet_location': 'categorical',
        'user_timezone': 'categorical'
        # tweet_created is our date column. We want to test that "forgetting" to mark it as
        # a date column throws a user warning before throwing the error
        # , 'tweet_created': 'date'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    with warnings.catch_warnings(record=True) as w:
        try:
            ml_predictor.train(df_twitter_train)
        except TypeError as e:
            pass

    print('Here are the caught warnings:')
    print(w)

    assert len(w) == 1
def test_linear_model_analytics_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train, model_names='RidgeClassifier')

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    # Linear models aren't super great on this dataset...
    assert -0.21 < test_score < -0.17
def test_pass_in_list_of_dictionaries_predict_classification():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    list_titanic_train = df_titanic_train.to_dict('records')

    ml_predictor.train(df_titanic_train)

    test_score = ml_predictor.score(
        df_titanic_test.to_dict('records'), df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def ensemble_regressor_basic_test():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output'
        , 'CHAS': 'categorical'
    }

    ensemble_config = [
        {'model_name': 'LGBMRegressor'}
        , {'model_name': 'RandomForestRegressor'}
    ]

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    # Pass the ensemble_config defined above so this test actually exercises ensembling
    ml_predictor.train(df_boston_train, ensemble_config=ensemble_config)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.0 < test_score < -2.8
def test_all_algos_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(
        df_titanic_train,
        model_names=[
            'LogisticRegression', 'RandomForestClassifier', 'RidgeClassifier',
            'GradientBoostingClassifier', 'ExtraTreesClassifier', 'AdaBoostClassifier',
            'SGDClassifier', 'Perceptron', 'PassiveAggressiveClassifier',
            'DeepLearningClassifier', 'XGBClassifier', 'LGBMClassifier'
        ])

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.215 < test_score < -0.17
def test_nlp_multilabel_classification():
    np.random.seed(0)

    df_twitter_train, df_twitter_test = utils.get_twitter_sentiment_multilabel_classification_dataset()

    column_descriptions = {
        'airline_sentiment': 'output',
        'airline': 'categorical',
        'text': 'nlp',
        'tweet_location': 'categorical',
        'user_timezone': 'categorical',
        'tweet_created': 'date'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_twitter_train)

    test_score = ml_predictor.score(
        df_twitter_test, df_twitter_test.airline_sentiment, verbose=0)

    # Make sure our score is good, but not unreasonably good
    print('test_score')
    print(test_score)

    assert 0.67 < test_score < 0.79
def optimize_final_model_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(
        df_boston_train, optimize_final_model=True, model_names=model_name)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # the random seed gets a score of -3.21 on python 3.5
    # There's a ton of noise here, due to small sample sizes
    lower_bound = -3.4
    if model_name == 'DeepLearningRegressor':
        lower_bound = -13
    if model_name == 'LGBMRegressor':
        lower_bound = -5.5
    if model_name == 'GradientBoostingRegressor':
        lower_bound = -3.5

    assert lower_bound < test_score < -2.8
def categorical_ensembling_regression(model_name=None):
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train_categorical_ensemble(
        df_boston_train,
        perform_feature_selection=True,
        model_names=model_name,
        categorical_column='CHAS')

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Bumping this up since without these features our score drops
    lower_bound = -4.0
    if model_name == 'DeepLearningRegressor':
        # NOTE: the model fails to learn for one of the classes. might be worth looking into more
        lower_bound = -16
    if model_name == 'LGBMRegressor':
        lower_bound = -4.95

    assert lower_bound < test_score < -2.8
def test_throws_warning_when_fl_data_equals_df_train():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    with warnings.catch_warnings(record=True) as w:
        try:
            ml_predictor.train(
                df_titanic_train, feature_learning=True, fl_data=df_titanic_train)
        except KeyError as e:
            pass
        # We should not be getting to this line- we should be throwing an error above

        for thing in w:
            print(thing)

    assert len(w) >= 1
    assert True
def test_verify_features_finds_no_missing_features_when_none_are_missing():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train, verify_features=True)

    file_name = ml_predictor.save(str(random.random()))

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    os.remove(file_name)

    missing_features = saved_ml_pipeline.named_steps['final_model'].verify_features(df_titanic_test)
    print('missing_features')
    print(missing_features)

    print("len(missing_features['prediction_not_training'])")
    print(len(missing_features['prediction_not_training']))
    print("len(missing_features['training_not_prediction'])")
    print(len(missing_features['training_not_prediction']))

    assert len(missing_features['prediction_not_training']) == 0
    assert len(missing_features['training_not_prediction']) == 0
def test_categorical_ensemble_basic_classifier():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output',
        'pclass': 'categorical',
        'embarked': 'categorical',
        'sex': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train_categorical_ensemble(
        df_titanic_train, categorical_column='pclass', optimize_final_model=False)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    # Small sample sizes mean there's a fair bit of noise here
    assert -0.155 < test_score < -0.135
def ensemble_classifier_basic_test(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'sex': 'categorical'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ensemble_config = [
        {'model_name': 'LGBMClassifier'}
        , {'model_name': 'RandomForestClassifier'}
    ]

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train, ensemble_config=ensemble_config)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived)

    print('test_score')
    print(test_score)

    assert -0.15 < test_score < -0.131
def test_prediction_intervals_actually_work():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, predict_intervals=[0.05, 0.95])

    df_boston_test = df_boston_test.reset_index(drop=True)
    intervals = ml_predictor.predict_intervals(df_boston_test)
    actuals = df_boston_test.MEDV

    count_under = 0
    count_over = 0
    # print(intervals)
    for idx, row in intervals.iterrows():
        actual = actuals.iloc[idx]

        if actual < row['interval_0.05']:
            count_under += 1
        if actual > row['interval_0.95']:
            count_over += 1

    len_intervals = len(intervals)

    pct_under = count_under * 1.0 / len_intervals
    pct_over = count_over * 1.0 / len_intervals

    # There's a decent bit of noise since this is such a small dataset
    assert pct_under < 0.15
    assert pct_over < 0.1
def train_basic_regressor(df_boston_train):
    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)
    ml_predictor.train(df_boston_train, verbose=False)
    return ml_predictor
def train_basic_binary_classifier(df_titanic_train):
    column_descriptions = {
        'survived': 'output'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train)
    return ml_predictor
def train_basic_multilabel_classifier(df_twitter_train):
    column_descriptions = {
        'airline_sentiment': 'output'
        , 'airline': 'categorical'
        , 'text': 'ignore'
        , 'tweet_location': 'categorical'
        , 'user_timezone': 'categorical'
        , 'tweet_created': 'date'
    }

    ml_predictor = Predictor(
        type_of_estimator='classifier', column_descriptions=column_descriptions)
    ml_predictor.train(df_twitter_train)
    return ml_predictor
def __init__(self, file_name, column_descriptions):
    # file_name: path to a pipeline saved with dill (assumed to end in '.dill', since five
    #   characters are stripped off below when building the Keras file name).
    # column_descriptions: the same column mapping that was used at training time.
    # Both are taken as parameters here because they are not defined anywhere else in scope.

    # self.trained_ml_pipeline = Sequential()
    self.trained_ml_pipeline = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    with open(file_name, 'rb') as read_file:
        datastruct = dill.load(read_file)

    for step in datastruct.named_steps:
        pipeline_step = datastruct.named_steps[step]

        if pipeline_step.get('model_name', 'reallylongnonsensicalstring')[:12] == 'DeepLearning':
            # Deep learning models are stored separately as Keras .h5 files
            keras_file_name = file_name[:-5] + pipeline_step.model + '_keras_deep_learning_model.h5'
            print(keras_file_name)
            self.trained_ml_pipeline = load_model(keras_file_name)
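# A minimal usage sketch for the loader above. It assumes this __init__ belongs to a small
# wrapper class (called TrainedPipelineWrapper here purely for illustration) and that
# `file_name` points at a pipeline previously saved via ml_predictor.save() as a .dill file:
#
#     wrapper = TrainedPipelineWrapper(
#         file_name='auto_ml_saved_pipeline.dill',
#         column_descriptions={'MEDV': 'output', 'CHAS': 'categorical'})
#     predictions = wrapper.trained_ml_pipeline.predict(df_boston_test)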
def test_predict_intervals_should_fail_if_not_trained():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train)

    try:
        intervals = ml_predictor.predict_intervals(df_boston_test)
        assert False
    except ValueError:
        assert True
def test_compare_all_models_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, compare_all_models=True)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.2 < test_score < -2.8
def test_perform_feature_selection_false_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, perform_feature_selection=False)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.2 < test_score < -2.8
def test_all_algos_regression():
    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output'
        , 'CHAS': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(
        df_boston_train,
        model_names=[
            'LinearRegression', 'RandomForestRegressor', 'Ridge',
            'GradientBoostingRegressor', 'ExtraTreesRegressor', 'AdaBoostRegressor',
            'SGDRegressor', 'PassiveAggressiveRegressor'
        ])

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.25 < test_score < -2.8
def test_compute_power_1_regression():
    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {
        'MEDV': 'output'
        , 'CHAS': 'categorical'
    }

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, compute_power=1)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    assert -3.2 < test_score < -2.8
def test_prediction_intervals_lets_the_user_specify_number_of_intervals():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(
        df_boston_train, predict_intervals=True, prediction_intervals=[.2])

    intervals = ml_predictor.predict_intervals(df_boston_test, return_type='list')

    assert len(intervals[0]) == 2
def test_perform_feature_selection_true_regression():
    np.random.seed(0)

    df_boston_train, df_boston_test = utils.get_boston_regression_dataset()

    column_descriptions = {'MEDV': 'output', 'CHAS': 'categorical'}

    ml_predictor = Predictor(
        type_of_estimator='regressor', column_descriptions=column_descriptions)

    ml_predictor.train(df_boston_train, perform_feature_selection=True)

    test_score = ml_predictor.score(df_boston_test, df_boston_test.MEDV)

    print('test_score')
    print(test_score)

    # Bumping this up since without these features our score drops
    assert -4.0 < test_score < -2.8