def test_verify_features_does_not_work_by_default():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    file_name = ml_predictor.save(str(random.random()))

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    os.remove(file_name)
    try:
        # Strip the '.dill' extension to build the Keras artifact's file name.
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except OSError:
        # No Keras model was saved alongside this pipeline; nothing to clean up.
        pass

    with warnings.catch_warnings(record=True) as w:
        results = saved_ml_pipeline.named_steps['final_model'].verify_features(df_titanic_test)
        print('Here are the caught warnings:')
        print(w)

        assert len(w) == 1
        assert results is None

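# For context: the tests in this file lean on shared helpers from the test
# utils module. The sketch below is a hypothetical, minimal version of the
# trainer helper, written against auto_ml's public Predictor API; the column
# descriptions are illustrative assumptions, not copied from the real helper.
def _sketch_train_basic_binary_classifier(df_titanic_train):
    from auto_ml import Predictor

    # 'output' marks the label column; the remaining columns fall back to
    # auto_ml's defaults.
    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
    }
    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)
    ml_predictor.train(df_titanic_train)
    return ml_predictor
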
def test_binary_classification():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    test_score = ml_predictor.score(df_titanic_test, df_titanic_test.survived, verbose=0)

    # Right now we're getting a score of -.205
    # Make sure our score is good, but not unreasonably good
    assert -0.215 < test_score < -0.17

def test_binary_classification_predict_on_Predictor_instance():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    predictions = ml_predictor.predict(df_titanic_test)
    test_score = accuracy_score(predictions, df_titanic_test.survived)

    # Make sure our score is good, but not unreasonably good
    print(test_score)
    assert .77 < test_score < .805

def test_saving_trained_pipeline_binary_classification():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    file_name = ml_predictor.save()

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    test_score = saved_ml_pipeline.score(df_titanic_test, df_titanic_test.survived)

    # Right now we're getting a score of -.205
    assert -0.215 < test_score < -0.17

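# Note on the save/load round trip above: the saved pipeline is deserialized
# with dill rather than the standard pickle module, since the trained pipeline
# can contain objects (e.g. nested functions) that plain pickle cannot handle.
# A hypothetical helper that factors out the pattern used in these tests:
def _sketch_load_saved_pipeline(file_name):
    with open(file_name, 'rb') as read_file:
        return dill.load(read_file)
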
# Renamed from test_binary_classification_predict_on_Predictor_instance, which
# would silently shadow the identically named test above.
def test_binary_classification_predict_on_Predictor_instance_with_model_name(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    predictions = ml_predictor.predict(df_titanic_test)
    test_score = accuracy_score(predictions, df_titanic_test.survived)

    # Make sure our score is good, but not unreasonably good
    print(test_score)
    assert .65 < test_score < .75

def test_binary_classification_predict_proba_on_Predictor_instance():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    predictions = ml_predictor.predict_proba(df_titanic_test)
    # Keep only the probability of the positive class.
    predictions = [pred[1] for pred in predictions]
    test_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)

    # Make sure our score is good, but not unreasonably good
    print(test_score)
    assert -0.16 < test_score < -0.135

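# The negative bounds above imply that utils.calculate_brier_score_loss flips
# the sign of the Brier score: sklearn's brier_score_loss is "lower is better",
# while these assertions treat scores as "higher is better". A hypothetical
# sketch of the helper under that assumption:
def _sketch_calculate_brier_score_loss(actuals, probas):
    from sklearn.metrics import brier_score_loss

    # brier_score_loss returns a loss in [0, 1]; negate it so that a larger
    # (less negative) value means better-calibrated probabilities.
    return -1 * brier_score_loss(actuals, probas)
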
def test_getting_single_predictions_classification():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    file_name = ml_predictor.save()

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same
    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    print('first_score')
    print(first_score)
    # Make sure our score is good, but not unreasonably good
    assert -0.215 < first_score < -0.17

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()

    for idx in range(1000):
        row_num = idx % data_length
        saved_ml_pipeline.predict(df_titanic_test_dictionaries[row_num])

    end_time = datetime.datetime.now()
    duration = end_time - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom-of-the-line 15" MacBook Pro, this runs in about 0.8 seconds
    # for 1000 predictions, i.e. about 1 millisecond per prediction.
    # Assuming we might be running on a test box that's pretty weak, multiply by 3.
    # Also make sure we're not running unreasonably quickly.
    assert 0.2 < duration.total_seconds() < 3

    # 3. make sure we're not modifying the dictionaries
    # (the score is the same after running a few experiments as it is the first time)
    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)
    print('df_titanic_test_dictionaries')
    print(df_titanic_test_dictionaries)

    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    print('second_score')
    print(second_score)
    # Make sure our score is good, but not unreasonably good
    assert -0.215 < second_score < -0.17

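# The latency assertion above is inherently machine-dependent. A hypothetical
# helper that factors out the timing pattern (N round-robin single-row
# predictions, wall-clock timed), in case other tests want the same check:
def _sketch_time_predictions(pipeline, rows, n_iterations=1000):
    start_time = datetime.datetime.now()
    for idx in range(n_iterations):
        pipeline.predict(rows[idx % len(rows)])
    duration = datetime.datetime.now() - start_time
    return duration.total_seconds()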