Python calculate_brier_score_lossの例、utils_testing.calculate_brier_score_loss Pythonの例

コード例 #1

0

ファイルを表示

ファイル: ensemble_tests.py プロジェクト: zhanglae/auto_ml

def test_saving_basic_ensemble_classifier():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )
    ml_predictor = utils.make_titanic_ensemble(df_titanic_train)

    file_name = ml_predictor.save(str(random.random()))

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)
    os.remove(file_name)

    probas = saved_ml_pipeline.predict_proba(df_titanic_test)
    probas = [proba[1] for proba in probas]
    # print(probas)

    test_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                  probas)
    print('test_score')
    print(test_score)

    # Very rough ensembles don't do as well on this problem as a standard GradientBoostingClassifier does
    # Right now we're getting a score of -.22
    # Make sure our score is good, but not unreasonably good
    assert -0.225 < test_score < -0.17

コード例 #2

0

ファイルを表示

ファイル: api_coverage_tests_classifiers.py プロジェクト: sanketh96/auto_ml

def test_binary_classification_predict_proba_on_Predictor_instance():
    np.random.seed(0)


    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)

    #
    predictions = ml_predictor.predict_proba(df_titanic_test)
    predictions = [pred[1] for pred in predictions]
    test_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    # Make sure our score is good, but not unreasonably good
    print(test_score)
    assert -0.16 < test_score < -0.135

コード例 #3

0

ファイルを表示

def test_user_input_func_classification(model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    def age_bucketing(data):
        def define_buckets(age):
            if age <= 17:
                return 'youth'
            elif age <= 40:
                return 'adult'
            elif age <= 60:
                return 'adult2'
            else:
                return 'over_60'

        if isinstance(data, dict):
            data['age_bucket'] = define_buckets(data['age'])
        else:
            data['age_bucket'] = data.age.apply(define_buckets)

        return data

    column_descriptions = {
        'survived': 'output',
        'embarked': 'categorical',
        'pclass': 'categorical',
        'age_bucket': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train,
                       perform_feature_scaling=False,
                       user_input_func=age_bucketing,
                       model_names=model_name)

    file_name = ml_predictor.save(str(random.random()))

    # if model_name == 'DeepLearningClassifier':
    #     from auto_ml.utils_models import load_keras_model

    #     saved_ml_pipeline = load_keras_model(file_name)
    # else:
    #     with open(file_name, 'rb') as read_file:
    #         saved_ml_pipeline = dill.load(read_file)
    saved_ml_pipeline = load_ml_model(file_name)

    os.remove(file_name)
    try:
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except:
        pass

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                   predictions)
    print('first_score')
    print(first_score)
    # Make sure our score is good, but not unreasonably good

    lower_bound = -0.215
    if model_name == 'DeepLearningClassifier':
        lower_bound = -0.237

    assert lower_bound < first_score < -0.17

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        row_num = idx % data_length
        saved_ml_pipeline.predict(df_titanic_test_dictionaries[row_num])
    end_time = datetime.datetime.now()
    duration = end_time - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 15

    # 3. make sure we're not modifying the dictionaries (the score is the same after running a few experiments as it is the first time)

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)
    print('df_titanic_test_dictionaries')
    print(df_titanic_test_dictionaries)
    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                    predictions)
    print('second_score')
    print(second_score)
    # Make sure our score is good, but not unreasonably good

    assert lower_bound < second_score < -0.17

コード例 #4

0

ファイルを表示

def test_feature_learning_getting_single_predictions_classification(
        model_name=None):
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )

    column_descriptions = {
        'survived': 'output',
        'sex': 'categorical',
        'embarked': 'categorical',
        'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier',
                             column_descriptions=column_descriptions)

    # NOTE: this is bad practice to pass in our same training set as our fl_data set, but we don't have enough data to do it any other way
    df_titanic_train, fl_data = train_test_split(df_titanic_train,
                                                 test_size=0.2)
    ml_predictor.train(df_titanic_train,
                       model_names=model_name,
                       feature_learning=True,
                       fl_data=fl_data)

    file_name = ml_predictor.save(str(random.random()))

    saved_ml_pipeline = load_ml_model(file_name)

    os.remove(file_name)
    try:
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except:
        pass

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                   predictions)
    print('first_score')
    print(first_score)
    # Make sure our score is good, but not unreasonably good

    lower_bound = -0.16
    if model_name == 'DeepLearningClassifier':
        lower_bound = -0.187

    assert lower_bound < first_score < -0.133

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        row_num = idx % data_length
        saved_ml_pipeline.predict(df_titanic_test_dictionaries[row_num])
    end_time = datetime.datetime.now()
    duration = end_time - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 15

    # 3. make sure we're not modifying the dictionaries (the score is the same after running a few experiments as it is the first time)

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)
    print('df_titanic_test_dictionaries')
    print(df_titanic_test_dictionaries)
    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                    predictions)
    print('second_score')
    print(second_score)
    # Make sure our score is good, but not unreasonably good

    assert lower_bound < second_score < -0.133

コード例 #5

0

ファイルを表示

ファイル: basic_tests.py プロジェクト: bitsam/auto_ml

def test_getting_single_predictions_classification():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )
    ml_predictor = utils.train_basic_binary_classifier(df_titanic_train)
    file_name = ml_predictor.save()

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                   predictions)
    print('first_score')
    print(first_score)
    # Make sure our score is good, but not unreasonably good
    assert -0.215 < first_score < -0.17

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        row_num = idx % data_length
        saved_ml_pipeline.predict(df_titanic_test_dictionaries[row_num])
    end_time = datetime.datetime.now()
    duration = end_time - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 3

    # 3. make sure we're not modifying the dictionaries (the score is the same after running a few experiments as it is the first time)

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)
    print('df_titanic_test_dictionaries')
    print(df_titanic_test_dictionaries)
    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                    predictions)
    print('second_score')
    print(second_score)
    # Make sure our score is good, but not unreasonably good
    assert -0.215 < second_score < -0.17

コード例 #6

0

ファイルを表示

def test_get_basic_ensemble_predictions_one_at_a_time_classifier():
    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset(
    )
    ml_predictor = utils.make_titanic_ensemble(df_titanic_train)
    file_name = ml_predictor.save()

    with open(file_name, 'rb') as read_file:
        saved_ml_pipeline = dill.load(read_file)

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # These predictions take a while. So we'll cut out 80% of our data to make this run much faster
    df_titanic_test_dictionaries, df_titanic_test_dictionaries_ignored, df_titanic_test, df_titanic_test_ignored = train_test_split(
        df_titanic_test_dictionaries,
        df_titanic_test,
        train_size=0.05,
        random_state=0)

    # 1. make sure the accuracy is the same

    predictions = []
    for row in df_titanic_test_dictionaries:
        prediction = saved_ml_pipeline.predict_proba(row)
        predictions.append(prediction)

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                   predictions)
    print('first_score')
    print(first_score)
    # Make sure our score is good, but not unreasonably good
    assert -0.235 < first_score < -0.17

    # 2. make sure the speed is reasonable (do it a few extra times)
    # data_length = len(df_titanic_test_dictionaries)
    # start_time = datetime.datetime.now()
    # for idx in range(1000):
    #     row_num = idx % data_length
    #     saved_ml_pipeline.predict(df_titanic_test_dictionaries[row_num])
    # end_time = datetime.datetime.now()
    # duration = end_time - start_time

    # print('duration.total_seconds()')
    # print(duration.total_seconds())

    # # It's very difficult to set a benchmark for speed that will work across all machines.
    # # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # # That's about 1 millisecond per prediction
    # # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # # Also make sure we're not running unreasonably quickly
    # assert 0.4 < duration.total_seconds() < 3

    # 3. make sure we're not modifying the dictionaries (the score is the same after running a few experiments as it is the first time)

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row))

    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived,
                                                    predictions)
    # Make sure our score is good, but not unreasonably good
    assert -0.235 < second_score < -0.17

コード例 #7

0

ファイルを表示

def getting_single_predictions_classifier_test():
    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'sex': 'categorical'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
        , 'age_bucket': 'categorical'
    }

    ensemble_config = [
        {
            'model_name': 'LGBMClassifier'
        }
        , {
            'model_name': 'RandomForestClassifier'
        }

    ]


    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train, ensemble_config=ensemble_config)


    file_name = ml_predictor.save(str(random.random()))

    saved_ml_pipeline = load_ml_model(file_name)

    os.remove(file_name)
    try:
        keras_file_name = file_name[:-5] + '_keras_deep_learning_model.h5'
        os.remove(keras_file_name)
    except:
        pass


    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    print('first_score')
    print(first_score)
    # Make sure our score is good, but not unreasonably good

    lower_bound = -0.16

    assert -0.15 < first_score < -0.135

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        row_num = idx % data_length
        saved_ml_pipeline.predict(df_titanic_test_dictionaries[row_num])
    end_time = datetime.datetime.now()
    duration = end_time - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 60


    # 3. make sure we're not modifying the dictionaries (the score is the same after running a few experiments as it is the first time)

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    print('predictions')
    print(predictions)
    print('df_titanic_test_dictionaries')
    print(df_titanic_test_dictionaries)
    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    print('second_score')
    print(second_score)
    # Make sure our score is good, but not unreasonably good

    assert -0.15 < second_score < -0.135

コード例 #8

0

ファイルを表示

ファイル: backwards_compatibility_test.py プロジェクト: Vamshi-dhar/auto_ml-1

def train_old_model():
    print('auto_ml_version')
    print(auto_ml_version)
    if auto_ml_version > '2.1.6':
        raise(TypeError)

    np.random.seed(0)

    df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

    column_descriptions = {
        'survived': 'output'
        , 'sex': 'categorical'
        , 'embarked': 'categorical'
        , 'pclass': 'categorical'
    }

    ml_predictor = Predictor(type_of_estimator='classifier', column_descriptions=column_descriptions)

    ml_predictor.train(df_titanic_train)

    file_name = ml_predictor.save('trained_ml_model_v_2_1_6.dill')

    saved_ml_pipeline = load_ml_model(file_name)

    df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

    # 1. make sure the accuracy is the same

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    first_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    # Make sure our score is good, but not unreasonably good

    lower_bound = -0.16

    assert -0.16 < first_score < -0.135

    # 2. make sure the speed is reasonable (do it a few extra times)
    data_length = len(df_titanic_test_dictionaries)
    start_time = datetime.datetime.now()
    for idx in range(1000):
        row_num = idx % data_length
        saved_ml_pipeline.predict(df_titanic_test_dictionaries[row_num])
    end_time = datetime.datetime.now()
    duration = end_time - start_time

    print('duration.total_seconds()')
    print(duration.total_seconds())

    # It's very difficult to set a benchmark for speed that will work across all machines.
    # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
    # That's about 1 millisecond per prediction
    # Assuming we might be running on a test box that's pretty weak, multiply by 3
    # Also make sure we're not running unreasonably quickly
    assert 0.2 < duration.total_seconds() < 15


    # 3. make sure we're not modifying the dictionaries (the score is the same after running a few experiments as it is the first time)

    predictions = []
    for row in df_titanic_test_dictionaries:
        predictions.append(saved_ml_pipeline.predict_proba(row)[1])

    second_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
    # Make sure our score is good, but not unreasonably good

    assert -0.16 < second_score < -0.135

コード例 #9

0

ファイルを表示

ファイル: backwards_compatibility_test.py プロジェクト: Vamshi-dhar/auto_ml-1

    def test_backwards_compatibility_with_version_2_1_6():
        np.random.seed(0)
        print('auto_ml_version')
        print(auto_ml_version)
        if auto_ml_version <= '2.9.0':
            raise(TypeError)

        df_titanic_train, df_titanic_test = utils.get_titanic_binary_classification_dataset()

        saved_ml_pipeline = load_ml_model(os.path.join('tests', 'backwards_compatibility_tests', 'trained_ml_model_v_2_1_6.dill'))

        df_titanic_test_dictionaries = df_titanic_test.to_dict('records')

        # 1. make sure the accuracy is the same

        predictions = []
        for row in df_titanic_test_dictionaries:
            predictions.append(saved_ml_pipeline.predict_proba(row)[1])

        print('predictions')
        print(predictions)

        first_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
        print('first_score')
        print(first_score)
        # Make sure our score is good, but not unreasonably good

        lower_bound = -0.215

        assert lower_bound < first_score < -0.17

        # 2. make sure the speed is reasonable (do it a few extra times)
        data_length = len(df_titanic_test_dictionaries)
        start_time = datetime.datetime.now()
        for idx in range(1000):
            row_num = idx % data_length
            saved_ml_pipeline.predict(df_titanic_test_dictionaries[row_num])
        end_time = datetime.datetime.now()
        duration = end_time - start_time

        print('duration.total_seconds()')
        print(duration.total_seconds())

        # It's very difficult to set a benchmark for speed that will work across all machines.
        # On my 2013 bottom of the line 15" MacBook Pro, this runs in about 0.8 seconds for 1000 predictions
        # That's about 1 millisecond per prediction
        # Assuming we might be running on a test box that's pretty weak, multiply by 3
        # Also make sure we're not running unreasonably quickly
        assert 0.2 < duration.total_seconds() < 15


        # 3. make sure we're not modifying the dictionaries (the score is the same after running a few experiments as it is the first time)

        predictions = []
        for row in df_titanic_test_dictionaries:
            predictions.append(saved_ml_pipeline.predict_proba(row)[1])

        print('predictions')
        print(predictions)
        print('df_titanic_test_dictionaries')
        print(df_titanic_test_dictionaries)
        second_score = utils.calculate_brier_score_loss(df_titanic_test.survived, predictions)
        print('second_score')
        print(second_score)
        # Make sure our score is good, but not unreasonably good

        assert lower_bound < second_score < -0.17