コード例 #1
0
 def test_cohort_filter_classification_outcome(self):
     """Exercise an 'includes' cohort filter on the classification outcome.

     The expected cohort is built by hand: validation rows whose predicted
     label differs from the true label, with the prediction column dropped.
     That frame and the filter are handed to ``run_error_analyzer``, which
     performs the actual checks (not visible in this block).
     """
     (train_x, test_x, train_y, test_y,
      numeric_cols, categorical_cols) = create_simple_titanic_data()
     all_features = categorical_cols + numeric_cols
     model = create_titanic_pipeline(train_x, train_y)
     # Outcome indexes 1 and 2 correspond to false positives and
     # false negatives.
     outcome_filter = {
         'arg': [1, 2],
         'column': CLASSIFICATION_OUTCOME,
         'method': 'includes'
     }
     # Keep only the misclassified rows, then discard the helper
     # prediction column before passing the frame on.
     predictions = model.predict(test_x)
     expected = create_validation_data(test_x, test_y, predictions)
     misclassified = expected[PRED_Y] != expected[TRUE_Y]
     expected = expected.loc[misclassified].drop(columns=PRED_Y)
     run_error_analyzer(expected,
                        model,
                        test_x,
                        test_y,
                        all_features,
                        categorical_cols,
                        ModelTask.CLASSIFICATION,
                        filters=[outcome_filter])
コード例 #2
0
 def test_importances_titanic(self):
     """Run the error analyzer against a pipeline fit on Titanic data.

     Feature names are ordered categorical first, then numeric. The
     assertions themselves live in ``run_error_analyzer`` (not visible
     in this block).
     """
     dataset = create_simple_titanic_data()
     train_x, test_x, train_y, test_y, numeric_cols, categorical_cols = \
         dataset
     all_features = categorical_cols + numeric_cols
     model = create_titanic_pipeline(train_x, train_y)
     run_error_analyzer(model, test_x, test_y, all_features,
                        categorical_cols)
コード例 #3
0
 def test_matrix_filter_titanic(self):
     """Run the error analyzer on the simple Titanic dataset.

     A pipeline is fit on the training split and evaluated on the test
     split; feature names list the categorical columns before the numeric
     ones. Verification is delegated to ``run_error_analyzer`` (not
     visible in this block).
     """
     dataset = create_simple_titanic_data()
     train_x, test_x, train_y, test_y, numeric_cols, categorical_cols = \
         dataset
     model = create_titanic_pipeline(train_x, train_y)
     all_features = categorical_cols + numeric_cols
     run_error_analyzer(model, test_x, test_y, all_features,
                        categorical_cols)
コード例 #4
0
def titanic_simple():
    """Yield the simple Titanic dataset as a dict keyed by DatasetConstants.

    The six values returned by ``create_simple_titanic_data`` (train/test
    features, train/test labels, numeric and categorical column lists) are
    mapped onto the matching ``DatasetConstants`` keys and yielded once.
    NOTE(review): presumably used as a pytest fixture; the decorator is not
    visible in this block.
    """
    (train_x, test_x, train_y, test_y,
     numeric_cols, categorical_cols) = create_simple_titanic_data()
    dataset = {}
    dataset[DatasetConstants.X_TRAIN] = train_x
    dataset[DatasetConstants.X_TEST] = test_x
    dataset[DatasetConstants.Y_TRAIN] = train_y
    dataset[DatasetConstants.Y_TEST] = test_y
    dataset[DatasetConstants.NUMERIC] = numeric_cols
    dataset[DatasetConstants.CATEGORICAL] = categorical_cols
    yield dataset
コード例 #5
0
 def test_matrix_filter_titanic(self):
     """Run the error analyzer on Titanic data as a classification task.

     Fits the Titanic pipeline on the training split and passes the test
     split, the feature names (categorical first, then numeric) and the
     categorical column list to ``run_error_analyzer``, which holds the
     actual checks (not visible in this block).
     """
     dataset = create_simple_titanic_data()
     train_x, test_x, train_y, test_y, numeric_cols, categorical_cols = \
         dataset
     all_features = categorical_cols + numeric_cols
     model = create_titanic_pipeline(train_x, train_y)
     run_error_analyzer(model, test_x, test_y, all_features,
                        categorical_cols,
                        model_task=ModelTask.CLASSIFICATION)
コード例 #6
0
 def test_cohort_filter_excludes(self):
     """Exercise an 'excludes' cohort filter on the EMBARKED column.

     The expected cohort is built by hand from the validation rows whose
     EMBARKED value is 'Q'. That frame and the filter are handed to
     ``run_error_analyzer``, which performs the actual checks (not visible
     in this block).
     """
     (train_x, test_x, train_y, test_y,
      numeric_cols, categorical_cols) = create_simple_titanic_data()
     all_features = categorical_cols + numeric_cols
     model = create_titanic_pipeline(train_x, train_y)
     # Excluding indexes 0 and 2 leaves the remaining index, which
     # corresponds to 'Q'.
     embarked_filter = {
         'arg': [0, 2],
         'column': EMBARKED,
         'method': 'excludes'
     }
     all_rows = create_validation_data(test_x, test_y)
     is_queenstown = test_x[EMBARKED].isin(['Q'])
     expected = all_rows.loc[is_queenstown]
     run_error_analyzer(expected,
                        model,
                        test_x,
                        test_y,
                        all_features,
                        categorical_cols,
                        ModelTask.CLASSIFICATION,
                        filters=[embarked_filter])
コード例 #7
0
 def test_invalid_comparison_titanic(self, analyzer_type):
     """Expect a TypeError when a string column is used as a tree feature.

     A dummy string index column is added to both splits and listed among
     the tree features without being declared categorical. The call to
     ``run_error_analyzer`` must raise ``TypeError`` with a message naming
     the offending column.

     :param analyzer_type: Forwarded unchanged to ``run_error_analyzer``.
     """
     expected_message = (
         'Column string_index of type string is incorrectly treated '
         'as numeric with threshold value')
     dataset = create_simple_titanic_data()
     train_x, test_x, train_y, test_y, numeric_cols, categorical_cols = \
         dataset
     string_cols = [STRING_INDEX]
     all_features = categorical_cols + numeric_cols + string_cols
     # Add the bad dummy string feature to both splits before fitting.
     train_x = add_string_index_col(train_x)
     test_x = add_string_index_col(test_x)
     model = create_titanic_pipeline(train_x, train_y)
     # The string column is deliberately left out of the categorical list
     # while still being passed as a tree feature.
     tree_cols = string_cols + numeric_cols
     with pytest.raises(TypeError) as raised:
         run_error_analyzer(model,
                            test_x,
                            test_y,
                            all_features,
                            analyzer_type,
                            categorical_cols,
                            tree_cols,
                            model_task=ModelTask.CLASSIFICATION)
     assert expected_message in str(raised.value)