Esempio n. 1
0
def test_determine_pairs_to_do():
    """Check which (feature, label) pairs ``_determine_pairs_to_do`` reports.

    With memoization enabled, the test asserts three things in order:

    1. before anything has been explained, the requested pair is returned;
    2. once ``explain_bias`` has run on that pair, nothing is returned for it;
    3. a wider request only returns the pairs that were not explained yet.
    """
    X, y = load_iris()

    # Train a model and make predictions. The split is seeded so that every
    # run of the test works on the same rows.
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, shuffle=True, random_state=42
    )
    model = pipeline.make_pipeline(
        preprocessing.StandardScaler(),
        linear_model.LogisticRegression(solver="lbfgs", multi_class="auto"),
    )
    model.fit(X_train, y_train)
    # One probability column per class, named after the class label.
    y_pred = model.predict_proba(X_test)
    y_pred = pd.DataFrame(y_pred, columns=model.classes_)

    # With memoization

    explainer = ethik.ClassificationExplainer(memoize=True, n_jobs=1)

    # Nothing explained yet: the single requested pair is still to do.
    to_do = explainer._determine_pairs_to_do(["sepal length (cm)"], ["setosa"])
    assert to_do == {"sepal length (cm)": ["setosa"]}

    explainer.explain_bias(X_test["sepal length (cm)"], y_pred["setosa"])

    # The pair explained just above must not be reported again.
    to_do = explainer._determine_pairs_to_do(["sepal length (cm)"], ["setosa"])
    assert to_do == {}

    # Only the combinations that were not explained above are expected back.
    to_do = explainer._determine_pairs_to_do(
        ["sepal length (cm)", "petal width (cm)"], ["setosa", "virginica"]
    )
    assert to_do == {
        "petal width (cm)": ["setosa", "virginica"],
        "sepal length (cm)": ["virginica"],
    }
def test_target_identity():
    """Explain one label twice and expect a single matching row in ``info``.

    ``explain_influence`` is first called on every feature and then again on
    "petal width (cm)" alone, both times for the first predicted label. The
    memoizing explainer's ``info`` table must then hold exactly one row for
    that label, that feature and ``tau == -0.85``.
    """
    features, target = load_iris()
    train_X, test_X, train_y, _ = model_selection.train_test_split(
        features, target, shuffle=True, random_state=42
    )

    # Scale the features, then fit a k-nearest-neighbours classifier.
    knn = pipeline.make_pipeline(
        preprocessing.StandardScaler(),
        neighbors.KNeighborsClassifier(),
    )
    knn.fit(train_X, train_y)

    # One probability column per class, named after the class label.
    probabilities = pd.DataFrame(
        knn.predict_proba(test_X), columns=knn.classes_
    )
    label = probabilities.columns[0]

    explainer = ethik.ClassificationExplainer(memoize=True)

    # First pass over all features, second pass over a single one.
    explainer.explain_influence(X_test=test_X, y_pred=probabilities[label])
    explainer.explain_influence(
        X_test=test_X["petal width (cm)"],
        y_pred=probabilities[label],
    )

    matching = explainer.info.query(
        f"label == '{label}' and feature == 'petal width (cm)' and tau == -0.85"
    )
    assert len(matching) == 1
Esempio n. 3
0
def setup_adult():
    """Build an explainer plus test data from the UCI Adult dataset.

    The CSV is downloaded from the UCI archive, a LightGBM classifier is
    fitted on the training split, and the second column of its
    ``predict_proba`` output on the test split is kept as a Series named
    ">$50k".

    Returns:
        A ``(explainer, X_test, y_pred, y_test)`` tuple, where ``explainer``
        is a freshly constructed ``ethik.ClassificationExplainer``.
    """
    column_names = [
        "age",
        "workclass",
        "fnlwgt",
        "education",
        "education-num",
        "marital-status",
        "occupation",
        "relationship",
        "race",
        "gender",
        "capital-gain",
        "capital-loss",
        "hours-per-week",
        "native-country",
        "salary",
    ]
    # Columns to load with the pandas "category" dtype.
    categorical_columns = (
        "workclass",
        "education",
        "marital-status",
        "occupation",
        "relationship",
        "race",
        "gender",
        "native-country",
    )

    X = pd.read_csv(
        "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data",
        names=column_names,
        header=None,
        dtype=dict.fromkeys(categorical_columns, "category"),
    )

    # Strip surrounding whitespace from the gender values, then restore the
    # categorical dtype that the string operation drops.
    stripped_gender = X["gender"].str.strip()
    X["gender"] = stripped_gender.astype("category")

    # The raw labels carry a leading space; turn them into booleans.
    salary = X.pop("salary")
    y = salary.map({" <=50K": False, " >50K": True})

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, shuffle=True, random_state=42
    )

    model = lgb.LGBMClassifier(random_state=42)
    model.fit(X_train, y_train)

    # Keep the second probability column only, as a named Series.
    y_pred = pd.Series(model.predict_proba(X_test)[:, 1], name=">$50k")

    explainer = ethik.ClassificationExplainer()
    return explainer, X_test, y_pred, y_test
Esempio n. 4
0
# Script fragment: fit an SVM on X_train / y_train (defined outside this
# fragment), compute class predictions and probabilities on X_test, and
# create an ethik explainer.
#print(X_test['averageAll'])

# probability=True is what enables predict_proba on the fitted classifier.
# NOTE(review): the scikit-learn docs describe gamma as the coefficient for
# the 'rbf', 'poly' and 'sigmoid' kernels, so it looks unused with
# kernel='linear' — confirm before relying on it.
classifier = svm.SVC(kernel='linear', gamma='auto', C=2, probability=True)
classifier.fit(X_train, y_train)
# Hard class predictions; in this fragment they are only referenced by the
# commented-out plot_influence call below.
y_predict = classifier.predict(X_test)


# Keep the second column of predict_proba (the probability of
# classifier.classes_[1]) and wrap it in a Series named 'averageAll'.
y_pred = classifier.predict_proba(X_test)[:, 1]
y_pred = pd.Series(y_pred, name='averageAll')



import ethik

# Explainer built with default settings; it is not used again within this
# fragment (the only call on it is commented out).
explainer = ethik.ClassificationExplainer()

print(y_pred)

#print(X_test['averageAll'])


#explainer.plot_influence(X_test=X_test, y_pred=y_predict).show()

#classifier.predict_proba

#evaluation
# NOTE(review): classification_report is imported but not called anywhere in
# this fragment — presumably used further down the original script.
from sklearn.metrics import classification_report


Esempio n. 5
0
def test_check_tol():
    """The explainer constructor must raise ValueError for ``tol`` of -1 and 0."""
    invalid_tolerances = (-1, 0)
    for bad_tol in invalid_tolerances:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(tol=bad_tol)
Esempio n. 6
0
def test_check_max_iterations():
    """The explainer constructor must raise ValueError for ``max_iterations`` of -1 and 0."""
    invalid_counts = (-1, 0)
    for bad_count in invalid_counts:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(max_iterations=bad_count)
Esempio n. 7
0
def test_check_conf_level():
    """The explainer constructor must raise ValueError for ``conf_level`` of -1, 0 and 0.5."""
    invalid_levels = (-1, 0, 0.5)
    for bad_level in invalid_levels:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(conf_level=bad_level)
Esempio n. 8
0
def test_check_sample_frac():
    """The explainer constructor must raise ValueError for ``sample_frac`` of -1, 0 and 1."""
    invalid_fractions = (-1, 0, 1)
    for bad_fraction in invalid_fractions:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(sample_frac=bad_fraction)
Esempio n. 9
0
def test_check_n_samples():
    """The explainer constructor must raise ValueError for ``n_samples`` of -1 and 0."""
    invalid_sizes = (-1, 0)
    for bad_size in invalid_sizes:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(n_samples=bad_size)
Esempio n. 10
0
def test_check_alpha():
    """The explainer constructor must raise ValueError for ``alpha`` of -1, -0.1, 0.5 and 1."""
    invalid_alphas = (-1, -0.1, 0.5, 1)
    for bad_alpha in invalid_alphas:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(alpha=bad_alpha)