def test_determine_pairs_to_do():
    """Check which (feature, label) pairs a memoizing explainer reports as pending.

    The test asks ``_determine_pairs_to_do`` for the outstanding pairs three
    times: before anything was explained, after ``explain_bias`` was run for
    one (feature, label) pair, and for a wider request that includes that
    pair.
    """
    X, y = load_iris()

    # Train a model and make predictions. The split is seeded so that every
    # run fits the model and explains the predictions on the same rows.
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X, y, shuffle=True, random_state=42
    )
    model = pipeline.make_pipeline(
        preprocessing.StandardScaler(),
        linear_model.LogisticRegression(solver="lbfgs", multi_class="auto"),
    )
    model.fit(X_train, y_train)
    y_pred = model.predict_proba(X_test)
    # One column of predicted probabilities per class label.
    y_pred = pd.DataFrame(y_pred, columns=model.classes_)

    # With memoization
    explainer = ethik.ClassificationExplainer(memoize=True, n_jobs=1)

    # Nothing has been explained yet: the requested pair is reported back.
    to_do = explainer._determine_pairs_to_do(["sepal length (cm)"], ["setosa"])
    assert to_do == {"sepal length (cm)": ["setosa"]}

    explainer.explain_bias(X_test["sepal length (cm)"], y_pred["setosa"])

    # The same request now yields no outstanding pair.
    to_do = explainer._determine_pairs_to_do(["sepal length (cm)"], ["setosa"])
    assert to_do == {}

    # A wider request only lists the pairs that were not explained above.
    to_do = explainer._determine_pairs_to_do(
        ["sepal length (cm)", "petal width (cm)"], ["setosa", "virginica"]
    )
    assert to_do == {
        "petal width (cm)": ["setosa", "virginica"],
        "sepal length (cm)": ["virginica"],
    }
def test_target_identity():
    """Explaining a feature twice must leave a single row per (label, feature, tau).

    ``explain_influence`` is first run on the whole test frame and then again
    on the ``"petal width (cm)"`` column alone. Afterwards ``explainer.info``
    is expected to hold exactly one row for that feature, the first label and
    ``tau == -0.85``.
    """
    X, y = load_iris()
    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X,
        y,
        shuffle=True,
        random_state=42,
    )

    scaler = preprocessing.StandardScaler()
    knn = neighbors.KNeighborsClassifier()
    model = pipeline.make_pipeline(scaler, knn)
    model.fit(X_train, y_train)

    # One column of predicted probabilities per class label.
    probabilities = model.predict_proba(X_test)
    y_pred = pd.DataFrame(probabilities, columns=model.classes_)

    explainer = ethik.ClassificationExplainer(memoize=True)
    label = y_pred.columns[0]
    label_pred = y_pred[label]

    # First on every feature, then once more on a single feature.
    explainer.explain_influence(X_test=X_test, y_pred=label_pred)
    explainer.explain_influence(X_test=X_test["petal width (cm)"], y_pred=label_pred)

    selector = (
        f"label == '{label}' and feature == 'petal width (cm)' and tau == -0.85"
    )
    rows = explainer.info.query(selector)
    assert len(rows) == 1
def setup_adult():
    """Build an explainer fixture from the UCI "adult" dataset.

    Downloads the data, fits a LightGBM classifier on a seeded train split and
    computes the predicted probabilities on the test split.

    Returns:
        A tuple ``(explainer, X_test, y_pred, y_test)`` where ``explainer`` is
        a default ``ethik.ClassificationExplainer`` and ``y_pred`` is a
        ``pandas.Series`` named ``">$50k"``.
    """
    url = "https://archive.ics.uci.edu/ml/machine-learning-databases/adult/adult.data"

    # The file has no header row, so column names are supplied explicitly.
    names = [
        "age",
        "workclass",
        "fnlwgt",
        "education",
        "education-num",
        "marital-status",
        "occupation",
        "relationship",
        "race",
        "gender",
        "capital-gain",
        "capital-loss",
        "hours-per-week",
        "native-country",
        "salary",
    ]
    categorical_columns = (
        "workclass",
        "education",
        "marital-status",
        "occupation",
        "relationship",
        "race",
        "gender",
        "native-country",
    )
    dtypes = {column: "category" for column in categorical_columns}

    X = pd.read_csv(url, names=names, header=None, dtype=dtypes)

    # Strip surrounding whitespace from the gender values, then go back to a
    # categorical column.
    stripped_gender = X["gender"].str.strip()
    X["gender"] = stripped_gender.astype("category")

    # The target is taken out of the frame; the mapping keys keep the leading
    # space found in the raw values.
    salary = X.pop("salary")
    y = salary.map({" <=50K": False, " >50K": True})

    X_train, X_test, y_train, y_test = model_selection.train_test_split(
        X,
        y,
        shuffle=True,
        random_state=42,
    )

    model = lgb.LGBMClassifier(random_state=42).fit(X_train, y_train)

    # Keep the second column of the predicted probabilities as a named series.
    y_pred = pd.Series(model.predict_proba(X_test)[:, 1], name=">$50k")

    explainer = ethik.ClassificationExplainer()
    return explainer, X_test, y_pred, y_test
import ethik

# print(X_test['averageAll'])

# Linear SVM with probability estimates enabled so predict_proba is available.
classifier = svm.SVC(
    kernel="linear",
    gamma="auto",
    C=2,
    probability=True,
)
classifier.fit(X_train, y_train)

# Hard class predictions and the second column of the predicted
# probabilities, the latter wrapped in a series named 'averageAll'.
y_predict = classifier.predict(X_test)
y_pred = pd.Series(classifier.predict_proba(X_test)[:, 1], name="averageAll")

explainer = ethik.ClassificationExplainer()
print(y_pred)
# print(X_test['averageAll'])
# explainer.plot_influence(X_test=X_test, y_pred=y_predict).show()
# classifier.predict_proba

# evaluation
from sklearn.metrics import classification_report
def test_check_tol():
    """The explainer must raise ``ValueError`` for ``tol`` values of -1 and 0."""
    invalid_tols = (-1, 0)
    for bad_tol in invalid_tols:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(tol=bad_tol)
def test_check_max_iterations():
    """The explainer must raise ``ValueError`` for ``max_iterations`` of -1 and 0."""
    invalid_counts = (-1, 0)
    for bad_count in invalid_counts:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(max_iterations=bad_count)
def test_check_conf_level():
    """The explainer must raise ``ValueError`` for ``conf_level`` of -1, 0 and 0.5."""
    invalid_levels = (-1, 0, 0.5)
    for bad_level in invalid_levels:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(conf_level=bad_level)
def test_check_sample_frac():
    """The explainer must raise ``ValueError`` for ``sample_frac`` of -1, 0 and 1."""
    invalid_fractions = (-1, 0, 1)
    for bad_fraction in invalid_fractions:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(sample_frac=bad_fraction)
def test_check_n_samples():
    """The explainer must raise ``ValueError`` for ``n_samples`` of -1 and 0."""
    invalid_sizes = (-1, 0)
    for bad_size in invalid_sizes:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(n_samples=bad_size)
def test_check_alpha():
    """The explainer must raise ``ValueError`` for ``alpha`` of -1, -0.1, 0.5 and 1."""
    invalid_alphas = (-1, -0.1, 0.5, 1)
    for bad_alpha in invalid_alphas:
        with pytest.raises(ValueError):
            ethik.ClassificationExplainer(alpha=bad_alpha)