Example #1
import numpy as np
from sklearn.utils import check_random_state
# assert helpers from scikit-learn's testing utilities (present in scikit-learn < 0.22)
from sklearn.utils.testing import assert_array_equal, assert_greater
from skrules import SkopeRules

# assumed module-level fixture: a seeded random state, as in the original test setup
rng = check_random_state(0)


def test_skope_rules_works():
    # toy sample (the last two samples are outliers)
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [4, -7]]
    y = [0] * 6 + [1] * 2
    X_test = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [10, 5],
              [5, -7]]
    # Test SkopeRules
    clf = SkopeRules(random_state=rng, max_samples=1.)
    clf.fit(X, y)
    decision_func = clf.decision_function(X_test)
    rules_vote = clf.rules_vote(X_test)
    separate_rules_score = clf.separate_rules_score(X_test)
    pred = clf.predict(X_test)
    # assert detect outliers:
    assert_greater(np.min(decision_func[-2:]), np.max(decision_func[:-2]))
    assert_greater(np.min(rules_vote[-2:]), np.max(rules_vote[:-2]))
    assert_greater(np.min(separate_rules_score[-2:]),
                   np.max(separate_rules_score[:-2]))
    assert_array_equal(pred, 6 * [0] + 2 * [1])
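The same toy data can also be explored outside the test harness. A minimal sketch, assuming the skrules package is installed; the seed value is arbitrary and chosen only for illustration:

from skrules import SkopeRules

X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [4, -7]]
y = [0] * 6 + [1] * 2

clf = SkopeRules(random_state=42, max_samples=1.)
clf.fit(X, y)

# learned rules (each entry pairs a readable rule string with its performance estimates)
print(clf.rules_)
# per-sample scores: the two outliers should receive the highest values
print(clf.decision_function(X))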
Example #2
# This part shows how SkopeRules can be fitted to detect credit defaults.
# Its performance is compared with the random forest model trained previously.

import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve, roc_curve
from skrules import SkopeRules

# X_train, y_train, X_test, y_test, feature_names and rng (a seeded random
# state) are assumed to be defined in the data-preparation part of this
# example, which is not shown here.

# fit the model

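# Key hyperparameters: precision_min and recall_min discard candidate rules
# whose estimated precision or recall falls below these thresholds, max_depth
# bounds the length of the extracted rules, n_estimators sets the number of
# tree-based base estimators, and feature_names makes the rules human-readable.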
clf = SkopeRules(
    similarity_thres=.9, max_depth=3, max_features=0.5,
    max_samples_features=0.5, random_state=rng, n_estimators=30,
    feature_names=feature_names, recall_min=0.02, precision_min=0.6
    )
clf.fit(X_train, y_train)

# In the separate_rules_score method, a score of k means that rule number k
# votes positively, but rules 1, ..., k-1 do not. This lets us plot the
# performance of each rule separately on the ROC and PR plots.
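# (Illustrative reading of the scores: a score of 3 means the sample is caught
# by rule 3 but by neither rule 1 nor rule 2, the two more precise rules.)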
scoring = clf.separate_rules_score(X_test)

print(str(len(clf.rules_)) + ' rules have been built.')
print('The most precise rules are the following:')
print(clf.rules_[:5])
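# Each entry of rules_ pairs a readable rule string (conditions on the named
# features joined by 'and') with its estimated performance; the exact tuple
# layout depends on the skope-rules version.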

curves = [roc_curve, precision_recall_curve]
xlabels = ['False Positive Rate', 'Recall (True Positive Rate)']
ylabels = ['True Positive Rate (Recall)', 'Precision']


fig, axes = plt.subplots(1, 2, figsize=(12, 5),
                         sharex=True, sharey=True)

ax = axes[0]
fpr, tpr, _ = roc_curve(y_test, scoring)