def test_skope_rules_works():
    """Smoke test: SkopeRules ranks the two planted outliers above all inliers.

    Fits a SkopeRules classifier on a tiny 2-D sample whose last two points
    are labelled as outliers, then checks that every scoring method
    (decision_function, rules_vote, separate_rules_score) scores the two
    outliers strictly higher than any inlier, and that predict recovers the
    exact labels.
    """
    # Toy sample (the last two samples are outliers).
    X = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1], [6, 3], [4, -7]]
    y = [0] * 6 + [1] * 2
    # Test points: same inliers, plus two points even further out.
    X_test = [[-2, -1], [-1, -1], [-1, -2], [1, 1], [1, 2], [2, 1],
              [10, 5], [5, -7]]

    # Fit SkopeRules (`rng` is a module-level random state — keeps the
    # induced rules deterministic across runs).
    clf = SkopeRules(random_state=rng, max_samples=1.)
    clf.fit(X, y)
    decision_func = clf.decision_function(X_test)
    rules_vote = clf.rules_vote(X_test)
    separate_rules_score = clf.separate_rules_score(X_test)
    pred = clf.predict(X_test)

    # Assert outliers are detected: the lowest outlier score must exceed the
    # highest inlier score for every scoring method.
    # (Plain asserts replace sklearn's removed `assert_greater` helper.)
    assert np.min(decision_func[-2:]) > np.max(decision_func[:-2])
    assert np.min(rules_vote[-2:]) > np.max(rules_vote[:-2])
    assert np.min(separate_rules_score[-2:]) > np.max(separate_rules_score[:-2])
    assert_array_equal(pred, 6 * [0] + 2 * [1])
# This part shows how SkopeRules can be fitted to detect credit defaults.
# Performances are compared with the random forest model previously trained.

# Fit the model on the training split prepared earlier in this script.
clf = SkopeRules(
    similarity_thres=.9,
    max_depth=3,
    max_features=0.5,
    max_samples_features=0.5,
    random_state=rng,
    n_estimators=30,
    feature_names=feature_names,
    recall_min=0.02,
    precision_min=0.6
)
clf.fit(X_train, y_train)

# In the separate_rules_score method, a score of k means that rule number k
# votes positively, but not rules 1, ..., k-1. It will allow us to plot the
# performance of each rule separately on ROC and PR plots.
scoring = clf.separate_rules_score(X_test)

print(f'{len(clf.rules_)} rules have been built.')
print('The most precise rules are the following:')
print(clf.rules_[:5])

# Side-by-side ROC (left) and precision-recall (right) axes.
curves = [roc_curve, precision_recall_curve]
xlabels = ['False Positive Rate', 'Recall (True Positive Rate)']
ylabels = ['True Positive Rate (Recall)', 'Precision']

fig, axes = plt.subplots(1, 2, figsize=(12, 5), sharex=True, sharey=True)
ax = axes[0]
fpr, tpr, _ = roc_curve(y_test, scoring)