def test_specificity(self):
    """Specificity is 1/2 from raw link indexes and from a confusion matrix."""
    n_pairs = len(FULL_INDEX)
    expected = 1 / 2

    # Build the confusion matrix first so both call styles can be compared.
    cm = rl.confusion_matrix(LINKS_TRUE, LINKS_PRED, n_pairs)

    from_links = rl.specificity(LINKS_TRUE, LINKS_PRED, n_pairs)
    assert from_links == expected

    from_matrix = rl.specificity(cm)
    assert from_matrix == expected
def test_specificity(self):
    """Check specificity via link indexes and via a precomputed matrix."""
    n_pairs = len(FULL_INDEX)

    # confusion matrix
    cm = rl.confusion_matrix(LINKS_TRUE, LINKS_PRED, n_pairs)

    from_links = rl.specificity(LINKS_TRUE, LINKS_PRED, n_pairs)
    self.assertEqual(from_links, 1 / 2)

    from_matrix = rl.specificity(cm)
    self.assertEqual(from_matrix, 1 / 2)
def cross_val_score(classifier, comparison_vector, link_true, cv=5,
                    method='fscore'):
    """Evaluate a record-linkage classifier with stratified k-fold CV.

    For every fold a deep copy of ``classifier`` is fitted on the training
    rows of ``comparison_vector`` and used to predict links for the test
    rows; the predictions are scored with the ``recordlinkage`` metric
    named by ``method``.

    Parameters
    ----------
    classifier : object
        Provides ``fit(comparison_vectors, true_links_index)`` and
        ``predict(comparison_vectors)``. It is deep-copied per fold, so the
        instance passed in is never fitted.
    comparison_vector : pandas.DataFrame
        Comparison vectors, one row per candidate record pair (accessed via
        ``.index``, ``.values`` and ``.iloc``).
    link_true : object with an ``index`` attribute
        Its ``index`` holds the true links; only entries also present in
        ``comparison_vector.index`` are used.
    cv : int, default 5
        Number of folds passed to ``StratifiedKFold(n_splits=cv)``.
    method : {'fscore', 'precision', 'recall', 'accuracy', 'specificity'}
        Name of the ``recordlinkage`` metric to compute per fold.

    Returns
    -------
    numpy.ndarray
        One score per fold, in fold order.

    Raises
    ------
    ValueError
        If ``method`` is not one of the supported metric names.
    """
    supported_methods = ('fscore', 'precision', 'recall', 'accuracy',
                         'specificity')
    # Validate before doing any work: previously an unknown name surfaced as
    # an UnboundLocalError on ``score`` inside the first fold.
    if method not in supported_methods:
        raise ValueError(
            "unknown method {!r}; expected one of: {}".format(
                method, ', '.join(supported_methods)))

    # Safe after the whitelist check: every supported name is a metric
    # function exposed by the recordlinkage package.
    metric = getattr(recordlinkage, method)
    needs_total = method in ('accuracy', 'specificity')

    true_index = link_true.index

    # Binary match / non-match labels, used only to stratify the folds.
    # ``Index.intersection`` replaces the ``&`` operator, whose set
    # semantics are deprecated in pandas.
    y = pandas.Series(0, index=comparison_vector.index)
    y.loc[true_index.intersection(comparison_vector.index)] = 1

    skfolds = StratifiedKFold(n_splits=cv)

    scores = []
    for train_index, test_index in skfolds.split(comparison_vector.values,
                                                 y.values):
        # Fresh copy per fold so fitted state never leaks between folds.
        classifier_copy = copy.deepcopy(classifier)

        X_train_fold = comparison_vector.iloc[train_index]
        X_test_fold = comparison_vector.iloc[test_index]

        # True links restricted to the pairs present in each fold.
        links_train = X_train_fold.index.intersection(true_index)
        links_test = X_test_fold.index.intersection(true_index)

        classifier_copy.fit(X_train_fold, links_train)
        links_pred = classifier_copy.predict(X_test_fold)

        if needs_total:
            # Only the test-fold pairs were classified, so the total pair
            # count (from which true negatives are derived) must be the
            # fold size, not the size of the whole data set.
            score = metric(links_test, links_pred, len(X_test_fold))
        else:
            score = metric(links_test, links_pred)

        scores.append(score)

    return numpy.array(scores)
def test_specificity(self):
    """Specificity is 1.0 for CONF_M1 and 0.0 for CONF_M5."""
    specificity_m1 = recordlinkage.specificity(CONF_M1)
    self.assertEqual(specificity_m1, 1.0)

    specificity_m5 = recordlinkage.specificity(CONF_M5)
    self.assertEqual(specificity_m5, 0.0)