def compare_abod_to_svm(): emb1 = load_file("embeddings_matthias.pkl") emb2 = load_file("embeddings_matthias_big.pkl") emb3 = load_file("embeddings_laia.pkl") emb4 = load_file("embeddings_christian.pkl") emb_lfw = load_file("embeddings_lfw.pkl") clf = SVC(kernel='linear', probability=True) clf2 = ABOD() # train user and unknown class label_class = np.repeat(1, np.shape(emb1[0:100])[0]) label_unknown = np.repeat(0, np.shape(emb_lfw)[0]) training_embeddings = np.concatenate((emb1[0:100], emb_lfw)) training_labels = np.concatenate((label_class, label_unknown)) # train svm clf.fit(training_embeddings, training_labels) # train abod clf2.fit(emb1[0:100]) # test on class prediction = clf.predict(emb2[0:100]) errors = len(emb2[0:100]) - np.sum(prediction) print "Error rate: {}%".format(float(errors) / len(emb2[0:100]) * 100.0) # test on similar class prediction = clf.predict(emb4) errors = np.sum(prediction) print "Error rate: {}%".format(float(errors) / len(emb4) * 100.0)
def __init__(self, test_offline=False, cluster=None):
    """Initialize the detector on top of ABOD.

    Parameters:
        test_offline: stored flag for offline-testing mode.
        cluster: optional cluster type; defaults to a MeanShiftCluster
                 instance when omitted.

    NOTE(review): the non-default branch checks ``issubclass`` and stores
    the class object itself, while the default branch stores an *instance*
    — confirm callers really pass a class here (otherwise this should be
    ``isinstance``).
    """
    ABOD.__init__(self)
    self.__test_offline = test_offline
    if cluster is not None:
        assert issubclass(cluster, ClusterBase)
        self.data_cluster = cluster
    else:
        self.data_cluster = MeanShiftCluster()
def generate_classifier(self):
    """Instantiate the classifier selected by ``self.CLASSIFIER``.

    Supported identifiers: 'ABOD', 'IABOD' and 'ISVM' (the latter is seeded
    with the stored unknown-class samples). Any other value yields None.
    """
    selected = self.CLASSIFIER
    if selected == 'ABOD':
        return ABOD()
    if selected == 'IABOD':
        return IABOD()
    if selected == 'ISVM':
        return ISVM(self.__unknown_class_data)
def test_ABOD_1():
    """Timing/debug experiment: project embeddings onto a 99%-variance
    subspace and (optionally) test hull-membership strategies.

    Both hull branches are disabled (``if False``); only the projection,
    the shape printout and the timing line execute.
    """
    clf = ABOD()  # constructed but not used in this experiment
    emb1 = load_embeddings("embeddings_elias.pkl")
    emb2 = load_embeddings("embeddings_matthias.pkl")
    emb3 = load_embeddings("embeddings_matthias_big.pkl")
    emb4 = load_embeddings("embeddings_laia.pkl")
    emb5 = load_embeddings("embeddings_christian.pkl")
    emb_lfw = load_embeddings("embeddings_lfw.pkl")
    # randomize data
    np.random.shuffle(emb2)
    # extract 99% variance subspace
    basis, mean = ExtractSubspace(emb2, 0.99)
    start = time.time()
    # reduce data
    data = ProjectOntoSubspace(emb2, mean, basis)
    dims = np.shape(data)
    # select minimum data to build convex hull
    # (a d-dimensional hull needs at least d+1 points; +4 adds slack)
    min_nr_elems = dims[1] + 4
    data_hull = data[0:min_nr_elems + 1, :]
    print np.shape(data)
    # calculate hull
    # # ----------- Delauny tesselation
    if False:  # disabled: Delaunay-based inside/outside test
        hull = Delaunay(data_hull)
        # print (hull.find_simplex(data[10, :]) >= 0)
        elems_in_hull = np.sum(
            [1 if hull.find_simplex(sample) >= 0 else 0 for sample in data])
        print "Elements inside hull: {} | Hull points: {}".format(
            elems_in_hull, len(data_hull))
    # ----------- Convex hull (subgraph)
    if False:  # disabled: convex-hull vertex analysis
        hull = ConvexHull(data_hull)
        # the vertices of the convex hull
        hull_points = hull.vertices
        # points inside hull
        print set(range(len(data_hull))).difference(hull.vertices)
    print "elements: {} | time: {}".format(min_nr_elems, time.time() - start)
def test_ABOD(): clf = ABOD() emb1 = load_embeddings("embeddings_elias.pkl") emb2 = load_embeddings("embeddings_matthias.pkl") emb3 = load_embeddings("embeddings_matthias_big.pkl") emb4 = load_embeddings("embeddings_laia.pkl") emb5 = load_embeddings("embeddings_christian.pkl") emb_lfw = load_embeddings("embeddings_lfw.pkl") clf.fit(emb2) # class_sample = emb3[100,:] # outlier_sample = emb1[30,:] # print class_sample start = time.time() abod_class = clf.predict_approx(emb3) print "time: ".format(time.time() - start) return abod_outliers = clf.predict(emb5) step = 0.0001 start = 0.005 stop = 0.6 il = [] ul = [] x = np.arange(start, stop, step) for thresh in x: il.append( float(len(abod_class[abod_class < thresh])) / len(abod_class) * 100.0) ul.append( float(len(abod_outliers[abod_outliers > thresh])) / len(abod_outliers) * 100.0) plt.plot(x, il, color='green', label="Inliers") plt.plot(x, ul, color='red', label="Outliers") plt.title("Classification Error") plt.xlabel("Threshold") plt.ylabel("Error [%]") plt.legend() plt.show() # thresh = 0.2 print "error il: {}/{} : {}%".format( len(abod_class[abod_class < 0.2]), len(abod_class), float(len(abod_class[abod_class < 0.2])) / len(abod_class) * 100.0) print "error ul: {}/{} : {}%".format( len(abod_outliers[abod_outliers > 0.2]), len(abod_outliers), float(len(abod_outliers[abod_outliers > 0.2])) / len(abod_outliers) * 100.0)
def test_against_threshold(): emb1 = load_file("embeddings_matthias.pkl") emb2 = load_file("embeddings_matthias_big.pkl") emb3 = load_file("embeddings_laia.pkl") # emb4 = load_file("embeddings_christian.pkl") emb4 = load_file("embeddings_christian_clean.pkl") emb_lfw = load_file("embeddings_lfw.pkl") # random.shuffle(emb1) random.shuffle(emb2) random.shuffle(emb4) # random.shuffle(emb4) train = emb1[0:50] test = emb2[0:50] ul = emb4[0:50] # ------ ABOD if True: print "----------------ABOD-----------------" clf = ABOD() clf.fit(train) pred_abod = clf.predict(ul) error_rate = float(len(pred_abod[pred_abod > 0])) / float( len(ul)) * 100 print "Misdetections ABOD (ul): {} - {}%".format( len(pred_abod[pred_abod > 0]), error_rate) pred_abod = clf.predict(test) error_rate = float(len(pred_abod[pred_abod > 0])) / float( len(ul)) * 100 print "Misdetections ABOD (test): {} - {}%".format( len(pred_abod[pred_abod < 0]), error_rate) # ------ THRESHOLDING print "--------------THRESHOLDING-------------------" t = BinaryThreshold() t.partial_fit(train) # test on outliers pred_thresh = t.predict(ul, True) error_rate = float(len(pred_thresh[pred_thresh > 0])) / float( len(pred_thresh)) * 100 print "Misdetections Thresholding (ul): {}/{} - {}%".format( len(pred_thresh[pred_thresh > 0]), len(pred_thresh), error_rate) # print np.where(pred_thresh == False)[0] # print np.nonzero(pred_thresh == 0)[0] # pred_thresh = t.predict(test, True) # test on inliers pred_thresh = t.predict(test, True) print "Misdetections Thresholding (test): {}/{}".format( len(np.where(pred_thresh == False)[0]), len(pred_thresh))
def cascaded_classifiers(): emb1 = load_file("embeddings_matthias.pkl") emb2 = load_file("embeddings_matthias_big.pkl") emb3 = load_file("embeddings_laia.pkl") emb4 = load_file("embeddings_christian.pkl") emb_lfw = load_file("embeddings_lfw.pkl") clf = SVC(kernel='linear', probability=True, C=1) clf2 = ABOD() # random.shuffle(emb1) train = emb1[0:50] test = emb2 ul = emb4 # train user and unknown class label_class = np.repeat(1, np.shape(train)[0]) label_unknown = np.repeat(0, np.shape(emb_lfw)[0]) training_embeddings = np.concatenate((train, emb_lfw)) training_labels = np.concatenate((label_class, label_unknown)) clf.fit(training_embeddings, training_labels) clf2.fit(train) # --------------------- test on class prediction = clf.predict(test) errors = len(test) - np.sum(prediction) print "SVM Error rate: {}%".format(float(errors) / len(test) * 100.0) temp = test # filter samples classified as 'unknown' filtered = temp[prediction == 0] # eval on abod abod_values = clf2.predict(filtered) errors = abod_values[abod_values < 0] print "Total error (inliers classified as outliers): {}%".format( float((len(errors)) / float(len(test)))) print "{}/{} additional inliers have been detected".format( len(abod_values[abod_values > 0]), len(filtered)) # --------------------- test on outlier print "-------------testing on outliers----------------" prediction = clf.predict(ul) errors = np.sum(prediction) print "SVM Error rate: {}%".format(float(errors) / len(ul) * 100.0) temp = ul # filter samples classified as 'inliers' filtered = temp[prediction == 1] # eval on abod abod_values = clf2.predict(filtered) errors = abod_values[abod_values > 0] print "Total error (outliers not detected): {}%".format( float((len(errors)) / float(len(ul)))) print "{}/{} additional outliers have been detected".format( len(abod_values[abod_values < 0]), len(filtered))
def eval_on_subspace(): emb1 = load_embeddings("embeddings_matthias.pkl") emb2 = load_embeddings("embeddings_matthias_big.pkl") emb3 = load_embeddings("embeddings_laia.pkl") emb4 = load_embeddings("embeddings_christian.pkl") emb_lfw = load_embeddings("embeddings_lfw.pkl") ref = emb1[0:40, :] test = emb1[40:60, :] ul = emb4[0:10, :] clf = ABOD() metric = 'euclidean' # extract 99.9% subspace # basis, mean = ExtractSubspace(ref, 0.9) basis, mean = ExtractInverseSubspace(ref, 0.7) print "--- reduced dimension to: {}".format(np.size(basis, 1)) # before sep1 = pairwise_distances(ref, test, metric=metric) sep2 = pairwise_distances(ref, ul, metric=metric) m1 = np.mean(sep1, axis=0) m2 = np.mean(sep2, axis=0) print "Original Space:" print "Max. dist.: inliers: {:.3f}, outliers: {:.3f}".format( sep1.max(), sep2.max()) clf.fit(ref) clf.predict(test) clf.predict(ul) # ---------------------------------------------- # project data onto subspace ref = ProjectOntoSubspace(ref, mean, basis) ul = ProjectOntoSubspace(ul, mean, basis) test = ProjectOntoSubspace(test, mean, basis) # compare sep1 = pairwise_distances(ref, test, metric=metric) sep2 = pairwise_distances(ref, ul, metric=metric) # meandist inliers print "------------------meandist to inliers-----------------------" print m1 print np.mean(sep1, axis=0) print "Mean decrease (pos): ", m1 - np.mean(sep1, axis=0) print "-----------------------------------------" # meandist outliers print "------------------meandist to outliers-----------------------" print m2 print np.mean(sep2, axis=0) print "Mean decrease (neg): ", m2 - np.mean(sep2, axis=0) clf.fit(ref) clf.predict(test) clf.predict(ul) print "Inlier Space:" print "Max. dist.: inliers: {:.3f}, outliers: {:.3f}".format( sep1.max(), sep2.max())
def ROC(clf): # PARAMETERS nr_training_samples = 5 nr_test_samples = 400 save_csv = True combine_scenes = False # --------------------------------------------- emb0 = load_embeddings("embeddings_matthias.pkl") emb1 = load_embeddings("matthias_test.pkl") emb2 = load_embeddings("matthias_test2.pkl") emb3 = load_embeddings("embeddings_christian_clean.pkl") emb_lfw = load_embeddings("embeddings_lfw.pkl") class_ds1 = emb1 class_ds2 = emb2 outlier_ds = emb_lfw # combine the two scene datasets if combine_scenes: num_samples_each = np.max([len(class_ds1), len(class_ds2)]) class_ds_combined = np.concatenate( (class_ds1[0:num_samples_each], class_ds2[0:num_samples_each])) else: class_ds_combined = class_ds1 # shuffle random.shuffle(class_ds_combined) # --------------------------------------------- # fit # clf = svm.OneClassSVM(kernel='linear') clf = ABOD() clf.fit(class_ds_combined[0:nr_training_samples]) # true labels labels = np.concatenate( (np.repeat(1, nr_test_samples / 2), np.repeat(2, nr_test_samples / 2))) test_samples = np.concatenate( (emb2[0:nr_test_samples / 2], emb3[0:nr_test_samples / 2])) # scores which are thresholded scores = clf.decision_function(test_samples) fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=1) auc_val = auc(fpr, tpr) # --------------------------------------------- print "AUC: {}".format(auc_val) print "tpr: ", tpr print "fpr: ", fpr print "thresholds: ", thresholds precision, recall, _ = precision_recall_curve(labels, scores, pos_label=1) # print "Precision: ", precision # print "Recall: ", recall plt.plot(recall, precision) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Extension of Precision-Recall curve to multi-class') plt.legend(loc="lower right") plt.show()
auc_val = auc(fpr, tpr) # --------------------------------------------- print "AUC: {}".format(auc_val) print "tpr: ", tpr print "fpr: ", fpr print "thresholds: ", thresholds precision, recall, _ = precision_recall_curve(labels, scores, pos_label=1) # print "Precision: ", precision # print "Recall: ", recall plt.plot(recall, precision) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Extension of Precision-Recall curve to multi-class') plt.legend(loc="lower right") plt.show() # ================================= # # Main if __name__ == '__main__': clf = ABOD() ROC(clf)
def test_lmnn0():
    """Evaluate LMNN metric learning on two-person embedding data.

    Fits an LMNN transform on 30 training samples of each class, then
    compares mean cosine distances (test vs. train, per class pair) and
    ABOD predictions in the original and the learned space.
    """
    d0 = load_data('embeddings_matthias.pkl')
    d1 = load_data('embeddings_christian.pkl')
    # 30 training / 10 test samples per class
    d0_train = d0[0:30,:]
    d1_train = d1[0:30,:]
    d0_test = d0[30:40,:]
    d1_test = d1[30:40,:]
    # ---------- train
    labels_train = np.concatenate((np.repeat(0, len(d0_train)), np.repeat(1, len(d1_train))))
    data_train = np.concatenate((d0_train, d1_train))
    lmnn = LMNN(k=3, learn_rate=1e-6)
    start = time.time()
    lmnn.fit(data_train, labels_train)
    print "Fitting took {} seconds".format(time.time()-start)
    # ---------- test
    # NOTE(review): "{:2f}" below is width-2 fixed-point (6 decimals);
    # "{:.2f}" (2 decimals) was probably intended -- confirm before changing.
    print "---- Evaluation in original space: Metric against Class 0"
    print " Smaller valuer = better choice"
    cos_dist_orig00 = np.mean(pairwise_distances(d0_test, d0_train, metric='cosine'))
    cos_dist_orig01 = np.mean(pairwise_distances(d0_test, d1_train, metric='cosine'))
    cos_dist_orig10 = np.mean(pairwise_distances(d1_test, d0_train, metric='cosine'))
    cos_dist_orig11 = np.mean(pairwise_distances(d1_test, d1_train, metric='cosine'))
    print "Class 0 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig00, cos_dist_orig01)
    print "Class 1 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig10, cos_dist_orig11)
    print "---- Evaluation in learned space:"
    print " Smaller valuer = better choice"
    # same distances after applying the learned LMNN transform
    cos_dist_orig00 = np.mean(pairwise_distances(lmnn.transform(d0_test), lmnn.transform(d0_train), metric='cosine'))
    cos_dist_orig01 = np.mean(pairwise_distances(lmnn.transform(d0_test), lmnn.transform(d1_train), metric='cosine'))
    cos_dist_orig10 = np.mean(pairwise_distances(lmnn.transform(d1_test), lmnn.transform(d0_train), metric='cosine'))
    cos_dist_orig11 = np.mean(pairwise_distances(lmnn.transform(d1_test), lmnn.transform(d1_train), metric='cosine'))
    print "Class 0 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig00, cos_dist_orig01)
    print "Class 1 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig10, cos_dist_orig11)
    print "===========================ABOD===================================="
    # one ABOD per class, in the original ("orig") and learned ("opt") space
    clf0_orig = ABOD()
    clf1_orig = ABOD()
    clf0_opt = ABOD()
    clf1_opt = ABOD()
    # fit classifiers
    clf0_orig.fit(d0_train)
    clf1_orig.fit(d1_train)
    clf0_opt.fit(lmnn.transform(d0_train))
    clf1_opt.fit(lmnn.transform(d1_train))
    # predict
    print "\n-----------ABOD values in original space:------------------\n\n"
    clf0_orig.predict(d0_test)
    clf0_orig.predict(d1_test)
    clf1_orig.predict(d0_test)
    clf1_orig.predict(d1_test)
    # predict
    print "\n-----------ABOD values in custom space:------------------\n\n"
    clf0_opt.predict(lmnn.transform(d0_test))
    clf0_opt.predict(lmnn.transform(d1_test))
    clf1_opt.predict(lmnn.transform(d0_test))
    clf1_opt.predict(lmnn.transform(d1_test))