def cascaded_classifiers():
    """Evaluate an SVM -> ABOD cascade and print the resulting error rates.

    A linear ``SVC`` is trained to separate the first 50 embeddings of
    ``embeddings_matthias.pkl`` (label 1) from the LFW embeddings (label 0).
    An ``ABOD`` model is fitted on the same 50 user embeddings.

    Two evaluations are printed:

    * on ``embeddings_matthias_big.pkl`` (treated as the known user): the SVM
      error rate, then how many of the SVM-rejected samples ABOD counts as
      inliers / outliers;
    * on ``embeddings_christian.pkl`` (treated as unknown): the SVM error
      rate, then how many of the SVM-accepted samples ABOD counts as
      outliers / inliers.

    The counting treats ABOD outputs ``> 0`` as inliers and ``< 0`` as
    outliers; outputs equal to 0 fall in neither group.

    Nothing is returned; all results go to stdout.  Single-argument
    ``print(...)`` is used so the output is the same on Python 2 and 3.
    """
    emb1 = load_file("embeddings_matthias.pkl")
    emb2 = load_file("embeddings_matthias_big.pkl")
    emb4 = load_file("embeddings_christian.pkl")
    emb_lfw = load_file("embeddings_lfw.pkl")

    clf = SVC(kernel='linear', probability=True, C=1)
    clf2 = ABOD()

    train = emb1[0:50]
    test = emb2
    ul = emb4

    # Train the SVM on "user" (1) versus "unknown" (0, the LFW embeddings).
    label_class = np.repeat(1, np.shape(train)[0])
    label_unknown = np.repeat(0, np.shape(emb_lfw)[0])
    training_embeddings = np.concatenate((train, emb_lfw))
    training_labels = np.concatenate((label_class, label_unknown))
    clf.fit(training_embeddings, training_labels)
    # ABOD only sees the user's own embeddings.
    clf2.fit(train)

    # --------------------- test on class
    prediction = clf.predict(test)
    # Every sample here belongs to the user, so each 0 prediction is an error.
    errors = len(test) - np.sum(prediction)
    print("SVM Error rate: {}%".format(float(errors) / len(test) * 100.0))

    # Second stage: re-check only the samples the SVM labelled 'unknown'.
    filtered = test[prediction == 0]
    abod_values = clf2.predict(filtered)
    errors = abod_values[abod_values < 0]
    # Scaled by 100 so the value matches its '%' label (and the SVM rate).
    print("Total error (inliers classified as outliers): {}%".format(
        float(len(errors)) / float(len(test)) * 100.0))
    print("{}/{} additional inliers have been detected".format(
        len(abod_values[abod_values > 0]), len(filtered)))

    # --------------------- test on outlier
    print("-------------testing on outliers----------------")
    prediction = clf.predict(ul)
    # Every sample here is unknown, so each 1 prediction is an error.
    errors = np.sum(prediction)
    print("SVM Error rate: {}%".format(float(errors) / len(ul) * 100.0))

    # Second stage: re-check only the samples the SVM labelled as the user.
    filtered = ul[prediction == 1]
    abod_values = clf2.predict(filtered)
    errors = abod_values[abod_values > 0]
    # Scaled by 100 so the value matches its '%' label (and the SVM rate).
    print("Total error (outliers not detected): {}%".format(
        float(len(errors)) / float(len(ul)) * 100.0))
    print("{}/{} additional outliers have been detected".format(
        len(abod_values[abod_values < 0]), len(filtered)))
def test_against_threshold():
    """Compare ABOD against BinaryThreshold on inlier and outlier samples.

    Both models are fitted on the first 50 embeddings of
    ``embeddings_matthias.pkl``.  They are then evaluated on 50 shuffled
    embeddings from ``embeddings_matthias_big.pkl`` (``test``, treated as
    inliers) and 50 shuffled embeddings from
    ``embeddings_christian_clean.pkl`` (``ul``, treated as outliers).

    For the outlier set a prediction ``> 0`` is counted as a misdetection;
    for the inlier set an ABOD prediction ``< 0`` (or a threshold prediction
    equal to ``False``) is counted as a misdetection.

    Nothing is returned; all results go to stdout.  Single-argument
    ``print(...)`` is used so the output is the same on Python 2 and 3.
    """
    emb1 = load_file("embeddings_matthias.pkl")
    emb2 = load_file("embeddings_matthias_big.pkl")
    emb4 = load_file("embeddings_christian_clean.pkl")

    # np.random.shuffle permutes along the first axis in place.  The stdlib
    # random.shuffle swaps elements via `x[i], x[j] = x[j], x[i]`; on a 2-D
    # ndarray both sides are views, so rows end up duplicated.
    # NOTE(review): assumes load_file returns ndarrays (np.random.shuffle
    # also accepts plain lists) - confirm.
    np.random.shuffle(emb2)
    np.random.shuffle(emb4)

    train = emb1[0:50]
    test = emb2[0:50]
    ul = emb4[0:50]

    # ------ ABOD
    print("----------------ABOD-----------------")
    clf = ABOD()
    clf.fit(train)

    # Outliers: any positive value means the outlier was accepted.
    pred_abod = clf.predict(ul)
    missed = len(pred_abod[pred_abod > 0])
    error_rate = float(missed) / float(len(ul)) * 100
    print("Misdetections ABOD (ul): {} - {}%".format(missed, error_rate))

    # Inliers: any negative value means the inlier was rejected.  The rate
    # uses the same count that is printed and is relative to the test set.
    pred_abod = clf.predict(test)
    rejected = len(pred_abod[pred_abod < 0])
    error_rate = float(rejected) / float(len(test)) * 100
    print("Misdetections ABOD (test): {} - {}%".format(rejected, error_rate))

    # ------ THRESHOLDING
    print("--------------THRESHOLDING-------------------")
    t = BinaryThreshold()
    t.partial_fit(train)

    # test on outliers
    # The second positional argument (True) is passed through unchanged; its
    # meaning is defined by BinaryThreshold.predict.
    pred_thresh = t.predict(ul, True)
    missed = len(pred_thresh[pred_thresh > 0])
    error_rate = float(missed) / float(len(pred_thresh)) * 100
    print("Misdetections Thresholding (ul): {}/{} - {}%".format(
        missed, len(pred_thresh), error_rate))

    # test on inliers
    pred_thresh = t.predict(test, True)
    print("Misdetections Thresholding (test): {}/{}".format(
        len(np.where(pred_thresh == False)[0]), len(pred_thresh)))
def plot_threshold_influence():
    """Plot ABOD classification error as a function of the decision threshold.

    An ``ABOD`` model is fitted on the first 100 embeddings of
    ``embeddings_matthias.pkl``.  Its predictions for the first 100
    embeddings of ``embeddings_matthias_big.pkl`` (inliers) and for all of
    ``embeddings_christian.pkl`` (outliers) are compared against every
    threshold in ``np.arange(0.005, 0.6, 0.0001)``:

    * inlier error  = percentage of inlier predictions below the threshold,
    * outlier error = percentage of outlier predictions above the threshold.

    Both curves are shown with matplotlib, then the two error rates at a
    threshold of 0.2 are printed.  Nothing is returned.
    """
    detector = ABOD()

    # All six files are loaded exactly as before; only three are used below.
    elias = load_file("embeddings_elias.pkl")
    matthias = load_file("embeddings_matthias.pkl")
    matthias_big = load_file("embeddings_matthias_big.pkl")
    laia = load_file("embeddings_laia.pkl")
    christian = load_file("embeddings_christian.pkl")
    lfw = load_file("embeddings_lfw.pkl")

    detector.fit(matthias[0:100])
    abod_class = detector.predict(matthias_big[0:100])
    abod_outliers = detector.predict(christian)

    def inlier_count(limit):
        # Inlier predictions that fall below the threshold.
        return len(abod_class[abod_class < limit])

    def outlier_count(limit):
        # Outlier predictions that fall above the threshold.
        return len(abod_outliers[abod_outliers > limit])

    def inlier_error(limit):
        return float(inlier_count(limit)) / len(abod_class) * 100.0

    def outlier_error(limit):
        return float(outlier_count(limit)) / len(abod_outliers) * 100.0

    thresholds = np.arange(0.005, 0.6, 0.0001)
    inlier_curve = [inlier_error(limit) for limit in thresholds]
    outlier_curve = [outlier_error(limit) for limit in thresholds]

    plt.plot(thresholds, inlier_curve, color='green', label="Inliers")
    plt.plot(thresholds, outlier_curve, color='red', label="Outliers")
    plt.title("Classification Error")
    plt.xlabel("Threshold")
    plt.ylabel("Error [%]")
    plt.legend()
    plt.show()

    # Summary at one fixed operating point.
    fixed = 0.2
    print("error il: {}/{} : {}%".format(
        inlier_count(fixed), len(abod_class), inlier_error(fixed)))
    print("error ul: {}/{} : {}%".format(
        outlier_count(fixed), len(abod_outliers), outlier_error(fixed)))
def eval_on_subspace():
    """Compare distances and ABOD output before and after a subspace projection.

    Reference (rows 0-39) and test (rows 40-59) samples come from
    ``embeddings_matthias.pkl``; the outlier samples are rows 0-9 of
    ``embeddings_christian.pkl``.  A basis and mean are obtained from
    ``ExtractInverseSubspace(ref, 0.7)`` (the meaning of ``0.7`` is defined
    by that helper), and all three sets are projected with
    ``ProjectOntoSubspace``.

    Printed, in order: the basis dimension, the maximum Euclidean
    reference-to-sample distances in the original space, the per-sample mean
    distances before/after projection with their differences, and the
    maximum distances in the projected space.  ``ABOD`` is fitted and run on
    both spaces; its return values are discarded here (presumably it reports
    its results itself - confirm).  Nothing is returned.
    """
    matthias = load_embeddings("embeddings_matthias.pkl")
    # The next two and the LFW set are loaded as before but not used below.
    matthias_big = load_embeddings("embeddings_matthias_big.pkl")
    laia = load_embeddings("embeddings_laia.pkl")
    christian = load_embeddings("embeddings_christian.pkl")
    lfw = load_embeddings("embeddings_lfw.pkl")

    ref = matthias[0:40, :]
    test = matthias[40:60, :]
    ul = christian[0:10, :]

    detector = ABOD()
    metric = 'euclidean'

    basis, mean = ExtractInverseSubspace(ref, 0.7)
    print("--- reduced dimension to: {}".format(np.size(basis, 1)))

    # ---- original space
    dist_inliers = pairwise_distances(ref, test, metric=metric)
    dist_outliers = pairwise_distances(ref, ul, metric=metric)
    mean_in_before = np.mean(dist_inliers, axis=0)
    mean_out_before = np.mean(dist_outliers, axis=0)

    print("Original Space:")
    print("Max. dist.: inliers: {:.3f}, outliers: {:.3f}".format(
        dist_inliers.max(), dist_outliers.max()))

    detector.fit(ref)
    detector.predict(test)
    detector.predict(ul)

    # ---- projected space
    ref = ProjectOntoSubspace(ref, mean, basis)
    ul = ProjectOntoSubspace(ul, mean, basis)
    test = ProjectOntoSubspace(test, mean, basis)

    dist_inliers = pairwise_distances(ref, test, metric=metric)
    dist_outliers = pairwise_distances(ref, ul, metric=metric)

    print("------------------meandist to inliers-----------------------")
    print(mean_in_before)
    print(np.mean(dist_inliers, axis=0))
    # Label and value joined by one space, as a two-item print statement does.
    print(" ".join(("Mean decrease (pos): ",
                    str(mean_in_before - np.mean(dist_inliers, axis=0)))))
    print("-----------------------------------------")

    print("------------------meandist to outliers-----------------------")
    print(mean_out_before)
    print(np.mean(dist_outliers, axis=0))
    print(" ".join(("Mean decrease (neg): ",
                    str(mean_out_before - np.mean(dist_outliers, axis=0)))))

    detector.fit(ref)
    detector.predict(test)
    detector.predict(ul)

    print("Inlier Space:")
    print("Max. dist.: inliers: {:.3f}, outliers: {:.3f}".format(
        dist_inliers.max(), dist_outliers.max()))
def test_lmnn0():
    """Check whether an LMNN-learned metric separates two identities better.

    Rows 0-29 of ``embeddings_matthias.pkl`` (class 0) and
    ``embeddings_christian.pkl`` (class 1) are used to fit
    ``LMNN(k=3, learn_rate=1e-6)``; rows 30-39 of each file are the test
    samples.  The function prints:

    * the fitting time,
    * mean cosine distances from each test set to each training set, first
      in the original space and then after ``lmnn.transform``,
    * whatever ``ABOD.predict`` reports for per-class ABOD models in both
      spaces (its return values are discarded here).

    Each array is passed through ``lmnn.transform`` once and the result is
    reused.  Nothing is returned.
    """
    d0 = load_data('embeddings_matthias.pkl')
    d1 = load_data('embeddings_christian.pkl')

    d0_train = d0[0:30, :]
    d1_train = d1[0:30, :]
    d0_test = d0[30:40, :]
    d1_test = d1[30:40, :]

    # ---------- train
    labels_train = np.concatenate(
        (np.repeat(0, len(d0_train)), np.repeat(1, len(d1_train))))
    data_train = np.concatenate((d0_train, d1_train))

    lmnn = LMNN(k=3, learn_rate=1e-6)
    start = time.time()
    lmnn.fit(data_train, labels_train)
    print("Fitting took {} seconds".format(time.time() - start))

    def mean_cosine(samples, reference):
        # Mean over all sample/reference pairs of the cosine distance.
        return np.mean(pairwise_distances(samples, reference, metric='cosine'))

    # NOTE(review): '{:2f}' means minimum width 2 with default precision
    # (6 decimals); '{:.2f}' may have been intended.  Left unchanged.
    line_class0 = "Class 0 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}"
    line_class1 = "Class 1 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}"

    # ---------- test
    print("---- Evaluation in original space: Metric against Class 0")
    print("     Smaller valuer = better choice")
    print(line_class0.format(mean_cosine(d0_test, d0_train),
                             mean_cosine(d0_test, d1_train)))
    print(line_class1.format(mean_cosine(d1_test, d0_train),
                             mean_cosine(d1_test, d1_train)))

    print("---- Evaluation in learned space:")
    print("     Smaller valuer = better choice")
    # Transform each array once; the results are reused for the distance
    # report and for the ABOD models below.
    d0_train_lmnn = lmnn.transform(d0_train)
    d1_train_lmnn = lmnn.transform(d1_train)
    d0_test_lmnn = lmnn.transform(d0_test)
    d1_test_lmnn = lmnn.transform(d1_test)
    print(line_class0.format(mean_cosine(d0_test_lmnn, d0_train_lmnn),
                             mean_cosine(d0_test_lmnn, d1_train_lmnn)))
    print(line_class1.format(mean_cosine(d1_test_lmnn, d0_train_lmnn),
                             mean_cosine(d1_test_lmnn, d1_train_lmnn)))

    print("===========================ABOD====================================")
    clf0_orig = ABOD()
    clf1_orig = ABOD()
    clf0_opt = ABOD()
    clf1_opt = ABOD()

    # fit classifiers: one ABOD model per class and per space
    clf0_orig.fit(d0_train)
    clf1_orig.fit(d1_train)
    clf0_opt.fit(d0_train_lmnn)
    clf1_opt.fit(d1_train_lmnn)

    # predict
    print("\n-----------ABOD values in original space:------------------\n\n")
    clf0_orig.predict(d0_test)
    clf0_orig.predict(d1_test)
    clf1_orig.predict(d0_test)
    clf1_orig.predict(d1_test)

    # predict
    print("\n-----------ABOD values in custom space:------------------\n\n")
    clf0_opt.predict(d0_test_lmnn)
    clf0_opt.predict(d1_test_lmnn)
    clf1_opt.predict(d0_test_lmnn)
    clf1_opt.predict(d1_test_lmnn)