def compare_abod_to_svm(): emb1 = load_file("embeddings_matthias.pkl") emb2 = load_file("embeddings_matthias_big.pkl") emb3 = load_file("embeddings_laia.pkl") emb4 = load_file("embeddings_christian.pkl") emb_lfw = load_file("embeddings_lfw.pkl") clf = SVC(kernel='linear', probability=True) clf2 = ABOD() # train user and unknown class label_class = np.repeat(1, np.shape(emb1[0:100])[0]) label_unknown = np.repeat(0, np.shape(emb_lfw)[0]) training_embeddings = np.concatenate((emb1[0:100], emb_lfw)) training_labels = np.concatenate((label_class, label_unknown)) # train svm clf.fit(training_embeddings, training_labels) # train abod clf2.fit(emb1[0:100]) # test on class prediction = clf.predict(emb2[0:100]) errors = len(emb2[0:100]) - np.sum(prediction) print "Error rate: {}%".format(float(errors) / len(emb2[0:100]) * 100.0) # test on similar class prediction = clf.predict(emb4) errors = np.sum(prediction) print "Error rate: {}%".format(float(errors) / len(emb4) * 100.0)
def __init__(self, test_offline=False, cluster=None):
    """Initialize the detector on top of ABOD.

    Parameters:
        test_offline: stored flag for offline-testing mode.
        cluster: optional cluster type; defaults to a MeanShiftCluster
                 instance when omitted.

    NOTE(review): the non-default branch checks ``issubclass`` and stores
    the class object itself, while the default branch stores an *instance*
    — confirm callers really pass a class here (otherwise this should be
    ``isinstance``).
    """
    ABOD.__init__(self)
    self.__test_offline = test_offline
    if cluster is not None:
        assert issubclass(cluster, ClusterBase)
        self.data_cluster = cluster
    else:
        self.data_cluster = MeanShiftCluster()
def generate_classifier(self):
    """Instantiate the classifier selected by ``self.CLASSIFIER``.

    Supported identifiers: 'ABOD', 'IABOD' and 'ISVM' (the latter is seeded
    with the stored unknown-class samples). Any other value yields None.
    """
    selected = self.CLASSIFIER
    if selected == 'ABOD':
        return ABOD()
    if selected == 'IABOD':
        return IABOD()
    if selected == 'ISVM':
        return ISVM(self.__unknown_class_data)
def test_ABOD_1():
    """Timing/debug experiment: project embeddings onto a 99%-variance
    subspace and (optionally) test hull-membership strategies.

    Both hull branches are disabled (``if False``); only the projection,
    the shape printout and the timing line execute.
    """
    clf = ABOD()  # constructed but not used in this experiment
    emb1 = load_embeddings("embeddings_elias.pkl")
    emb2 = load_embeddings("embeddings_matthias.pkl")
    emb3 = load_embeddings("embeddings_matthias_big.pkl")
    emb4 = load_embeddings("embeddings_laia.pkl")
    emb5 = load_embeddings("embeddings_christian.pkl")
    emb_lfw = load_embeddings("embeddings_lfw.pkl")
    # randomize data
    np.random.shuffle(emb2)
    # extract 99% variance subspace
    basis, mean = ExtractSubspace(emb2, 0.99)
    start = time.time()
    # reduce data
    data = ProjectOntoSubspace(emb2, mean, basis)
    dims = np.shape(data)
    # select minimum data to build convex hull
    # (a d-dimensional hull needs at least d+1 points; +4 adds slack)
    min_nr_elems = dims[1] + 4
    data_hull = data[0:min_nr_elems + 1, :]
    print np.shape(data)
    # calculate hull
    # # ----------- Delauny tesselation
    if False:  # disabled: Delaunay-based inside/outside test
        hull = Delaunay(data_hull)
        # print (hull.find_simplex(data[10, :]) >= 0)
        elems_in_hull = np.sum(
            [1 if hull.find_simplex(sample) >= 0 else 0 for sample in data])
        print "Elements inside hull: {} | Hull points: {}".format(
            elems_in_hull, len(data_hull))
    # ----------- Convex hull (subgraph)
    if False:  # disabled: convex-hull vertex analysis
        hull = ConvexHull(data_hull)
        # the vertices of the convex hull
        hull_points = hull.vertices
        # points inside hull
        print set(range(len(data_hull))).difference(hull.vertices)
    print "elements: {} | time: {}".format(min_nr_elems, time.time() - start)
def test_ABOD(): clf = ABOD() emb1 = load_embeddings("embeddings_elias.pkl") emb2 = load_embeddings("embeddings_matthias.pkl") emb3 = load_embeddings("embeddings_matthias_big.pkl") emb4 = load_embeddings("embeddings_laia.pkl") emb5 = load_embeddings("embeddings_christian.pkl") emb_lfw = load_embeddings("embeddings_lfw.pkl") clf.fit(emb2) # class_sample = emb3[100,:] # outlier_sample = emb1[30,:] # print class_sample start = time.time() abod_class = clf.predict_approx(emb3) print "time: ".format(time.time() - start) return abod_outliers = clf.predict(emb5) step = 0.0001 start = 0.005 stop = 0.6 il = [] ul = [] x = np.arange(start, stop, step) for thresh in x: il.append( float(len(abod_class[abod_class < thresh])) / len(abod_class) * 100.0) ul.append( float(len(abod_outliers[abod_outliers > thresh])) / len(abod_outliers) * 100.0) plt.plot(x, il, color='green', label="Inliers") plt.plot(x, ul, color='red', label="Outliers") plt.title("Classification Error") plt.xlabel("Threshold") plt.ylabel("Error [%]") plt.legend() plt.show() # thresh = 0.2 print "error il: {}/{} : {}%".format( len(abod_class[abod_class < 0.2]), len(abod_class), float(len(abod_class[abod_class < 0.2])) / len(abod_class) * 100.0) print "error ul: {}/{} : {}%".format( len(abod_outliers[abod_outliers > 0.2]), len(abod_outliers), float(len(abod_outliers[abod_outliers > 0.2])) / len(abod_outliers) * 100.0)
def test_against_threshold(): emb1 = load_file("embeddings_matthias.pkl") emb2 = load_file("embeddings_matthias_big.pkl") emb3 = load_file("embeddings_laia.pkl") # emb4 = load_file("embeddings_christian.pkl") emb4 = load_file("embeddings_christian_clean.pkl") emb_lfw = load_file("embeddings_lfw.pkl") # random.shuffle(emb1) random.shuffle(emb2) random.shuffle(emb4) # random.shuffle(emb4) train = emb1[0:50] test = emb2[0:50] ul = emb4[0:50] # ------ ABOD if True: print "----------------ABOD-----------------" clf = ABOD() clf.fit(train) pred_abod = clf.predict(ul) error_rate = float(len(pred_abod[pred_abod > 0])) / float( len(ul)) * 100 print "Misdetections ABOD (ul): {} - {}%".format( len(pred_abod[pred_abod > 0]), error_rate) pred_abod = clf.predict(test) error_rate = float(len(pred_abod[pred_abod > 0])) / float( len(ul)) * 100 print "Misdetections ABOD (test): {} - {}%".format( len(pred_abod[pred_abod < 0]), error_rate) # ------ THRESHOLDING print "--------------THRESHOLDING-------------------" t = BinaryThreshold() t.partial_fit(train) # test on outliers pred_thresh = t.predict(ul, True) error_rate = float(len(pred_thresh[pred_thresh > 0])) / float( len(pred_thresh)) * 100 print "Misdetections Thresholding (ul): {}/{} - {}%".format( len(pred_thresh[pred_thresh > 0]), len(pred_thresh), error_rate) # print np.where(pred_thresh == False)[0] # print np.nonzero(pred_thresh == 0)[0] # pred_thresh = t.predict(test, True) # test on inliers pred_thresh = t.predict(test, True) print "Misdetections Thresholding (test): {}/{}".format( len(np.where(pred_thresh == False)[0]), len(pred_thresh))
def cascaded_classifiers(): emb1 = load_file("embeddings_matthias.pkl") emb2 = load_file("embeddings_matthias_big.pkl") emb3 = load_file("embeddings_laia.pkl") emb4 = load_file("embeddings_christian.pkl") emb_lfw = load_file("embeddings_lfw.pkl") clf = SVC(kernel='linear', probability=True, C=1) clf2 = ABOD() # random.shuffle(emb1) train = emb1[0:50] test = emb2 ul = emb4 # train user and unknown class label_class = np.repeat(1, np.shape(train)[0]) label_unknown = np.repeat(0, np.shape(emb_lfw)[0]) training_embeddings = np.concatenate((train, emb_lfw)) training_labels = np.concatenate((label_class, label_unknown)) clf.fit(training_embeddings, training_labels) clf2.fit(train) # --------------------- test on class prediction = clf.predict(test) errors = len(test) - np.sum(prediction) print "SVM Error rate: {}%".format(float(errors) / len(test) * 100.0) temp = test # filter samples classified as 'unknown' filtered = temp[prediction == 0] # eval on abod abod_values = clf2.predict(filtered) errors = abod_values[abod_values < 0] print "Total error (inliers classified as outliers): {}%".format( float((len(errors)) / float(len(test)))) print "{}/{} additional inliers have been detected".format( len(abod_values[abod_values > 0]), len(filtered)) # --------------------- test on outlier print "-------------testing on outliers----------------" prediction = clf.predict(ul) errors = np.sum(prediction) print "SVM Error rate: {}%".format(float(errors) / len(ul) * 100.0) temp = ul # filter samples classified as 'inliers' filtered = temp[prediction == 1] # eval on abod abod_values = clf2.predict(filtered) errors = abod_values[abod_values > 0] print "Total error (outliers not detected): {}%".format( float((len(errors)) / float(len(ul)))) print "{}/{} additional outliers have been detected".format( len(abod_values[abod_values < 0]), len(filtered))
def eval_on_subspace(): emb1 = load_embeddings("embeddings_matthias.pkl") emb2 = load_embeddings("embeddings_matthias_big.pkl") emb3 = load_embeddings("embeddings_laia.pkl") emb4 = load_embeddings("embeddings_christian.pkl") emb_lfw = load_embeddings("embeddings_lfw.pkl") ref = emb1[0:40, :] test = emb1[40:60, :] ul = emb4[0:10, :] clf = ABOD() metric = 'euclidean' # extract 99.9% subspace # basis, mean = ExtractSubspace(ref, 0.9) basis, mean = ExtractInverseSubspace(ref, 0.7) print "--- reduced dimension to: {}".format(np.size(basis, 1)) # before sep1 = pairwise_distances(ref, test, metric=metric) sep2 = pairwise_distances(ref, ul, metric=metric) m1 = np.mean(sep1, axis=0) m2 = np.mean(sep2, axis=0) print "Original Space:" print "Max. dist.: inliers: {:.3f}, outliers: {:.3f}".format( sep1.max(), sep2.max()) clf.fit(ref) clf.predict(test) clf.predict(ul) # ---------------------------------------------- # project data onto subspace ref = ProjectOntoSubspace(ref, mean, basis) ul = ProjectOntoSubspace(ul, mean, basis) test = ProjectOntoSubspace(test, mean, basis) # compare sep1 = pairwise_distances(ref, test, metric=metric) sep2 = pairwise_distances(ref, ul, metric=metric) # meandist inliers print "------------------meandist to inliers-----------------------" print m1 print np.mean(sep1, axis=0) print "Mean decrease (pos): ", m1 - np.mean(sep1, axis=0) print "-----------------------------------------" # meandist outliers print "------------------meandist to outliers-----------------------" print m2 print np.mean(sep2, axis=0) print "Mean decrease (neg): ", m2 - np.mean(sep2, axis=0) clf.fit(ref) clf.predict(test) clf.predict(ul) print "Inlier Space:" print "Max. dist.: inliers: {:.3f}, outliers: {:.3f}".format( sep1.max(), sep2.max())
def ROC(clf): # PARAMETERS nr_training_samples = 5 nr_test_samples = 400 save_csv = True combine_scenes = False # --------------------------------------------- emb0 = load_embeddings("embeddings_matthias.pkl") emb1 = load_embeddings("matthias_test.pkl") emb2 = load_embeddings("matthias_test2.pkl") emb3 = load_embeddings("embeddings_christian_clean.pkl") emb_lfw = load_embeddings("embeddings_lfw.pkl") class_ds1 = emb1 class_ds2 = emb2 outlier_ds = emb_lfw # combine the two scene datasets if combine_scenes: num_samples_each = np.max([len(class_ds1), len(class_ds2)]) class_ds_combined = np.concatenate( (class_ds1[0:num_samples_each], class_ds2[0:num_samples_each])) else: class_ds_combined = class_ds1 # shuffle random.shuffle(class_ds_combined) # --------------------------------------------- # fit # clf = svm.OneClassSVM(kernel='linear') clf = ABOD() clf.fit(class_ds_combined[0:nr_training_samples]) # true labels labels = np.concatenate( (np.repeat(1, nr_test_samples / 2), np.repeat(2, nr_test_samples / 2))) test_samples = np.concatenate( (emb2[0:nr_test_samples / 2], emb3[0:nr_test_samples / 2])) # scores which are thresholded scores = clf.decision_function(test_samples) fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=1) auc_val = auc(fpr, tpr) # --------------------------------------------- print "AUC: {}".format(auc_val) print "tpr: ", tpr print "fpr: ", fpr print "thresholds: ", thresholds precision, recall, _ = precision_recall_curve(labels, scores, pos_label=1) # print "Precision: ", precision # print "Recall: ", recall plt.plot(recall, precision) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Extension of Precision-Recall curve to multi-class') plt.legend(loc="lower right") plt.show()
auc_val = auc(fpr, tpr) # --------------------------------------------- print "AUC: {}".format(auc_val) print "tpr: ", tpr print "fpr: ", fpr print "thresholds: ", thresholds precision, recall, _ = precision_recall_curve(labels, scores, pos_label=1) # print "Precision: ", precision # print "Recall: ", recall plt.plot(recall, precision) plt.xlim([0.0, 1.0]) plt.ylim([0.0, 1.05]) plt.xlabel('Recall') plt.ylabel('Precision') plt.title('Extension of Precision-Recall curve to multi-class') plt.legend(loc="lower right") plt.show() # ================================= # # Main if __name__ == '__main__': clf = ABOD() ROC(clf)
def test_lmnn0():
    """Evaluate LMNN metric learning on two-person embedding data.

    Fits an LMNN transform on 30 training samples of each class, then
    compares mean cosine distances (test vs. train, per class pair) and
    ABOD predictions in the original and the learned space.
    """
    d0 = load_data('embeddings_matthias.pkl')
    d1 = load_data('embeddings_christian.pkl')
    # 30 training / 10 test samples per class
    d0_train = d0[0:30,:]
    d1_train = d1[0:30,:]
    d0_test = d0[30:40,:]
    d1_test = d1[30:40,:]
    # ---------- train
    labels_train = np.concatenate((np.repeat(0, len(d0_train)), np.repeat(1, len(d1_train))))
    data_train = np.concatenate((d0_train, d1_train))
    lmnn = LMNN(k=3, learn_rate=1e-6)
    start = time.time()
    lmnn.fit(data_train, labels_train)
    print "Fitting took {} seconds".format(time.time()-start)
    # ---------- test
    # NOTE(review): "{:2f}" below is width-2 fixed-point (6 decimals);
    # "{:.2f}" (2 decimals) was probably intended -- confirm before changing.
    print "---- Evaluation in original space: Metric against Class 0"
    print " Smaller valuer = better choice"
    cos_dist_orig00 = np.mean(pairwise_distances(d0_test, d0_train, metric='cosine'))
    cos_dist_orig01 = np.mean(pairwise_distances(d0_test, d1_train, metric='cosine'))
    cos_dist_orig10 = np.mean(pairwise_distances(d1_test, d0_train, metric='cosine'))
    cos_dist_orig11 = np.mean(pairwise_distances(d1_test, d1_train, metric='cosine'))
    print "Class 0 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig00, cos_dist_orig01)
    print "Class 1 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig10, cos_dist_orig11)
    print "---- Evaluation in learned space:"
    print " Smaller valuer = better choice"
    # same distances after applying the learned LMNN transform
    cos_dist_orig00 = np.mean(pairwise_distances(lmnn.transform(d0_test), lmnn.transform(d0_train), metric='cosine'))
    cos_dist_orig01 = np.mean(pairwise_distances(lmnn.transform(d0_test), lmnn.transform(d1_train), metric='cosine'))
    cos_dist_orig10 = np.mean(pairwise_distances(lmnn.transform(d1_test), lmnn.transform(d0_train), metric='cosine'))
    cos_dist_orig11 = np.mean(pairwise_distances(lmnn.transform(d1_test), lmnn.transform(d1_train), metric='cosine'))
    print "Class 0 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig00, cos_dist_orig01)
    print "Class 1 samples: Cosine distance: 0 - {:2f}, 1 - {:2f}".format(cos_dist_orig10, cos_dist_orig11)
    print "===========================ABOD===================================="
    # one ABOD per class, in the original ("orig") and learned ("opt") space
    clf0_orig = ABOD()
    clf1_orig = ABOD()
    clf0_opt = ABOD()
    clf1_opt = ABOD()
    # fit classifiers
    clf0_orig.fit(d0_train)
    clf1_orig.fit(d1_train)
    clf0_opt.fit(lmnn.transform(d0_train))
    clf1_opt.fit(lmnn.transform(d1_train))
    # predict
    print "\n-----------ABOD values in original space:------------------\n\n"
    clf0_orig.predict(d0_test)
    clf0_orig.predict(d1_test)
    clf1_orig.predict(d0_test)
    clf1_orig.predict(d1_test)
    # predict
    print "\n-----------ABOD values in custom space:------------------\n\n"
    clf0_opt.predict(lmnn.transform(d0_test))
    clf0_opt.predict(lmnn.transform(d1_test))
    clf1_opt.predict(lmnn.transform(d0_test))
    clf1_opt.predict(lmnn.transform(d1_test))