Code example #1
def hplsfacev(args, parallel_pool):
    print('>> LOADING DATASET FOLDS')
    pos_folds, neg_folds = read_fold_file(COLLECTION)
    assert len(pos_folds) == len(neg_folds)
    
    print('>> LOADING DATASET FEATURES')
    with open(PATH + FEATURES, 'rb') as input_file:
        collection_paths, collection_labels, collection_features = pickle.load(input_file)
    collection_dict = {value:index for index,value in enumerate(collection_paths)}
    collection_list = list(zip(collection_paths, collection_labels))

    pr_results = {}
    roc_results = {}
    for fold_index in range(len(pos_folds)):
        neg_matrix_x = []
        pos_matrix_x = []
        for train_index in range(len(pos_folds)):
            if train_index != fold_index:
                print(' > EXPLORING TRAINING FEATURES - FOLD %d' % (train_index + 1))
                pos_f = pos_folds[train_index]
                neg_f = neg_folds[train_index]
                
                print('  - Positive tuples:')
                for pair in pos_f: 
                    sample_a, sample_b = mount_tuple(pair, DATASET)
                    if sample_a in collection_dict and sample_b in collection_dict:
                        feat_a = collection_features[collection_dict[sample_a]]
                        feat_b = collection_features[collection_dict[sample_b]]
                        diff_feat = np.absolute(np.subtract(feat_a, feat_b))
                        pos_matrix_x.append(diff_feat)
                    else:
                        print(sample_a, sample_b, 'NOT FOUND')
                
                print('  - Negative tuples:')
                for pair in neg_f: 
                    sample_a, sample_b = mount_tuple(pair, DATASET)
                    if sample_a in collection_dict and sample_b in collection_dict:
                        feat_a = collection_features[collection_dict[sample_a]]
                        feat_b = collection_features[collection_dict[sample_b]]
                        diff_feat = np.absolute(np.subtract(feat_a, feat_b))
                        neg_matrix_x.append(diff_feat)
                    else:
                        print(sample_a, sample_b, 'NOT FOUND')

        print('  - Split into Chunks: %d models, %d samples' % (HASH_MODELS, HASH_SAMPLES))
        neg_splits = []
        pos_splits = []
        assert HASH_SAMPLES < len(neg_matrix_x) and HASH_SAMPLES < len(pos_matrix_x)
        for index in range(HASH_MODELS):
            neg_splits.append(random.sample(neg_matrix_x, HASH_SAMPLES))
            pos_splits.append(random.sample(pos_matrix_x, HASH_SAMPLES))

        print(' > LEARNING PLS MODEL - FOLD %d' % (fold_index + 1))
        models = parallel_pool(
            delayed(learn_plsh_v_model) (pos_s, neg_s) for (pos_s, neg_s) in zip(pos_splits, neg_splits)
        )
        
        results_c = []
        results_v = []
        for test_index in range(len(pos_folds)):
            if test_index == fold_index:
                print(' > EXPLORING TESTING FEATURES - FOLD %d' % (test_index + 1))
                pos_f = pos_folds[test_index]
                neg_f = neg_folds[test_index]
                
                print('  - Positive tuples:')
                for pair in pos_f: 
                    sample_a, sample_b = mount_tuple(pair, DATASET)
                    if sample_a in collection_dict and sample_b in collection_dict:
                        feat_a = collection_features[collection_dict[sample_a]]
                        feat_b = collection_features[collection_dict[sample_b]]
                        diff_feat = np.absolute(np.subtract(feat_a, feat_b))
                        response_c = [model.predict_confidence(diff_feat) for model in models]
                        response_v = [model.predict_value(diff_feat) for model in models]
                        results_c.append((np.sum(response_c), 1.0))
                        results_v.append((np.mean(response_v), 1.0))
                        print(sample_a, sample_b, np.sum(response_c), np.mean(response_v))
                    else:
                        print(sample_a, sample_b, 'NOT FOUND')
                
                print('  - Negative tuples:')
                for pair in neg_f: 
                    sample_a, sample_b = mount_tuple(pair, DATASET)
                    if sample_a in collection_dict and sample_b in collection_dict:
                        feat_a = collection_features[collection_dict[sample_a]]
                        feat_b = collection_features[collection_dict[sample_b]]
                        diff_feat = np.absolute(np.subtract(feat_a, feat_b))
                        response_c = [model.predict_confidence(diff_feat) for model in models]
                        response_v = [model.predict_value(diff_feat) for model in models]
                        results_c.append((np.sum(response_c), 0.0))
                        results_v.append((np.mean(response_v), 0.0))
                        print(sample_a, sample_b, np.sum(response_c), np.mean(response_v))
                    else:
                        print(sample_a, sample_b, 'NOT FOUND')
        # raw_input('Press ENTER key to continue...')
        plotting_labels = []
        plotting_scores = []
        for res in results_v:
            plotting_labels.append(('_', res[1]))
            plotting_scores.append(('_', res[0]))
            
        pr_results[fold_index] = generate_precision_recall(plotting_labels, plotting_scores)
        roc_results[fold_index] = generate_roc_curve(plotting_labels, plotting_scores)
    return pr_results, roc_results
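
The helpers above (read_fold_file, mount_tuple, learn_plsh_v_model) live elsewhere in the repository. As a rough orientation, here is a minimal sketch of what learn_plsh_v_model could look like, assuming an sklearn PLSRegression fitted on +1/-1 targets; the PLSVerifier class name and the n_components default are illustrative assumptions, not the repository's actual implementation.

import numpy as np
from sklearn.cross_decomposition import PLSRegression


class PLSVerifier:
    # Hypothetical wrapper exposing the predict_value/predict_confidence
    # interface used by the folds loop above.
    def __init__(self, pls):
        self.pls = pls

    def predict_value(self, feat):
        # Raw PLS regression response for one difference vector.
        return float(self.pls.predict(np.asarray(feat).reshape(1, -1))[0, 0])

    def predict_confidence(self, feat):
        # Signed vote: a positive response means "same identity".
        return 1.0 if self.predict_value(feat) > 0 else -1.0


def learn_plsh_v_model(pos_samples, neg_samples, n_components=10):
    # Stack positive and negative difference vectors with +1/-1 targets.
    x = np.vstack([pos_samples, neg_samples])
    y = np.concatenate([np.ones(len(pos_samples)),
                        -np.ones(len(neg_samples))])
    return PLSVerifier(PLSRegression(n_components=n_components).fit(x, y))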
Code example #2
def hplsfacev(args, parallel_pool):
    print('>> LOADING TRAINING FEATURES')
    with open(PATH + FEATURES_TRAIN, 'rb') as input_file:
        train_paths, train_labels, train_features = pickle.load(input_file)

    print('>> EXPLORING TRAINING FEATURES')
    train_dict = {value: index for index, value in enumerate(train_paths)}
    train_list = list(zip(train_paths, train_labels))
    pos_splits, neg_splits = split_into_chunks(train_list, HASH_MODELS,
                                               HASH_SAMPLES)

    print('>> LEARNING PLS MODELS:')
    models = parallel_pool(
        delayed(learn_plsh_cv_model)(train_features, train_dict, pos_s, neg_s)
        for (pos_s, neg_s) in zip(pos_splits, neg_splits))

    print('>> REMOVING TRAINING FEATURES')
    del train_paths[:]
    del train_labels[:]
    del train_features[:]

    print('>> LOADING PROBE FEATURES')
    pos_folds, neg_folds = read_fold_file(COLLECTION)
    with open(PATH + FEATURES_TEST, 'rb') as input_file:
        test_paths, test_labels, test_features = pickle.load(input_file)

    print('>> EXPLORING PROBE FEATURES')
    test_dict = {value: index for index, value in enumerate(test_paths)}
    test_list = list(zip(test_paths, test_labels))

    assert len(pos_folds) == len(neg_folds)
    pr_results = {}
    roc_results = {}
    for fold_index in range(len(pos_folds)):
        print('>> Fold #%s' % str(fold_index + 1))
        pos_f = pos_folds[fold_index]
        neg_f = neg_folds[fold_index]

        results_c = []
        results_v = []
        print('> Positive probes:')
        for pair in pos_f:
            sample_a, sample_b = mount_tuple(pair, DATASET)
            if sample_a in test_dict and sample_b in test_dict:
                feat_a = test_features[test_dict[sample_a]]
                feat_b = test_features[test_dict[sample_b]]
                diff_feat = np.absolute(np.subtract(feat_a, feat_b))
                response_c = [
                    model[0].predict_confidence(diff_feat) for model in models
                ]
                response_v = [
                    model[0].predict_value(diff_feat) for model in models
                ]
                results_c.append((np.sum(response_c), 1))
                results_v.append((np.mean(response_v), 1))
                print(sample_a, sample_b, np.sum(response_c),
                      np.mean(response_v))

        print('> Negative probes:')
        for pair in neg_f:
            sample_a, sample_b = mount_tuple(pair, DATASET)
            if sample_a in test_dict and sample_b in test_dict:
                feat_a = test_features[test_dict[sample_a]]
                feat_b = test_features[test_dict[sample_b]]
                diff_feat = np.absolute(np.subtract(feat_a, feat_b))
                response_c = [
                    model[0].predict_confidence(diff_feat) for model in models
                ]
                response_v = [
                    model[0].predict_value(diff_feat) for model in models
                ]
                results_c.append((np.sum(response_c), 0))
                results_v.append((np.mean(response_v), 0))
                print(sample_a, sample_b, np.sum(response_c),
                      np.mean(response_v))

        plotting_labels = []
        plotting_scores = []
        for res in results_v:
            plotting_labels.append(('_', res[1]))
            plotting_scores.append(('_', res[0]))

        pr_results[fold_index] = generate_precision_recall(
            plotting_labels, plotting_scores)
        roc_results[fold_index] = generate_roc_curve(plotting_labels,
                                                     plotting_scores)
    del models[:]
    return pr_results, roc_results
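
Both examples above rely on mount_tuple to turn one fold tuple into two sample paths. A plausible sketch follows, assuming COLLECTION uses the LFW pairs format (positive tuples are (name, idx_a, idx_b), negative tuples are (name_a, idx_a, name_b, idx_b)); the path template is an assumption and may not match the repository's layout.

def mount_tuple(pair, dataset):
    # Assumed LFW-style layout: <dataset>/<name>/<name>_<0-padded index>.jpg
    if len(pair) == 3:
        # Same identity, two image indices.
        name, idx_a, idx_b = pair
        sample_a = '%s/%s/%s_%04d.jpg' % (dataset, name, name, int(idx_a))
        sample_b = '%s/%s/%s_%04d.jpg' % (dataset, name, name, int(idx_b))
    else:
        # Two different identities.
        name_a, idx_a, name_b, idx_b = pair
        sample_a = '%s/%s/%s_%04d.jpg' % (dataset, name_a, name_a, int(idx_a))
        sample_b = '%s/%s/%s_%04d.jpg' % (dataset, name_b, name_b, int(idx_b))
    return sample_a, sample_b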
Code example #3
def plshface(args, parallel_pool):
    matrix_x = []
    matrix_y = []
    plotting_labels = []
    plotting_scores = []
    splits = []

    print('>> EXPLORING DATASET')
    dataset_dict = {value: index for index, value in enumerate(list_of_paths)}
    dataset_list = list(zip(list_of_paths, list_of_labels))
    dataset_list = set_maximum_samples(dataset_list, number_of_samples=SAMPLES)
    # Split dataset into disjoint sets in terms of subjects and samples
    known_tuples, unknown_tuples = split_known_unknown_sets(
        dataset_list, known_set_size=KNOWN_SET_SIZE)
    known_train, known_test = split_train_test_sets(
        known_tuples, train_set_size=TRAIN_SET_SIZE)
    to_discard, unknown_test = split_train_test_sets(
        unknown_tuples, train_set_size=TRAIN_SET_SIZE)

    print('>> LOADING GALLERY: {0} samples'.format(len(known_train)))
    for counterA, gallery_sample in enumerate(known_train):
        sample_path = gallery_sample[0]
        sample_name = gallery_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = list_of_features[sample_index]
        # create list of feature vectors and their corresponding target values
        matrix_x.append(feature_vector)
        matrix_y.append(sample_name)
        # print(counterA, sample_path, sample_name)

    print('>> SPLITTING POSITIVE/NEGATIVE SETS')
    individuals = list(set(matrix_y))
    os_cmc_score = np.zeros(len(individuals))
    oaa_cmc_score = np.zeros(len(individuals))
    for index in range(0, NUM_HASH):
        splits.append(generate_pos_neg_dict(individuals))

    # Converting list to numpy arrays
    numpy_x = np.array(matrix_x)
    numpy_y = np.array(matrix_y)
    numpy_s = np.array(splits)

    print('>> LEARNING OPEN-SET PLS MODELS:')
    os_models = parallel_pool(
        delayed(learn_plsh_model)(numpy_x, numpy_y, split)
        for split in numpy_s)

    print('>> LEARNING CLOSED-SET ONE-AGAINST-ALL PLS MODELS:')
    oaa_splits = generate_oaa_splits(numpy_y)
    oaa_models = parallel_pool(
        delayed(learn_oaa_pls)(numpy_x, split) for split in oaa_splits)

    print('>> LOADING KNOWN PROBE: {0} samples'.format(len(known_test)))
    for counterB, probe_sample in enumerate(known_test):
        # Obtaining probe feature vector and corresponding identity
        sample_path = probe_sample[0]
        sample_name = probe_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = list_of_features[sample_index]

        # Projecting feature vector to every os_model
        vote_dict = dict(map(lambda vote: (vote, 0), individuals))
        for model in os_models:
            pos_list = [
                key for key, value in model[1].iteritems() if value == 1
            ]
            response = model[0].predict_confidence(feature_vector)
            for pos in pos_list:
                vote_dict[pos] += response

        # Sort open-set vote-list histogram
        vote_list = sorted(vote_dict.items(), key=lambda tup: tup[1],
                           reverse=True)
        denominator = np.absolute(
            np.mean([vote_list[1][1], vote_list[2][1], vote_list[3][1]]))
        vote_ratio = (vote_list[0][1] / denominator
                      if denominator > 0 else vote_list[0][1])
        # Compute CMC score for open-set classification
        for outer in range(0, len(individuals)):
            for inner in range(0, outer + 1):
                if vote_list[inner][0] == sample_name:
                    os_cmc_score[outer] += 1
                    break

        # Sort closed-set responses
        oaa_pls_responses = [
            model[0].predict_confidence(feature_vector) for model in oaa_models
        ]
        oaa_pls_labels = [model[1] for model in oaa_models]
        responses = sorted(zip(oaa_pls_labels, oaa_pls_responses),
                           key=lambda tup: tup[1], reverse=True)
        # Compute CMC score for closed-set classification
        for outer in range(0, len(individuals)):
            for inner in range(0, outer + 1):
                if responses[inner][0] == sample_name:
                    oaa_cmc_score[outer] += 1
                    break

        # Getting known set plotting relevant information
        plotting_labels.append([(sample_name, 1.0)])
        plotting_scores.append([(sample_name, vote_ratio)])
        # print(counterB, sample_name, vote_ratio, vote_list[0][0], responses[0][0])

    print('>> LOADING UNKNOWN PROBE: {0} samples'.format(len(unknown_test)))
    for counterC, probe_sample in enumerate(unknown_test):
        # Obtaining probe feature vector and corresponding identity
        sample_path = probe_sample[0]
        sample_name = probe_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = list_of_features[sample_index]

        # Projecting feature vector to every os_model
        vote_dict = dict(map(lambda vote: (vote, 0), individuals))
        for model in os_models:
            pos_list = [
                key for key, value in model[1].items() if value == 1
            ]
            response = model[0].predict_confidence(feature_vector)
            for pos in pos_list:
                vote_dict[pos] += response

        # Sort open-set vote-list histogram
        vote_list = sorted(vote_dict.items(), key=lambda tup: tup[1],
                           reverse=True)
        denominator = np.absolute(
            np.mean([vote_list[1][1], vote_list[2][1], vote_list[3][1]]))
        vote_ratio = (vote_list[0][1] / denominator
                      if denominator > 0 else vote_list[0][1])

        # Getting unknown set plotting relevant information
        plotting_labels.append([(sample_name, -1.0)])
        plotting_scores.append([(sample_name, vote_ratio)])
        # print(counterC, sample_name, vote_ratio, vote_list[0][0])

    del os_models[:]
    del oaa_models[:]

    os_cmc = np.divide(os_cmc_score, len(known_test))
    oaa_cmc = np.divide(oaa_cmc_score, len(known_test))
    det = generate_det_curve(plotting_labels, plotting_scores)
    pr = generate_precision_recall(plotting_labels, plotting_scores)
    roc = generate_roc_curve(plotting_labels, plotting_scores)
    fscore = compute_fscore(pr)
    return os_cmc, oaa_cmc, det, pr, roc, fscore
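
generate_pos_neg_dict drives the random hashing: each of the NUM_HASH models sees the known individuals split into a positive and a negative half. A minimal sketch under that assumption; the real helper's balancing strategy is not shown in this listing.

import random


def generate_pos_neg_dict(individuals):
    # Randomly assign half of the subjects to the positive side (+1)
    # and the remaining half to the negative side (-1).
    shuffled = list(individuals)
    random.shuffle(shuffled)
    half = len(shuffled) // 2
    labels = {subject: 1 for subject in shuffled[:half]}
    labels.update({subject: -1 for subject in shuffled[half:]})
    return labels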
Code example #4
def svmhface(args, parallel_pool):
    PATH = str(args.path)
    DATASET = str(args.file)
    NUM_HASH = int(args.hash)
    KNOWN_SET_SIZE = float(args.known_set_size)
    TRAIN_SET_SIZE = float(args.train_set_size)

    print('>> LOADING FEATURES FROM FILE')
    with open(PATH + DATASET, 'rb') as input_file:
        list_of_paths, list_of_labels, list_of_features = pickle.load(
            input_file)

    matrix_x = []
    matrix_y = []
    plotting_labels = []
    plotting_scores = []
    splits = []

    print('>> EXPLORING DATASET')
    dataset_dict = {value: index for index, value in enumerate(list_of_paths)}
    dataset_list = list(zip(list_of_paths, list_of_labels))
    known_tuples, unknown_tuples = split_known_unknown_sets(
        dataset_list, known_set_size=KNOWN_SET_SIZE)
    known_train, known_test = split_train_test_sets(
        known_tuples, train_set_size=TRAIN_SET_SIZE)

    print('>> LOADING GALLERY: {0} samples'.format(len(known_train)))
    counterA = 0
    for gallery_sample in known_train:
        sample_path = gallery_sample[0]
        sample_name = gallery_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = list_of_features[sample_index]

        matrix_x.append(feature_vector)
        matrix_y.append(sample_name)

        counterA += 1
        print(counterA, sample_path, sample_name)

    print('>> SPLITTING POSITIVE/NEGATIVE SETS')
    individuals = list(set(matrix_y))
    cmc_score = np.zeros(len(individuals))
    for index in range(0, NUM_HASH):
        splits.append(generate_pos_neg_dict(individuals))

    print('>> LEARNING SVM MODELS:')
    numpy_x = np.array(matrix_x)
    numpy_y = np.array(matrix_y)
    numpy_s = np.array(splits)
    models = parallel_pool(
        delayed(learn_svmh_model)(numpy_x, numpy_y, split)
        for split in numpy_s)

    print('>> LOADING KNOWN PROBE: {0} samples'.format(len(known_test)))
    counterB = 0
    for probe_sample in known_test:
        sample_path = probe_sample[0]
        sample_name = probe_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = list_of_features[sample_index]

        vote_dict = dict(map(lambda vote: (vote, 0), individuals))
        for model in models:
            pos_list = [
                key for key, value in model[1].items() if value == 1
            ]
            response = model[0].predict([feature_vector])
            # print(response)
            for pos in pos_list:
                vote_dict[pos] += response
        result = sorted(vote_dict.items(), key=lambda tup: tup[1],
                        reverse=True)

        for outer in range(len(individuals)):
            for inner in range(outer + 1):
                if result[inner][0] == sample_name:
                    cmc_score[outer] += 1
                    break

        counterB += 1
        denominator = np.absolute(np.mean([result[1][1], result[2][1]]))
        if denominator > 0:
            output = result[0][1] / denominator
        else:
            output = result[0][1]
        print(counterB, sample_name, result[0][0], output[0])

        # Getting known set plotting relevant information
        plotting_labels.append([(sample_name, 1)])
        plotting_scores.append([(sample_name, output[0])])

    print('>> LOADING UNKNOWN PROBE: {0} samples'.format(len(unknown_tuples)))
    counterC = 0
    for probe_sample in unknown_tuples:
        sample_path = probe_sample[0]
        sample_name = probe_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = list_of_features[sample_index]

        vote_dict = dict(map(lambda vote: (vote, 0), individuals))
        for model in models:
            pos_list = [
                key for key, value in model[1].items() if value == 1
            ]
            response = model[0].predict([feature_vector])
            for pos in pos_list:
                vote_dict[pos] += response
        result = sorted(vote_dict.items(), key=lambda tup: tup[1],
                        reverse=True)

        counterC += 1
        denominator = np.absolute(np.mean([result[1][1], result[2][1]]))
        if denominator > 0:
            output = result[0][1] / denominator
        else:
            output = result[0][1]
        print(counterC, sample_name, result[0][0], output[0])

        # Getting unknown set plotting relevant information
        plotting_labels.append([(sample_name, -1)])
        plotting_scores.append([(sample_name, output[0])])

    # cmc_score_norm = np.divide(cmc_score, counterA)
    # generate_cmc_curve(cmc_score_norm, DATASET + '_' + str(NUM_HASH) + '_' + DESCRIPTOR)

    del models[:]
    del list_of_paths[:]
    del list_of_labels[:]
    del list_of_features[:]

    pr = generate_precision_recall(plotting_labels, plotting_scores)
    roc = generate_roc_curve(plotting_labels, plotting_scores)
    return pr, roc
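
learn_svmh_model is the SVM counterpart of the PLS hashing models: one binary classifier per random split, returned together with its split dictionary so the voting loop can recover the positive-side identities. A hedged sketch using sklearn's LinearSVC; the repository's actual SVM backend may differ.

import numpy as np
from sklearn.svm import LinearSVC


def learn_svmh_model(numpy_x, numpy_y, split):
    # Map each training sample's identity to the +1/-1 side chosen
    # by this split, then fit a binary SVM on the feature matrix.
    targets = np.array([split[name] for name in numpy_y])
    classifier = LinearSVC().fit(numpy_x, targets)
    # (model, split_dict) matches the model[0]/model[1] accesses above.
    return classifier, split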
Code example #5
def fcnhface(args, parallel_pool):
    matrix_x = []
    matrix_y = []
    plotting_labels = []
    plotting_scores = []
    models = []
    splits = []

    print('>> EXPLORING DATASET')
    dataset_dict = {value: index for index, value in enumerate(list_of_paths)}
    dataset_list = list(zip(list_of_paths, list_of_labels))
    dataset_list = set_maximum_samples(dataset_list, number_of_samples=SAMPLES)
    known_tuples, unknown_tuples = split_known_unknown_sets(
        dataset_list, known_set_size=KNOWN_SET_SIZE)
    known_train, known_test = split_train_test_sets(
        known_tuples, train_set_size=TRAIN_SET_SIZE)

    print('>> LOADING GALLERY: {0} samples'.format(len(known_train)))
    counterA = 0
    for gallery_sample in known_train:
        sample_path = gallery_sample[0]
        sample_name = gallery_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = list_of_features[sample_index]

        matrix_x.append(feature_vector)
        matrix_y.append(sample_name)

        counterA += 1
        # print(counterA, sample_path, sample_name)

    print('>> SPLITTING POSITIVE/NEGATIVE SETS')
    individuals = list(set(matrix_y))
    cmc_score = np.zeros(len(individuals))
    for index in range(0, NUM_HASH):
        splits.append(generate_pos_neg_dict(individuals))

    print('>> LEARNING FC MODELS:')
    numpy_x = np.array(matrix_x)
    numpy_y = np.array(matrix_y)
    numpy_s = np.array(splits)

    # models = [learn_fc_model(numpy_x, numpy_y, split) for split in numpy_s]

    models = parallel_pool(
        delayed(learn_fc_model)(numpy_x, numpy_y, split) for split in numpy_s)

    print('>> LOADING KNOWN PROBE: {0} samples'.format(len(known_test)))
    counterB = 0
    for probe_sample in known_test:
        sample_path = probe_sample[0]
        sample_name = probe_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = np.array(list_of_features[sample_index])

        vote_dict = dict(map(lambda vote: (vote, 0), individuals))
        for k in range(0, len(models)):
            pos_list = [
                key for key, value in models[k][1].items() if value == 1
            ]
            pred = models[k][0].predict(
                feature_vector.reshape(1, feature_vector.shape[0]))
            response = pred[0][1]
            #print(response)
            for pos in pos_list:
                vote_dict[pos] += response
        result = sorted(vote_dict.items(), key=lambda tup: tup[1],
                        reverse=True)

        for outer in range(len(individuals)):
            for inner in range(outer + 1):
                if result[inner][0] == sample_name:
                    cmc_score[outer] += 1
                    break

        counterB += 1
        denominator = np.absolute(np.mean([result[1][1], result[2][1]]))
        if denominator > 0:
            output = result[0][1] / denominator
        else:
            output = result[0][1]
        # print(counterB, sample_name, result[0][0], output)

        # Getting known set plotting relevant information
        plotting_labels.append([(sample_name, 1)])
        plotting_scores.append([(sample_name, output)])

    print('>> LOADING UNKNOWN PROBE: {0} samples'.format(len(unknown_tuples)))
    counterC = 0
    for probe_sample in unknown_tuples:
        sample_path = probe_sample[0]
        sample_name = probe_sample[1]
        sample_index = dataset_dict[sample_path]
        feature_vector = np.array(list_of_features[sample_index])

        vote_dict = dict(map(lambda vote: (vote, 0), individuals))
        #print (vote_dict)
        for k in range(0, len(models)):
            pos_list = [
                key for key, value in models[k][1].items() if value == 1
            ]
            pred = models[k][0].predict(
                feature_vector.reshape(1, feature_vector.shape[0]))
            response = pred[0][1]
            for pos in pos_list:
                vote_dict[pos] += response
        result = sorted(vote_dict.items(), key=lambda tup: tup[1],
                        reverse=True)

        counterC += 1
        denominator = np.absolute(np.mean([result[1][1], result[2][1]]))
        if denominator > 0:
            output = result[0][1] / denominator
        else:
            output = result[0][1]
        # print(counterC, sample_name, result[0][0], output)

        # Getting unknown set plotting relevant information
        plotting_labels.append([(sample_name, -1)])
        plotting_scores.append([(sample_name, output)])

    # cmc_score_norm = np.divide(cmc_score, counterA)
    # generate_cmc_curve(cmc_score_norm, DATASET + '_' + str(NUM_HASH) + '_' + DESCRIPTOR)

    del models[:]

    pr = generate_precision_recall(plotting_labels, plotting_scores)
    roc = generate_roc_curve(plotting_labels, plotting_scores)
    fscore = compute_fscore(pr)
    return pr, roc, fscore
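
learn_fc_model presumably trains a small fully connected network per split; pred[0][1] above then reads the positive-class probability of a two-way softmax. A sketch under that assumption, with illustrative layer sizes and epoch count; tf.keras is an assumption, not necessarily the repository's framework.

import numpy as np
from tensorflow import keras


def learn_fc_model(numpy_x, numpy_y, split):
    # 0/1 targets: class 1 is the positive side of this split, so the
    # softmax output read as pred[0][1] is the positive-class probability.
    targets = np.array([1 if split[name] == 1 else 0 for name in numpy_y])
    model = keras.Sequential([
        keras.Input(shape=(numpy_x.shape[1],)),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(2, activation='softmax'),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy')
    model.fit(numpy_x, targets, epochs=10, verbose=0)
    # (model, split_dict) matches the models[k][0]/models[k][1] accesses above.
    return model, split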
Code example #6
def svm_oneclass(args):
    PATH = str(args.path)
    DATASET = str(args.file)
    DESCRIPTOR = str(args.desc)
    NUM_HASH = int(args.hash)
    IMG_WIDTH = int(args.width)
    IMG_HEIGHT = int(args.height)

    matrix_x = []
    matrix_y = []
    models = []
    splits = []
    nmatrix_x = []
    nmatrix_y = []

    x_train = []
    y_train = []
    nx_train = []
    ny_train = []
    plotting_labels = []
    plotting_scores = []

    vgg_model = None
    if DESCRIPTOR == 'df':
        from vggface import VGGFace
        vgg_model = VGGFace()

    print('>> EXPLORING DATASET')
    dataset_list = load_txt_file(PATH + DATASET)
    known_tuples, unknown_tuples = split_known_unknown_sets(dataset_list,
                                                            known_set_size=0.5)
    known_train, known_test = split_train_test_sets(known_tuples,
                                                    train_set_size=0.5)
    # print(known_train)

    counterA = 0
    for gallery_sample in known_train:
        sample_path = gallery_sample[0]
        sample_name = gallery_sample[1]

        gallery_path = PATH + sample_path
        gallery_image = cv.imread(gallery_path, cv.IMREAD_COLOR)

        if DESCRIPTOR == 'hog':
            gallery_image = cv.resize(gallery_image, (IMG_HEIGHT, IMG_WIDTH))
            feature_vector = Descriptor.get_hog(gallery_image)
        elif DESCRIPTOR == 'df':
            feature_vector = Descriptor.get_deep_feature(gallery_image,
                                                         vgg_model,
                                                         layer_name='fc6')

        matrix_x.append(feature_vector)
        matrix_y.append(sample_name)

        counterA += 1
        print(counterA, sample_path, sample_name)

    print('>> GENERATING FILES TO SVM')
    counterSVM = 0
    for feature in matrix_x:
        y_train.insert(counterSVM, 1)
        x_train.insert(counterSVM, {})
        count_inner = 0
        for pos in feature:
            x_train[counterSVM].update({count_inner: pos})
            count_inner += 1
        counterSVM += 1

    print('>> GENERATING THE SVM MODEL')
    x_train_total = x_train + nx_train
    y_train_total = y_train + ny_train
    besthit = 0
    bestn = 0
    bestg = 0
    for n in range(1, 50):
        for g in range(-15, 3):
            nu = n / 100.0
            gamma = pow(2, g)
            parameters = '-s 2 -t 2'
            parameters = parameters + ' -g ' + str(gamma) + ' -n ' + str(nu)
            m = svm_train(y_train_total, x_train_total, parameters)
            hits = 0
            #print('>> LOADING KNOWN PROBE: {0} samples'.format(len(known_test)))
            counterB = 0
            for probe_sample in known_test:
                sample_path = probe_sample[0]
                sample_name = probe_sample[1]
                query_path = PATH + sample_path
                query_image = cv.imread(query_path, cv.IMREAD_COLOR)
                if DESCRIPTOR == 'hog':
                    query_image = cv.resize(query_image,
                                            (IMG_HEIGHT, IMG_WIDTH))
                    feature_vector = Descriptor.get_hog(query_image)
                elif DESCRIPTOR == 'df':
                    feature_vector = Descriptor.get_deep_feature(
                        query_image, vgg_model)
                count_inner = 0
                x_teste = []
                y_teste = []
                y_teste.insert(0, 1)
                x_teste.insert(0, {})
                for pos in feature_vector:
                    x_teste[0].update({count_inner: pos})
                    count_inner += 1
                p_label, p_acc, p_val = svm_predict(y_teste, x_teste, m)
                counterB += 1
                # Getting known set plotting relevant information
                plotting_labels.append([(sample_name, 1)])
                plotting_scores.append([(sample_name, p_label[0])])
                if p_label[0] == 1:
                    hits = hits + 1

            print('>> LOADING UNKNOWN PROBE: {0} samples'.format(
                len(unknown_tuples)))
            counterC = 0
            for probe_sample in unknown_tuples:
                sample_path = probe_sample[0]
                sample_name = probe_sample[1]
                query_path = PATH + sample_path
                query_image = cv.imread(query_path, cv.IMREAD_COLOR)
                if DESCRIPTOR == 'hog':
                    query_image = cv.resize(query_image,
                                            (IMG_HEIGHT, IMG_WIDTH))
                    feature_vector = Descriptor.get_hog(query_image)
                elif DESCRIPTOR == 'df':
                    feature_vector = Descriptor.get_deep_feature(
                        query_image, vgg_model)

                count_inner = 0
                x_teste = []
                y_teste = []
                y_teste.insert(0, -1)
                x_teste.insert(0, {})
                for pos in feature_vector:
                    x_teste[0].update({count_inner: pos})
                    count_inner += 1
                p_label, p_acc, p_val = svm_predict(y_teste, x_teste, m)
                counterC += 1
                # Getting unknown set plotting relevant information
                plotting_labels.append([(sample_name, -1)])
                plotting_scores.append([(sample_name, p_label[0])])
                if p_label[0] == -1:
                    hits = hits + 1
            if hits > besthit:
                besthit = hits
                bestn = nu
                bestg = gamma
    # cmc_score_norm = np.divide(cmc_score, counterA)
    # generate_cmc_curve(cmc_score_norm, DATASET + '_' + str(NUM_HASH) + '_' + DESCRIPTOR)

    print(besthit)
    print(bestn)
    print(bestg)

    pr = generate_precision_recall(plotting_labels, plotting_scores)
    roc = generate_roc_curve(plotting_labels, plotting_scores)
    return pr, roc
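
For reference, the libsvm parameter string built above, '-s 2 -t 2 -g <gamma> -n <nu>', selects a one-class SVM (-s 2) with an RBF kernel (-t 2). The same nu/gamma grid search can be written against sklearn's OneClassSVM; this sketch assumes plain dense feature matrices rather than the libsvm dict format used above.

import numpy as np
from sklearn.svm import OneClassSVM


def grid_one_class(train_feats, known_feats, unknown_feats):
    # Count a hit when a known probe is accepted (+1) or an unknown
    # probe is rejected (-1), mirroring the scoring loop above.
    best_hits, best_nu, best_gamma = 0, None, None
    for n in range(1, 50):
        for g in range(-15, 3):
            nu, gamma = n / 100.0, 2.0 ** g
            clf = OneClassSVM(nu=nu, gamma=gamma).fit(train_feats)
            hits = int(np.sum(clf.predict(known_feats) == 1))
            hits += int(np.sum(clf.predict(unknown_feats) == -1))
            if hits > best_hits:
                best_hits, best_nu, best_gamma = hits, nu, gamma
    return best_hits, best_nu, best_gamma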
Code example #7
def plshface(args, parallel_pool):
    PATH = str(args.path)
    DATASET = str(args.file)
    DESCRIPTOR = str(args.desc)
    NUM_HASH = int(args.hash)
    IMG_WIDTH = int(args.width)
    IMG_HEIGHT = int(args.height)
    KNOWN_SET_SIZE = float(args.known_set_size)
    TRAIN_SET_SIZE = float(args.train_set_size)

    matrix_x = []
    matrix_y = []
    splits = []

    plotting_labels = []
    plotting_scores = []

    vgg_model = None
    if DESCRIPTOR == 'df':
        from vggface import VGGFace
        vgg_model = VGGFace()

    print('>> EXPLORING DATASET')
    dataset_list = load_txt_file(PATH + DATASET)
    known_tuples, unknown_tuples = split_known_unknown_sets(
        dataset_list, known_set_size=KNOWN_SET_SIZE)
    known_train, known_test = split_train_test_sets(
        known_tuples, train_set_size=TRAIN_SET_SIZE)

    print('>> LOADING GALLERY: {0} samples'.format(len(known_train)))
    counterA = 0
    for gallery_sample in known_train:
        sample_path = gallery_sample[0]
        sample_name = gallery_sample[1]

        counterA += 1
        print(counterA, sample_path, sample_name)

        gallery_path = PATH + sample_path
        gallery_image = cv.imread(gallery_path, cv.IMREAD_COLOR)

        if DESCRIPTOR == 'hog':
            gallery_image = cv.resize(gallery_image, (IMG_HEIGHT, IMG_WIDTH))
            feature_vector = Descriptor.get_hog(gallery_image)
        elif DESCRIPTOR == 'df':
            feature_vector = Descriptor.get_deep_feature(gallery_image,
                                                         vgg_model,
                                                         layer_name='fc6')
        del gallery_image

        matrix_x.append(feature_vector)
        matrix_y.append(sample_name)

    print('>> SPLITTING POSITIVE/NEGATIVE SETS')
    individuals = list(set(matrix_y))
    cmc_score = np.zeros(len(individuals))
    for index in range(0, NUM_HASH):
        splits.append(generate_pos_neg_dict(individuals))

    print('>> LEARNING PLS MODELS:')
    numpy_x = np.array(matrix_x)
    numpy_y = np.array(matrix_y)
    numpy_s = np.array(splits)
    models = parallel_pool(
        delayed(learn_plsh_model)(numpy_x, numpy_y, split)
        for split in numpy_s)

    print('>> LOADING KNOWN PROBE: {0} samples'.format(len(known_test)))
    counterB = 0
    for probe_sample in known_test:
        sample_path = probe_sample[0]
        sample_name = probe_sample[1]

        query_path = PATH + sample_path
        query_image = cv.imread(query_path, cv.IMREAD_COLOR)
        if DESCRIPTOR == 'hog':
            query_image = cv.resize(query_image, (IMG_HEIGHT, IMG_WIDTH))
            feature_vector = Descriptor.get_hog(query_image)
        elif DESCRIPTOR == 'df':
            feature_vector = Descriptor.get_deep_feature(
                query_image, vgg_model)

        vote_dict = dict(map(lambda vote: (vote, 0), individuals))
        for model in models:
            pos_list = [
                key for key, value in model[1].items() if value == 1
            ]
            response = model[0].predict_confidence(feature_vector)
            for pos in pos_list:
                vote_dict[pos] += response
        result = sorted(vote_dict.items(), key=lambda tup: tup[1],
                        reverse=True)

        for outer in range(len(individuals)):
            for inner in range(outer + 1):
                if result[inner][0] == sample_name:
                    cmc_score[outer] += 1
                    break

        counterB += 1
        denominator = np.absolute(np.mean([result[1][1], result[2][1]]))
        if denominator > 0:
            output = result[0][1] / denominator
        else:
            output = result[0][1]
        print(counterB, sample_name, result[0][0], output)

        # Getting known set plotting relevant information
        plotting_labels.append([(sample_name, 1)])
        plotting_scores.append([(sample_name, output)])

    print('>> LOADING UNKNOWN PROBE: {0} samples'.format(len(unknown_tuples)))
    counterC = 0
    for probe_sample in unknown_tuples:
        sample_path = probe_sample[0]
        sample_name = probe_sample[1]

        query_path = PATH + sample_path
        query_image = cv.imread(query_path, cv.IMREAD_COLOR)
        if DESCRIPTOR == 'hog':
            query_image = cv.resize(query_image, (IMG_HEIGHT, IMG_WIDTH))
            feature_vector = Descriptor.get_hog(query_image)
        elif DESCRIPTOR == 'df':
            feature_vector = Descriptor.get_deep_feature(
                query_image, vgg_model)

        vote_dict = dict(map(lambda vote: (vote, 0), individuals))
        for model in models:
            pos_list = [
                key for key, value in model[1].items() if value == 1
            ]
            response = model[0].predict_confidence(feature_vector)
            for pos in pos_list:
                vote_dict[pos] += response
        result = sorted(vote_dict.items(), key=lambda tup: tup[1],
                        reverse=True)

        counterC += 1
        denominator = np.absolute(np.mean([result[1][1], result[2][1]]))
        if denominator > 0:
            output = result[0][1] / denominator
        else:
            output = result[0][1]
        print(counterC, sample_name, result[0][0], output)

        # Getting unknown set plotting relevant information
        plotting_labels.append([(sample_name, -1)])
        plotting_scores.append([(sample_name, output)])

    # cmc_score_norm = np.divide(cmc_score, counterA)
    # generate_cmc_curve(cmc_score_norm, DATASET + '_' + str(NUM_HASH) + '_' + DESCRIPTOR)

    pr = generate_precision_recall(plotting_labels, plotting_scores)
    roc = generate_roc_curve(plotting_labels, plotting_scores)
    return pr, roc
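
learn_plsh_model, shared by examples #3 and #7, fits one PLS regressor per random split over the gallery identities. A minimal sketch assuming sklearn's PLSRegression; the PLSHashModel wrapper and the n_components default are illustrative, not the repository's actual code.

import numpy as np
from sklearn.cross_decomposition import PLSRegression


class PLSHashModel:
    # Hypothetical wrapper exposing the predict_confidence call
    # used by the probe loops above.
    def __init__(self, pls):
        self.pls = pls

    def predict_confidence(self, feat):
        return float(self.pls.predict(np.asarray(feat).reshape(1, -1))[0, 0])


def learn_plsh_model(numpy_x, numpy_y, split, n_components=10):
    # +1 targets for samples whose identity fell on the positive side
    # of this split, -1 otherwise.
    targets = np.array([split[name] for name in numpy_y], dtype=float)
    pls = PLSRegression(n_components=n_components).fit(numpy_x, targets)
    # (model, split_dict) matches the model[0]/model[1] accesses above.
    return PLSHashModel(pls), split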