Example #1
def folds_to_sets(f_csv=dir_db + 'journal_data/Pairs/folds_5splits/',
                  dir_out=dir_db + "journal_data/Pairs/sets/"):
    """ Method used to merge 5 fold splits into 3 sets for RFIW (train, val, and test)"""

    f_in = glob.glob(f_csv + '*-folds.csv')

    for file in f_in:
        # each CSV lists pairs as <FOLD, LABEL, PAIR_1, PAIR_2>
        f_name = io.file_base(file)
        print("\nProcessing {}\n".format(f_name))

        df_pairs = pd.read_csv(file)

        # merge to form train set
        df_train = df_pairs[(df_pairs['fold'] == 1) | (df_pairs['fold'] == 5)]
        df_train.to_csv(dir_out + "train/" +
                        f_name.replace("-folds", "-train") + ".csv")

        # merge to form val set
        df_val = df_pairs[(df_pairs['fold'] == 2) | (df_pairs['fold'] == 4)]
        df_val.to_csv(dir_out + "val/" + f_name.replace("-folds", "-val") +
                      ".csv")

        # merge to form test set
        df_test = df_pairs[(df_pairs['fold'] == 3)]
        df_test.to_csv(dir_out + "test/" + f_name.replace("-folds", "-test") +
                       ".csv")

        # print stats
        print("{} Training;\t {} Val;\t{} Test".format(
            df_train['fold'].count(), df_val['fold'].count(),
            df_test['fold'].count()))
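
For reference, the same fold-to-set mapping can be written with DataFrame.isin; a minimal self-contained sketch on toy pairs (column names mirror the function above):

import pandas as pd

# toy pair list in the same <fold, label, pair_1, pair_2> layout
df_pairs = pd.DataFrame({'fold': [1, 2, 3, 4, 5],
                         'label': [1, 0, 1, 0, 1],
                         'p1': list('aaaaa'), 'p2': list('bbbbb')})

df_train = df_pairs[df_pairs['fold'].isin([1, 5])]  # folds 1 and 5 -> train
df_val = df_pairs[df_pairs['fold'].isin([2, 4])]    # folds 2 and 4 -> val
df_test = df_pairs[df_pairs['fold'] == 3]           # fold 3 -> test
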
Example #2
def prepare_fids(dir_fid=dir_home() +
                 "/Dropbox/Families_In_The_Wild/Database/Ann/FW_FIDs/",
                 dirs_out=dir_home() +
                 "/Dropbox/Families_In_The_Wild/Database/FIDs/",
                 do_save=False):
    """
    Parses FID CSV files and places in DB. Additionally, checks are made for inconsistency in labels.
    :param dir_fid:
    :param dirs_out:
    :return:
    """

    # load fids (CSV files)
    fid_files = glob.glob(dir_fid + "F????.csv")
    fid_dicts = {io.file_base(f): pd.read_csv(f) for f in fid_files}

    dfs_fams = []
    for fid, df_rel_mat in fid_dicts.items():
        # iterate families: keys are FIDs, values are relationship tables
        col_gender = df_rel_mat.Gender

        # get row indices of members (skip rows whose gender entry contains '-1')
        ids = [i for i, c in enumerate(col_gender) if '-1' not in c]
        tmp = list(range(len(ids)))

        same_size, same_contents = helpers.compare_mid_lists(tmp, ids)

        if not (same_size and same_contents):
            logger.error(
                "MIDs and row indices of relationship table differ in {}.".
                format(fid))
        mids = list(np.array(ids) + 1)
        cols = df_rel_mat.columns[mids]

        rel_mat = np.array(df_rel_mat.loc[ids][cols])
        success, messages = helpers.check_rel_matrix(rel_mat, fid=fid)
        if not success:
            logger.error(
                "Relationship matrix failed inspection {}.".format(fid))
            for m in messages:
                logger.error("\t{}".format(m))
            continue

        genders = list(col_gender[ids])
        names = list(df_rel_mat.Name[ids])

        df_fam = pd.DataFrame({'MID': mids})
        df_fam = df_fam.join(df_rel_mat.loc[ids][cols])
        df_fam = df_fam.join(pd.DataFrame({'Gender': genders, 'Name': names}))
        if do_save:
            df_fam.to_csv(dirs_out + "/" + fid + "/mid.csv", index=False)
        dfs_fams.append(df_fam)
    return dfs_fams
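
For context, a sketch of the mid.csv layout the function writes, using a toy two-member family (the integer relationship codes here are illustrative stand-ins, not the actual FIW codes):

import pandas as pd

# toy relationship matrix: entry (i, j) encodes MID i+1's relationship to MID j+1
df_fam = pd.DataFrame({'MID': [1, 2],
                       '1': [0, 4], '2': [1, 0],
                       'Gender': ['m', 'f'],
                       'Name': ['name1', 'name2']})
df_fam.to_csv('mid.csv', index=False)
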
Example #3
    # instantiate the CNN face detector (dlib)
    f_model = args.model_path
    cnn_face_detector = dlib.cnn_face_detection_model_v1(f_model)

    dir_fids = glob.glob(dir_images + "F????/")
    dir_fids.sort()

    f_pids = glob.glob(dir_images + "F????/P*.jpg")

    f_pids.sort()

    f_prefix = [f.replace(dir_images, "").replace("/", "_").replace(".jpg", "_") for f in f_pids]

    fids = [myio.file_base(myio.filepath(p)) for p in f_pids]  # family IDs (parent folder names)
    pids = [myio.file_base(p) for p in f_pids]                 # photo IDs (file basenames)
    npids = len(pids)
    print("Processing {} images".format(npids))

    # the second argument upsamples the image once (a larger image lets the detector find smaller faces)
    dets = [cnn_face_detector(io.imread(f), 1) for f in f_pids]

    df = pd.DataFrame(columns=['FID', 'PID', 'face_id', 'filename', 'left', 'top', 'right', 'bottom', 'confidence'])

    print("Number of faces detected: {}".format(len(dets)))
    counter = 0
    for faces, prefix, fid, pid in zip(dets, f_prefix, fids, pids):
        # build dataframe of face detections and corresponding metadata
        for i, d in enumerate(faces):
            # each detection is a dlib mmod_rectangle carrying a rect and a confidence;
            # the loop body below is an inferred completion (the source snippet cuts off here)
            df.loc[counter] = [fid, pid, i, prefix + str(i) + ".jpg",
                               d.rect.left(), d.rect.top(),
                               d.rect.right(), d.rect.bottom(), d.confidence]
            counter += 1
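
For reference, a minimal self-contained dlib CNN detection sketch (mmod_human_face_detector.dat is dlib's published CNN face detector; the image path is a placeholder):

import dlib

cnn_face_detector = dlib.cnn_face_detection_model_v1("mmod_human_face_detector.dat")
img = dlib.load_rgb_image("face.jpg")  # placeholder image
dets = cnn_face_detector(img, 1)       # upsample once
for d in dets:
    print(d.rect.left(), d.rect.top(), d.rect.right(), d.rect.bottom(), d.confidence)
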
Example #4
                             do_init=True)

    dirs_fid, fids = fiwdb.load_fids(args.input)
    ifiles = glob.glob(args.input + "*/MID*/*.jpg")
    ofiles = [
        dout + str(f).replace(args.input, "").replace(".jpg", ".csv")
        for f in ifiles
    ]
    for ifile in ifiles:
        ofile = dout + str(ifile).replace(args.input, "").replace(
            ".jpg", ".csv")
        if os.path.isfile(ofile):
            continue
        logger.info("Extracting features: {}\n".format(ofile))
        fname = io.file_base(ifile)

        image = caffe_tools.load_prepare_image_vgg(ifile)
        my_net.net.blobs['data'].data[...] = image
        output = my_net.net.forward()

        io.mkdir(io.filepath(ofile))
        feat = my_net.net.blobs[args.layer].data[0]
        np.savetxt(ofile, feat.flatten(), delimiter=',')  # write flattened feature vector to disk
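
The Caffe feature-extraction pattern used above, as a minimal sketch (the deploy/weights paths, input shape, and layer name are placeholders, not the project's actual files):

import caffe
import numpy as np

net = caffe.Net("deploy.prototxt", "weights.caffemodel", caffe.TEST)
image = np.zeros((1, 3, 224, 224), dtype=np.float32)  # stand-in for a preprocessed face
net.blobs['data'].data[...] = image
net.forward()
feat = net.blobs['fc7'].data[0]  # any layer named in the deploy proto works
np.savetxt("feature.csv", feat.flatten(), delimiter=',')
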
Example #5
        nimages = len(im_files)
        for i in range(nimages):
            in_file = im_files[i]
            f_file = feat_files[i]
            if os.path.exists(f_file):
                continue
            print(f_file)

            logger.info("Extracting feature for: {}\n".format(in_file))
            fname = io.file_base(in_file)

            image = caffe_tools.load_prepare_resnet_centerloss(in_file, (112, 96))
            my_net.net.blobs['data'].data[...] = image  # feed the preprocessed image to the net
            output = my_net.net.forward()

            feat = my_net.net.blobs[l].data[0]  # 'l' is the layer name, presumably set in the (truncated) enclosing loop
            logger.info("Writing feature to disk: {}\n".format(f_file))

            np.savetxt(f_file, feat.flatten(), delimiter=',')  # write flattened feature vector to disk
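
The snippet assumes im_files and feat_files were built earlier; a sketch of one plausible pairing (the FIW directory layout is an assumption):

import glob

dir_data = '/data/FIW_Extended/FIDs/'         # assumed image root
dir_out = '/data/FIW_Extended/feats/resnet/'  # assumed feature root
im_files = sorted(glob.glob(dir_data + "F????/MID*/*.jpg"))
feat_files = [f.replace(dir_data, dir_out).replace(".jpg", ".csv")
              for f in im_files]
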


Example #6
sub_dirs = ['father-dau', 'father-son', 'mother-dau', 'mother-son']

dir_root = '/media/jrobby/Seagate Backup Plus Drive1/DATA/Kinship/KinFaceW-II/'
dir_features = dir_root + '/features/fine-tuned/'

dir_results = dir_features + 'results_spca/'
io.mkdir(dir_results)

dir_perms = dir_root + 'perm/'
dir_lists = dir_root + 'meta_data/'

do_pca = True
k = 200
# load experimental settings for 5-fold verification
f_lists = glob.glob(dir_lists + "*.csv")
pair_types = [io.file_base(f) for f in f_lists]

dir_feats = [dir_features + p + "/" for p in sub_dirs]

fold_list = [1, 2, 3, 4, 5]
for ids in range(len(f_lists)):  # one pair list per relationship type
    folds, labels, pairs1, pairs2 = kinwild.read_pair_list(f_lists[ids])

    d_out = dir_results + pair_types[ids] + "/"
    io.mkdir(d_out)

    for feature in features:
        print("processing features from layer", feature)
        feats1 = kinwild.load_all_features(dir_feats[ids] + feature + "/",
                                           pairs1)
        feats2 = kinwild.load_all_features(dir_feats[ids] + feature + "/",
                                           pairs2)
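
Once both feature sets are loaded, per-pair verification scoring can be as simple as cosine similarity; a minimal sketch on stand-in features (thresholding and fold bookkeeping omitted):

import numpy as np

rng = np.random.default_rng(0)
feats1 = rng.standard_normal((10, 512))  # stand-ins for the loaded pair features
feats2 = rng.standard_normal((10, 512))

# cosine similarity per pair; a higher score suggests the pair is kin
scores = np.sum(feats1 * feats2, axis=1) / (
    np.linalg.norm(feats1, axis=1) * np.linalg.norm(feats2, axis=1))
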
Example #7
# layers = ['conv5_2', 'conv5_3', 'pool5', 'fc6', 'fc7']  # alternative layer names (overridden below)
layers = ['res5a']

dir_root = io.sys_home() + '/Dropbox/Families_In_The_Wild/Database/journal_data/'
dir_features = '/media/jrobby/Seagate Backup Plus Drive1/FIW_dataset/FIW_Extended/features/vgg_face/resnet/'
dir_results = io.sys_home() + '/Dropbox/Families_In_The_Wild/Database/journal_results/verification/res_net/'
io.mkdir(dir_results)

dir_pairs = dir_root + "Pairs/folds_5splits/"

# load experimental settings for 5-fold verification
f_lists = glob.glob(dir_pairs + "*.csv")
pair_types = [io.file_base(f).replace('-folds', '') for f in f_lists]

import os

for i in range(0, 11):  # one pair list per relationship type
    df_list = pd.read_csv(f_lists[i])
    pair_type = io.file_base(f_lists[i]).replace('-folds', '')
    labels = np.array(df_list.label)
    folds = np.array(df_list.fold)
    for layer in layers:
        fold_list = list(set(folds))
        dir_out = dir_results + pair_type + "/" + layer + "/"
        if os.path.isdir(dir_out):
            do_continue = True
            for fold in fold_list:
                if not os.path.isdir(dir_out + str(fold) + "/"):