def folds_to_sets(f_csv=dir_db + 'journal_data/Pairs/folds_5splits/',
                  dir_out=dir_db + "journal_data/Pairs/sets/"):
    """Merge the 5-fold splits into the 3 RFIW sets (train, val, and test)."""
    f_in = glob.glob(f_csv + '*-folds.csv')
    for file in f_in:
        # each list of pairs <FOLD, LABEL, PAIR_1, PAIR_2>
        f_name = io.file_base(file)
        print("\nProcessing {}\n".format(f_name))
        df_pairs = pd.read_csv(file)

        # merge folds 1 and 5 to form the train set
        df_train = df_pairs[(df_pairs['fold'] == 1) | (df_pairs['fold'] == 5)]
        df_train.to_csv(dir_out + "train/" + f_name.replace("-folds", "-train") + ".csv")

        # merge folds 2 and 4 to form the val set
        df_val = df_pairs[(df_pairs['fold'] == 2) | (df_pairs['fold'] == 4)]
        df_val.to_csv(dir_out + "val/" + f_name.replace("-folds", "-val") + ".csv")

        # fold 3 forms the test set
        df_test = df_pairs[df_pairs['fold'] == 3]
        df_test.to_csv(dir_out + "test/" + f_name.replace("-folds", "-test") + ".csv")

        # print stats
        print("{} Training;\t{} Val;\t{} Test".format(
            df_train['fold'].count(), df_val['fold'].count(), df_test['fold'].count()))
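# Sanity check (a minimal sketch with synthetic data, not part of the
# pipeline): verifies the fold-to-set mapping used above, i.e., folds 1 and 5
# form train, folds 2 and 4 form val, and fold 3 forms test.
def _check_fold_mapping():
    import pandas as pd
    df = pd.DataFrame({'fold': [1, 2, 3, 4, 5], 'label': [1, 0, 1, 0, 1]})
    train = df[(df['fold'] == 1) | (df['fold'] == 5)]
    val = df[(df['fold'] == 2) | (df['fold'] == 4)]
    test = df[df['fold'] == 3]
    assert len(train) + len(val) + len(test) == len(df)  # the 3 sets partition the folds
    assert set(train['fold']) == {1, 5} and set(val['fold']) == {2, 4}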
def prepare_fids(dir_fid=dir_home() + "/Dropbox/Families_In_The_Wild/Database/Ann/FW_FIDs/",
                 dirs_out=dir_home() + "/Dropbox/Families_In_The_Wild/Database/FIDs/",
                 do_save=False):
    """Parse FID CSV files and place them in the DB. Additionally, checks are
    made for inconsistencies in the labels.

    :param dir_fid:  directory of annotated family (FID) CSV files
    :param dirs_out: root directory to write per-family mid.csv tables
    :param do_save:  if True, write each family table to <dirs_out>/<FID>/mid.csv
    :return: list of per-family dataframes
    """
    # load FIDs (CSV files)
    fid_files = glob.glob(dir_fid + "F????.csv")
    fid_dicts = {io.file_base(f): pd.read_csv(f) for f in fid_files}

    dfs_fams = []
    for fid in fid_dicts:  # for each FID (i.e., iterate keys of the dictionary)
        df_rel_mat = fid_dicts[fid]
        col_gender = df_rel_mat.Gender

        # get MIDs (rows whose gender field is not the '-1' placeholder)
        ids = [i for i, c in enumerate(col_gender) if '-1' not in c]
        tmp = list(range(0, len(ids)))
        same_size, same_contents = helpers.compare_mid_lists(tmp, ids)
        if not (same_size and same_contents):
            logger.error("MIDs and row indices of relationship table differ in {}.".format(fid))

        mids = list(np.array(ids) + 1)
        cols = df_rel_mat.columns[mids]
        rel_mat = np.array(df_rel_mat.loc[ids][cols])

        success, messages = helpers.check_rel_matrix(rel_mat, fid=fid)
        if not success:
            logger.error("Relationship matrix failed inspection {}.".format(fid))
            for m in messages:
                logger.error("\t{}".format(m))
            continue

        genders = list(col_gender[ids])
        names = list(df_rel_mat.Name[ids])

        df_fam = pd.DataFrame({'MID': mids})
        df_fam = df_fam.join(df_rel_mat.loc[ids][cols])
        df_fam = df_fam.join(pd.DataFrame({'Gender': genders, 'Name': names}))

        if do_save:
            df_fam.to_csv(dirs_out + "/" + fid + "/mid.csv", index=False)

        dfs_fams.append(df_fam)

    return dfs_fams
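# Usage sketch (assumes the default Dropbox layout above exists locally;
# do_save=True would also write each table to <dirs_out>/<FID>/mid.csv):
dfs_fams = prepare_fids(do_save=False)
print("Parsed {} FID tables".format(len(dfs_fams)))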
# instantiate CNN face detector
f_model = args.model_path
cnn_face_detector = dlib.cnn_face_detection_model_v1(f_model)

dir_fids = glob.glob(dir_images + "F????/")
dir_fids.sort()

f_pids = glob.glob(dir_images + "F????/P*.jpg")
f_pids.sort()
f_prefix = [f.replace(dir_images, "").replace("/", "_").replace(".jpg", "_")
            for f in f_pids]

fids = [myio.file_base(myio.filepath(p)) for p in f_pids]
pids = [myio.file_base(p) for p in f_pids]

npids = len(pids)
print("Processing {} images".format(npids))

# Second argument tells the detector to upsample the image 1 time (i.e., a
# bigger image allows more faces to be detected).
dets = [cnn_face_detector(io.imread(f), 1) for f in f_pids]

df = pd.DataFrame(columns=['FID', 'PID', 'face_id', 'filename',
                           'left', 'top', 'right', 'bottom', 'confidence'])
print("Number of faces detected: {}".format(sum(len(d) for d in dets)))

counter = 0
for faces, prefix, fid, pid in zip(dets, f_prefix, fids, pids):
    # build dataframe of face detections and corresponding metadata
    for i, d in enumerate(faces):
        # each mmod detection carries a bounding box (.rect) and a score
        # (.confidence); the filename convention here is an inferred completion
        df.loc[counter] = [fid, pid, i, prefix + "face" + str(i) + ".jpg",
                           d.rect.left(), d.rect.top(),
                           d.rect.right(), d.rect.bottom(), d.confidence]
        counter += 1
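# Minimal standalone sketch of the dlib CNN detector call pattern used above
# (model and image paths here are hypothetical). Each detection returned by
# cnn_face_detection_model_v1 is an mmod_rectangle exposing a bounding box
# (.rect) and a score (.confidence).
import dlib
from skimage import io as skio

detector = dlib.cnn_face_detection_model_v1("mmod_human_face_detector.dat")
img = skio.imread("F0001_P00001.jpg")
for k, det in enumerate(detector(img, 1)):  # 1 => upsample once
    r = det.rect
    print(k, r.left(), r.top(), r.right(), r.bottom(), det.confidence)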
do_init=True)

dirs_fid, fids = fiwdb.load_fids(args.input)

ifiles = glob.glob(args.input + "*/MID*/*.jpg")
ofiles = [dout + str(f).replace(args.input, "").replace(".jpg", ".csv")
          for f in ifiles]

for ifile, ofile in zip(ifiles, ofiles):
    if os.path.isfile(ofile):
        continue
    logger.info("Extracting features: {}\n".format(ofile))

    # load and preprocess the image for VGG-Face, then run a forward pass
    image = caffe_tools.load_prepare_image_vgg(ifile)
    my_net.net.blobs['data'].data[...] = image
    output = my_net.net.forward()

    # write the feature from the requested layer as a flat CSV
    io.mkdir(io.filepath(ofile))
    feat = my_net.net.blobs[args.layer].data[0]
    np.savetxt(ofile, feat.flatten(), delimiter=',')
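# Reading a saved feature back (sketch; the file name is hypothetical).
# np.savetxt above writes the flattened vector one value per line, so
# np.loadtxt recovers it as a 1-D array.
import numpy as np

feat = np.loadtxt("F0001/MID1/P00001_face0.csv", delimiter=',')
print(feat.shape)  # e.g., (4096,) for VGG-Face fc7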
for in_file, f_file in zip(im_files, feat_files):
    if os.path.exists(f_file):
        continue
    logger.info("Extracting feature for: {}\n".format(in_file))

    # load and preprocess for the ResNet (center-loss) model, then run a
    # forward pass; the data blob must be set before forward()
    image = caffe_tools.load_prepare_resnet_centerloss(in_file, (112, 96))
    my_net.net.blobs['data'].data[...] = image
    output = my_net.net.forward()

    # write the feature from layer `l` as a flat CSV
    feat = my_net.net.blobs[l].data[0]
    logger.info("Writing feature to disk: {}\n".format(f_file))
    np.savetxt(f_file, feat.flatten(), delimiter=',')
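# Features are commonly L2-normalized before computing pair similarities
# (a sketch, not taken from the original script):
import numpy as np

def l2_normalize(x, eps=1e-10):
    """Scale a feature vector to unit length (eps avoids division by zero)."""
    return x / (np.linalg.norm(x) + eps)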
sub_dirs = ['father-dau']  # , 'father-son', 'mother-dau', 'mother-son'

dir_root = '/media/jrobby/Seagate Backup Plus Drive1/DATA/Kinship/KinFaceW-II/'
dir_features = dir_root + 'features/fine-tuned/'
dir_results = dir_features + 'results_spca/'
io.mkdir(dir_results)

dir_perms = dir_root + 'perm/'
dir_lists = dir_root + 'meta_data/'

do_pca = True
k = 200  # number of (S)PCA components to keep

# load experimental settings for 5-fold verification
f_lists = glob.glob(dir_lists + "*.csv")
pair_types = [io.file_base(f) for f in f_lists]
dir_feats = [dir_features + p + "/" for p in sub_dirs]

for ids in range(len(f_lists)):  # index the pair lists (one per pair type)
    folds, labels, pairs1, pairs2 = kinwild.read_pair_list(f_lists[ids])

    d_out = dir_results + pair_types[ids] + "/"
    io.mkdir(d_out)

    for feature in features:
        print("processing features from layer", feature)
        feats1 = kinwild.load_all_features(dir_feats[ids] + feature + "/", pairs1)
        feats2 = kinwild.load_all_features(dir_feats[ids] + feature + "/", pairs2)
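# Scoring sketch (hypothetical helper, not verbatim from the experiment code):
# cosine similarity between row-aligned pair features, as typically used for
# kinship verification before thresholding.
import numpy as np

def cosine_scores(feats1, feats2, eps=1e-10):
    a = feats1 / (np.linalg.norm(feats1, axis=1, keepdims=True) + eps)
    b = feats2 / (np.linalg.norm(feats2, axis=1, keepdims=True) + eps)
    return np.sum(a * b, axis=1)  # one similarity score per pair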
import os

# layers = ['conv5_2', 'conv5_3', 'pool5', 'fc6', 'fc7']
layers = ['res5a']

dir_root = io.sys_home() + '/Dropbox/Families_In_The_Wild/Database/journal_data/'
dir_features = '/media/jrobby/Seagate Backup Plus Drive1/FIW_dataset/FIW_Extended/features/vgg_face/resnet/'
dir_results = io.sys_home() + '/Dropbox/Families_In_The_Wild/Database/journal_results/verification/res_net/'
io.mkdir(dir_results)

dir_pairs = dir_root + "Pairs/folds_5splits/"

# load experimental settings for 5-fold verification
f_lists = glob.glob(dir_pairs + "*.csv")
pair_types = [io.file_base(f).replace('-folds', '') for f in f_lists]

for i in range(0, 11):
    df_list = pd.read_csv(f_lists[i])
    pair_type = io.file_base(f_lists[i]).replace('-folds', '')
    labels = np.array(df_list.label)
    folds = np.array(df_list.fold)

    for layer in layers:
        fold_list = list(set(folds))
        dir_out = dir_results + pair_type + "/" + layer + "/"

        # skip pair types already processed for every fold
        if os.path.isdir(dir_out):
            do_continue = True
            for fold in fold_list:
                if not os.path.isdir(dir_out + str(fold) + "/"):
                    do_continue = False  # a fold is missing (inferred completion)
                    break
            if do_continue:
                continue
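# Per-fold evaluation sketch (hypothetical helper; `scores`, `labels`, and
# `folds` are aligned 1-D numpy arrays like those read from the pair lists):
import numpy as np

def fold_accuracy(scores, labels, folds, fold, thr=0.5):
    mask = folds == fold
    preds = (scores[mask] > thr).astype(int)
    return float(np.mean(preds == labels[mask]))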