def main():
    """Filter a {wiki,imdb} face dataset and pack it into a .npz archive.

    Keeps only samples whose detection score passes ``min_score``, that show a
    single face, and that have a valid age (0-100) and a known gender.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    root_path = "./{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:  # detector not confident enough
            continue
        # a second detected face means the crop may show the wrong person
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: use `not` instead of bitwise `~` -- `~` only acts as a
        # logical not on numpy bools (~True == -2 is truthy for plain bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # skip unreadable/missing files instead of crashing in resize
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    np.savez(output_path,
             image=np.array(out_imgs),
             gender=np.array(out_genders),
             age=np.array(out_ages),
             img_size=img_size)
def plot_age_dist():
    """Plot the age histogram of the filtered wiki dataset."""
    dataset = "wiki"
    meta_file = r"D:\wiki_crop\wiki.mat"
    (full_path, dob, gender, photo_taken,
     face_score, second_face_score, age) = get_meta(meta_file, dataset)
    # keep only the samples that survive the usual sanity filters
    valid = filter_unusual(full_path, gender, face_score, second_face_score, age)
    plot.histgram_demo(age[valid])
def main():
    """Filter a face dataset and write `<filename>\\t<age>\\t<gender>` rows to a text file."""
    args = get_args()
    rootpath = args.rootpath
    outfile = args.outfile
    metafile = args.metafile
    min_score = args.minscore

    full_path, dob, gender, photo_taken, face_score, second_face_score, age = utils.get_meta(
        os.path.join(rootpath, '%s.mat' % metafile), metafile)

    total = 0
    label = []
    print("%d images " % len(face_score))
    for i in range(len(face_score)):
        if face_score[i] < min_score:  # detector not confident enough
            continue
        # reject images where a second face was detected
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        fname = str(full_path[i][0])
        label.append([fname, age[i], gender[i]])
        total += 1

    with open(os.path.join(rootpath, outfile), 'w') as f:
        for line in label:
            f.write(line[0] + '\t' + str(line[1]) + '\t' + str(line[2]) + '\n')
    print("filter data")
    print("total: %d image" % (total))
    print('Done!!!')
def main():
    """Detect, crop and resize (224x224) faces and write them to `new_database/`.

    Output file names embed the age (`<name>A<age>.jpg`); with ``--folders``
    images are additionally grouped into one folder per age (000..100).
    """
    args = get_args()
    parent_folder = args.input + "/"
    db = args.db
    newdb_path = parent_folder + "new_database/"
    mat_path = parent_folder + "{}.mat".format(db)
    # create folder for new dataset
    if not os.path.exists(newdb_path):
        os.makedirs(newdb_path)
    # create a folder for every age
    if args.folders:
        for i in range(101):
            new_folder_path = newdb_path + ('%03d' % i)
            if not os.path.exists(new_folder_path):
                os.makedirs(new_folder_path)

    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    for i in tqdm(range(len(full_path))):
        if face_score[i] < 1.0:  # detector confidence threshold
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        image = face_recognition.load_image_file(parent_folder + full_path[i][0])
        face_locations = face_recognition.face_locations(image)
        if len(face_locations) == 0:
            continue
        # face_recognition boxes are (top, right, bottom, left)
        top, right, bottom, left = face_locations[0]
        img = image[top:bottom, left:right, :]
        resized_image = cv2.resize(img, (224, 224))
        name_of_file = full_path[i][0].split('/')[-1].split('.')[0]
        new_name = name_of_file + "A" + str(age[i]) + ".jpg"
        if args.folders:
            cv2.imwrite(newdb_path + ('%03d' % age[i]) + "/" + new_name, resized_image)
        else:
            cv2.imwrite(newdb_path + new_name, resized_image)
def main():
    """Filter the dataset, split it train/val, and dump minibatch files in parallel."""
    args = get_args()
    output_path = args.output
    db = args.db
    max_count = args.max_count
    img_size = args.img_size
    min_score = args.min_score
    max_num_per_file = args.max_num_per_file
    train_ratio = args.train_ratio

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)
    length = len(face_score)

    # derive the output directory prefix and base file name from the output path
    outpath_prefix = path_concat(output_path.split('/')[:-1])
    filename = output_path.split('/')[-1].split('.')[0]

    # filter out bad images, then shuffle the surviving indexes
    indexes = get_passed(length, min_score, age, face_score, gender)
    random.shuffle(indexes)
    effective_length = len(indexes)

    if max_count is not None:
        train_length = int(max_count * train_ratio)
    else:
        train_length = int(effective_length * train_ratio)
    print('train_length=%s' % train_length)

    # split the shuffled indexes and turn each half into metadata records
    train_idxs, val_idxs = split_meta_indexes(indexes, train_length, max_count=max_count)
    train_metas = make_meta(train_idxs, full_path, age, gender)
    val_metas = make_meta(val_idxs, full_path, age, gender)

    # process and save each split as minibatch files, in parallel (train first)
    for tag, metas in (('train', train_metas), ('val', val_metas)):
        batches = minibatch_gen(metas, max_num_per_file)
        Parallel(n_jobs=-1, verbose=5)([
            delayed(process)(tag, root_path, img_size, outpath_prefix, filename,
                             i, mini) for i, mini in enumerate(batches)
        ])
def eval(env, num):
    # NOTE(review): `eval` shadows the builtin; name kept for interface compatibility.
    """Submit `num` requests with Poisson-distributed gaps spanning ~one minute."""
    meta = utils.get_meta(env)
    sender = utils.sender
    # NOTE(review): the pool is never shut down here -- presumably the caller
    # or process exit handles pending futures; confirm.
    pool = ThreadPoolExecutor(max_workers=1000)
    logging.info('request num: {}'.format(num))
    # mean inter-arrival gap in milliseconds so `num` requests cover ~60 s
    lam = (60 * 1000.0) / num
    samples = np.random.poisson(lam, num)
    for s in samples:
        pool.submit(sender, meta)
        time.sleep(s / 1000.0)  # gap is sampled in ms; sleep takes seconds
def main_process(data_path, db, limit=None):
    """Write a `<db>.txt` metadata file with one `<path> <age>` line per valid sample.

    Args:
        data_path: dataset root containing `<db>_crop/`.
        db: dataset name ("wiki" or "imdb").
        limit: optional cap on the number of rows written.

    Returns:
        The path of the written metadata file.
    """
    mat_path = os.path.join(data_path, db + "_crop", db + ".mat")
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)
    ok_idx = filter_unusual(full_path, gender, face_score, second_face_score, age)
    meta_file = os.path.join(data_path, db + ".txt")
    with open(meta_file, 'w') as f:
        for i in trange(len(ok_idx)):
            if limit and i >= int(limit):
                break
            # BUG FIX: index through ok_idx -- the original wrote rows for raw
            # samples 0..len(ok_idx)-1 instead of the filtered samples.
            idx = ok_idx[i]
            f.write("%s_crop/%s %d\n" % (db, full_path[idx][0], age[idx]))
    return meta_file
def main():
    """Filter the dataset and save normalized CHW RGB images plus labels to a .mat file."""
    args = get_args()
    output_path = args.output
    db = args.db
    max_count = args.max_count
    img_size = args.img_size
    min_score = args.min_score

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []
    length = len(face_score)

    for i in tqdm(range(length)):
        if face_score[i] < min_score:
            continue
        # NOTE: the second-face filter is intentionally disabled in this variant.
        # if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
        #     continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        img = cv2.imread(root_path + str(full_path[i][0]), 1)  # force 3-channel load
        if img is None:  # skip unreadable files instead of crashing in resize
            continue
        img = cv2.resize(img, (img_size, img_size))
        img = img[..., ::-1]  # BGR -> RGB
        # HWC -> CHW and scale pixel values to [0, 1]
        img = np.around(np.transpose(img, (2, 0, 1)) / 255.0, decimals=12)
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(img)
        if max_count is not None and len(out_imgs) >= max_count:
            break

    output = {
        "image": np.array(out_imgs),
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    scipy.io.savemat(output_path, output)
def main():
    """Filter the dataset, save it to a .mat file, and dump the same dict as one CSV row."""
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    # BUG FIX: paths were hard-coded to "wiki" with a no-op .format(db);
    # build them from the requested db so the --db argument is honored.
    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []
    out_faceScore = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # skip unreadable files instead of crashing in resize
            continue
        out_genders.append(int(gender[i]))
        out_faceScore.append(int(face_score[i]))  # NOTE: truncates the float score
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    output = {
        "image": np.array(out_imgs),
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "fscore": np.array(out_faceScore)
    }
    scipy.io.savemat(output_path, output)
    with open('mycsvfilet.csv', 'w') as f:  # Just use 'w' mode in 3.x
        # NOTE(review): writes a single row whose values are whole numpy arrays
        w = csv.DictWriter(f, output.keys())
        w.writeheader()
        w.writerow(output)
def main():
    """Filter the dataset and write train/valid TFRecord files of resized face crops."""
    args = get_args()
    output_path = args.output  # NOTE: unused by this variant; kept for CLI compatibility
    db = args.db
    img_size = args.img_size
    min_score = args.min_score
    fr = args.train_fraction  # fraction of the kept samples used for training

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # skip unreadable files instead of crashing in resize
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    # split the kept samples by the requested fraction (no shuffling)
    number_of_train = int(fr * len(out_imgs))
    train_images = out_imgs[0:number_of_train]
    labels_train_gender = out_genders[0:number_of_train]
    labels_train_age = out_ages[0:number_of_train]
    valid_images = out_imgs[number_of_train:]
    labels_valid_gender = out_genders[number_of_train:]
    labels_valid_age = out_ages[number_of_train:]

    tfrecord_train = 'train_' + str(img_size) + '.tfrecords'
    tfrecord_valid = 'valid_' + str(img_size) + '.tfrecords'
    create_tfrecord(tfrecord_train, train_images, labels_train_age, labels_train_gender)
    create_tfrecord(tfrecord_valid, valid_images, labels_valid_age, labels_valid_gender)
def main():
    """Filter the dataset into a preallocated uint8 image array and save it as a .mat file."""
    logging.debug("Alaa --> start creating dataBase ...")
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    sample_num = len(face_score)
    # preallocate for the worst case; only the first valid_sample_num rows are saved
    out_imgs = np.empty((sample_num, img_size, img_size, 3), dtype=np.uint8)
    valid_sample_num = 0

    for i in tqdm(range(sample_num)):
        if face_score[i] < min_score:
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # skip unreadable files; keeps labels aligned with images
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs[valid_sample_num] = cv2.resize(img, (img_size, img_size))
        valid_sample_num += 1

    output = {
        "image": out_imgs[:valid_sample_num],
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    scipy.io.savemat(output_path, output)
    logging.debug("Alaa --> succes to create dataBase and save it ...")
def main():
    """Filter the DEX-style dataset and save resized images plus labels to a .mat file."""
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    root_path = "/data/chercheurs/agarwals/DEX/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # skip unreadable files instead of crashing in resize
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    output = {
        "image": np.array(out_imgs),
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    scipy.io.savemat(output_path, output)
def main():
    """Filter the dataset (optionally capped at --max_samples) and save it to a .mat file."""
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score
    root_path = args.input
    mat_path = "{}/{}.mat".format(root_path, db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(mat_path, db)

    out_genders = []
    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(face_score))):
        if args.max_samples and i >= args.max_samples:
            # BUG FIX: was a Python 2 print statement (a syntax error on Python 3)
            print('Reached maximum samples {}'.format(args.max_samples))
            break
        if face_score[i] < min_score:
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # skip unreadable files instead of crashing in resize
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    output = {"image": np.array(out_imgs),
              "gender": np.array(out_genders),
              "age": np.array(out_ages),
              "db": db,
              "img_size": img_size,
              "min_score": min_score}
    scipy.io.savemat(output_path, output)
def eval(env):
    # NOTE(review): `eval` shadows the builtin; name kept for interface compatibility.
    """Replay a per-minute request workload read from test_2h.csv.

    For each minute's count `num`, submits `num` requests with
    Poisson-distributed inter-arrival gaps so they span roughly 60 seconds.
    """
    meta = utils.get_meta(env)
    sender = utils.sender
    pool = ThreadPoolExecutor(max_workers=10000)
    # nums = np.arange(100, 1001, 100)
    # nums = np.append(nums, np.arange(1000, 99, -100))
    nums = []
    # `folder` is not defined in this function -- presumably a module-level
    # global; TODO confirm.
    with open('{}/workload/test_2h.csv'.format(folder), 'r') as f:
        reader = csv.DictReader(f)
        nums = [int(row['tweets']) for row in reader]
    print(sum(nums))
    for i in range(len(nums)):
        num = nums[i]
        logging.info('request num: {}'.format(num))
        # mean gap in milliseconds so `num` requests cover about one minute
        lam = (60 * 1000.0) / num
        samples = np.random.poisson(lam, num)
        for s in samples:
            pool.submit(sender, meta)
            time.sleep(s / 1000.0)  # gap is sampled in ms; sleep takes seconds
def _load_db(self, db_dir, db):
    """Append `[image_path, age, gender]` entries for every valid sample of `db`
    to ``self.image_path_and_age_gender``.

    Samples are kept only when the detection score reaches ``self.minscore``,
    no second face was detected, the age is in 0-100, and gender is known.
    """
    root_path = db_dir
    mat_path = str(root_path) + "/{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)
    for i in tqdm(range(len(face_score))):
        if face_score[i] < self.minscore:
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        # if Path(root_path.name + str(full_path[i][0])).is_file():
        self.image_path_and_age_gender.append(
            [str(root_path + "/" + str(full_path[i][0])), age[i], gender[i]])
def useful_image_generate(crop_name, db_name, min_score=1.30):
    """Collect valid, existing images and write `<path>,<age>,<gender>` rows to
    `<crop_name>/<db_name>.csv`.

    Args:
        crop_name: folder containing the cropped images and `<db_name>.mat`.
        db_name: dataset name ("wiki" or "imdb").
        min_score: detector confidence threshold (was hard-coded to 1.30;
            now a backward-compatible keyword parameter).
    """
    imdb_mat_path = crop_name + os.sep + db_name + '.mat'
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        imdb_mat_path, db_name)
    useful_counter = 0
    images_full_path = []
    images_ages = []
    images_genders = []
    for i in range(len(face_score)):
        if face_score[i] < min_score:
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        if not os.path.exists(crop_name + os.sep + str(full_path[i][0])):
            continue  # metadata row without a file on disk
        useful_counter += 1
        images_full_path.append(crop_name + os.sep + str(full_path[i][0]))
        images_ages.append(str(age[i]))
        images_genders.append(str(int(gender[i])))
    with open(crop_name + os.sep + db_name + '.csv', 'w') as f:
        for i in range(useful_counter):
            f.write(images_full_path[i] + ',' \
                    + images_ages[i] + ',' \
                    + images_genders[i] + '\n')
def main():
    """Filter by age validity only and save resized images plus ages to a .mat file.

    This metadata variant carries only (full_path, dob, age) -- no gender or
    face-score fields.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, age = get_meta(mat_path, db)

    out_ages = []
    out_imgs = []

    for i in tqdm(range(len(age))):
        # BUG FIX: `not` instead of bitwise `~` (only valid on numpy bools)
        if not (0 <= age[i] <= 100):
            continue
        img = cv2.imread(root_path + str(full_path[i][0]))
        if img is None:  # skip unreadable files instead of crashing in resize
            continue
        out_ages.append(age[i])
        out_imgs.append(cv2.resize(img, (img_size, img_size)))

    output = {"image": np.array(out_imgs),
              "age": np.array(out_ages),
              "db": db,
              "img_size": img_size}
    scipy.io.savemat(output_path, output)
import matplotlib.pyplot as plt
import numpy as np
import cv2
from utils import get_meta

# Inspect the face-score distributions of the wiki (or imdb) crop metadata.
db = "wiki"
# db = "imdb"
mat_path = "data/{}_crop/{}.mat".format(db, db)
full_path, dob, gender, photo_taken, face_score, second_face_score, age\
    = get_meta(mat_path, db)

print("#images: {}".format(len(face_score)))
# -inf/inf scores mark rows counted separately from the positive-score histogram
print("#images with inf scores: {}".format(np.isinf(face_score).sum()))
hist = plt.hist(face_score[face_score > 0], bins=np.arange(0, 8, 0.2), color='b')
plt.xlabel("face score")

# second_face_score is NaN when no second face was detected
print("#images with second face scores: {}".format(
    (~np.isnan(second_face_score)).sum()))
hist = plt.hist(second_face_score[~np.isnan(second_face_score)],
                bins=np.arange(0, 8, 0.2), color='b')
plt.xlabel("second face score")
def main():
    """Filter the dataset, shuffle it, and split it into on-disk train/test sets.

    Copies the surviving images into `<root>/train` and `<root>/test` and
    saves matching metadata .mat files.
    """
    args = get_args()
    db = args.db
    min_score = args.min_score
    validation = args.validation_split  # fraction held out for the test split

    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    out_fullpath = []

    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue
        # reject images where a second face was detected
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # NOTE(review): `~` acts as a logical not only on numpy bools
        if ~(0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        out_fullpath.append(full_path[i][0])

    print(type(out_genders))
    # shuffle all three parallel lists with a single permutation
    data_num = len(out_ages)
    indexes = np.arange(data_num)
    np.random.shuffle(indexes)
    out_fullpath = np.array(out_fullpath)[indexes]
    out_genders = np.array(out_genders)[indexes]
    out_ages = np.array(out_ages)[indexes]

    train_num = int(data_num*(1-validation))
    genders_train = out_genders[:train_num]
    genders_test = out_genders[train_num:]
    age_train = out_ages[:train_num]
    age_test = out_ages[train_num:]
    fullpath_train = out_fullpath[:train_num]
    fullpath_test = out_fullpath[train_num:]
    # keep only the bare file name (strip the per-db subfolder)
    out_name = [name.split("/")[1] for name in out_fullpath]
    name_train = out_name[:train_num]
    name_test = out_name[train_num:]

    train_dir = Path(__file__).resolve().parent.joinpath(root_path, "train")
    train_dir.mkdir(parents=True, exist_ok=True)
    train_mat = {"full_path": np.array(name_train),
                 "age": np.array(age_train),
                 "gender": np.array(genders_train),
                 "db": db,
                 "min_score": min_score}
    print(train_dir)
    for i in tqdm(range(len(fullpath_train))):
        copy2(root_path+fullpath_train[i], train_dir)
    # NOTE(review): savemat appends ".mat", so this presumably writes
    # `train.mat` next to the `train` folder rather than into it -- confirm.
    scipy.io.savemat(str(train_dir), train_mat)

    test_dir = Path(__file__).resolve().parent.joinpath(root_path, "test")
    test_dir.mkdir(parents=True, exist_ok=True)
    test_mat = {"full_path": np.array(name_test),
                "age": np.array(age_test),
                "gender": np.array(genders_test),
                "db": db,
                "min_score": min_score}
    scipy.io.savemat(str(test_dir), test_mat)
    for i in tqdm(range(len(fullpath_test))):
        copy2(root_path+fullpath_test[i], test_dir)
def main():
    """Build VCTK train/valid/test file lists pairing acoustic features with phone texts.

    Scans per-speaker feature files, joins them with `.phones` transcripts,
    caches the flat list on disk, and then splits 10/20/rest utterances per
    selected speaker into valid/test/train lists.
    """
    datadir = '/data/evs/VCTK'
    version = 'VCTK-Corpus-0.92'  # 'VCTK-Corpus-0.92' or 'VCTK-sie'
    sr = '48k'  # '22k' or '48k'
    # version = 'VCTK-sie' # 'VCTK-Corpus-0.92' or 'VCTK-sie'
    # sr = '22k' # '22k' or '48k'
    # speakers that the original authors selected (another source)
    spk_file = os.path.join(datadir, version, 'speaker-selected.txt')
    sids_sel = ''.join(open(spk_file).readlines()).split('\n')
    # # runtime mode
    # args = parse_args()
    # interactive mode (comment out before running the script)
    # NOTE(review): an ArgumentParser instance is used here as a plain
    # attribute holder (namespace), not for actual argument parsing.
    args = argparse.ArgumentParser()
    # args.acoustic_dir = '{}/{}/wav{}_silence_trimmed'.format(datadir, version, sr[:2])
    args.acoustic_dir = '{}/{}/spec'.format(datadir, version)
    args.text_dir = '{}/{}/text'.format(datadir, version)
    args.list_dir = '{}/{}/list'.format(datadir, version)
    args.metafile = '{}/{}/speaker-info.txt'.format(datadir, version)
    args.delimiter = '|'
    args.seed = 0
    args.ordered = True
    args.gender = 'both'
    args.accents_excluded = ['Indian']
    args.mic = 'dual'
    # print out arguments
    print('acoustic dir: {}'.format(args.acoustic_dir))
    print('text dir: {}'.format(args.text_dir))
    print('list dir: {}'.format(args.list_dir))
    print('meta file: {}'.format(args.metafile))
    print('delimiter: {}'.format(args.delimiter))
    print('list random seed: {}'.format(args.seed))
    print('sort list by duration: {}'.format(args.ordered))
    print('gender: {}'.format(args.gender))
    print('accents excluded: {}'.format(args.accents_excluded))
    print('microphone: {}'.format(args.mic))
    # get speaker info (list of dicts) from meta file
    speakers = get_meta(args.metafile)
    # accent-to-speaker distribution from speaker info
    accent2nspkr = get_accent2nspk(speakers)
    speakers = sel_speaker(speakers, gender=args.gender,
                           accents_excluded=args.accents_excluded)
    nspeakers = len(speakers)
    print('{} speakers selected by gender ({}) and accents (no {})'.format(
        nspeakers, args.gender, ' '.join(args.accents_excluded)))
    # get speaker IDs for those being selected
    sids = sorted(([spk['id'] for spk in speakers]))
    # sanity check: every pre-selected speaker must exist in the pool
    sids_intersected = sorted([s for s in sids if s in sids_sel])
    assert len(sids_intersected)==len(sids_sel), \
        'selected speakers not found in speaker pool!'
    del sids_intersected
    # get spaekers IDs for those not being selected
    sids_unsel = [spk['id'] for spk in speakers if spk['id'] not in sids_sel]
    for spk in speakers:
        if spk['id'] in sids_unsel:
            print(spk)
    ext = '.spec.npy'  # '.spec.npy' or '-feats_norm.npy'
    # ext = '-feats_norm.npy' # '.spec.npy' or '-feats_norm.npy'
    listname = 'audio-txt-nframe-nphone_no-indian.txt'
    # listname = 'wgannorm-txt-nframe-nphone_no-indian.txt'
    listpath = os.path.join(args.list_dir, listname)
    if os.path.isfile(listpath):
        # reuse the cached flat file list when it already exists
        flists_flat = read_flist(listpath)
    else:
        # get file list per speaker
        flists = []
        for i, sid in enumerate(sids):
            print('processing speaker {} ({}/{}) ...'.format(
                sid, i, nspeakers))
            flist = []
            feats = sorted(
                glob.glob(
                    os.path.join(args.acoustic_dir, sid, '*{}'.format(ext))))
            # filter by microphone
            if args.mic != 'dual':
                feats = [
                    f for f in feats if 'mic{}{}'.format(args.mic, ext) in f
                ]
            for j, feat in enumerate(feats):
                feat_no_ext = os.path.join(
                    os.path.dirname(feat),
                    os.path.basename(feat).split('.')[0])
                # utterance id: the part after the speaker id in the file name
                uid = os.path.basename(feat).split('.')[0].split('_')[1]
                txtfile = '{}_{}.phones'.format(sid, uid)
                txtpath = os.path.join(args.text_dir, sid, txtfile)
                if os.path.isfile(txtpath):
                    phone_str = open(txtpath).readline().strip()
                    nphones = len(phone_str.split())
                    log_spectrogram = np.load(feat)
                    nframes = log_spectrogram.shape[0]
                    flist.append([feat_no_ext, phone_str, nframes, nphones])
            # append file list for current speaker
            flists.append(flist)
        # flatten 2-layer file list (flists) to 1-layer
        flists_flat = [item for sublist in flists for item in sublist]
        # write file list
        write_flist(flists_flat, args.delimiter, listpath)
    # find sid-uid with both mics
    if args.mic == 'dual':
        flists_dual, flists_mono = exclude_mono(flists_flat)
        flists_all = flists_dual[:]
    else:
        flists_all = flists_flat[:]
    # # find common utterance ids (potentially to choose valid, test set from)
    # uid2text = get_uid2text(flists_all)
    # uid2ntexts = get_uid2ntexts(uid2text)
    # nuids_uniq_text = len([1 for (uid, ntexts) in uid2ntexts.items() if ntexts==1])
    # msg = 'uids with single text / # of uids: {} / {}'
    # print(msg.format(nuids_uniq_text, len(uid2ntexts)))
    #
    # # find the dictionary of {sid:ntexts}
    # sid2nuids = find_sid2nuids(flists_all)
    #
    # # select sid with enough utterances
    # nuids_cutoff = 600 # select sids with at least 600 utterances
    # nuids_sorted = sorted(sid2nuids.values(), reverse=True)
    # sids_selected = [sid for sid in sid2nuids.keys() if sid2nuids[sid]>=nuids_cutoff]
    # get 2-layer file lists (file list per sid)
    flists = get_2dflist(flists_all)
    # randomly select 10/20 utterances for validation/testing per speaker
    flists_train, flists_valid, flists_test = [], [], []
    for i, sid in enumerate(sids_sel):
        flists_sid = flists[sid]
        uids_sid = get_uids(flists_sid)
        # randomize utterance ids with fixed random seed
        random.seed(args.seed)
        random.shuffle(uids_sid)
        uids_valid = uids_sid[:10]
        uids_test = uids_sid[10:30]
        uids_train = uids_sid[30:]
        flist_train = sel_flist(uids_train, flists_sid)
        flist_valid = sel_flist(uids_valid, flists_sid)
        flist_test = sel_flist(uids_test, flists_sid)
        flists_train += flist_train
        flists_valid += flist_valid
        flists_test += flist_test
    # write out split file list
    # listname = 'wgannorm-txt-nframe-nphone_no-indian_train.txt'
    listname = 'audio-txt-nframe-nphone_no-indian_train.txt'
    write_flist(flists_train, args.delimiter,
                os.path.join(args.list_dir, listname))
    # listname = 'wgannorm-txt-nframe-nphone_no-indian_valid.txt'
    listname = 'audio-txt-nframe-nphone_no-indian_valid.txt'
    write_flist(flists_valid, args.delimiter,
                os.path.join(args.list_dir, listname))
    # listname = 'wgannorm-txt-nframe-nphone_no-indian_test.txt'
    listname = 'audio-txt-nframe-nphone_no-indian_test.txt'
    write_flist(flists_test, args.delimiter,
                os.path.join(args.list_dir, listname))
def show_imgs(img_paths):
    """Display `img_num` random images from `img_paths` with their predicted labels.

    Relies on names not defined here (module-level, presumably): img_num,
    rows, cols, path_root, prediction -- TODO confirm.
    """
    img_ids = np.random.choice(len(img_paths), img_num, replace=False)
    for i, img_id in enumerate(img_ids):
        print(img_id)
        plt.subplot(rows, cols, i + 1)
        img = cv2.imread(path_root + str(img_paths[img_id]))
        plt.title(str(prediction(img)))
        # OpenCV loads BGR; convert for matplotlib's RGB display
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.axis('off')
    plt.show()


# Collect paths of confidently-detected single faces and display a sample.
mat_path = str(path_root + 'wiki.mat')
full_path, dob, gender, photo_taken, face_score, second_face_score, age\
    = get_meta(mat_path, 'wiki')

img_paths = []
for i in range(len(face_score)):
    # keep images with a confident detection and no second face
    if face_score[i] >= 1.0 and np.isnan(second_face_score[i]):
        img_paths.append(full_path[i][0])

print("#images with scores >= than 1.0 and no second face: {}".format(
    len(img_paths)))
show_imgs(img_paths)
from utils import get_meta
import cv2
import os

# Metadata is loaded once at import time and shared by the function below.
full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
    "imdb_crop/imdb.mat", "imdb")


# preprocess for gender classification
def preprocess_for_gen():
    """Copy up to ~5000 female and ~5000 male crops into FF/ and MM/ folders.

    NOTE(review): this definition appears truncated in this file -- the male
    branch builds `_path` but never reads/writes the image; confirm against
    the original source.
    """
    # BUG FIX: was a Python 2 print statement (a syntax error on Python 3)
    print(str(full_path.shape[0]) + " images found")
    countf = 0
    countm = 0
    countnan = 0
    for i in range(full_path.shape[0]):
        head, tail = os.path.split(full_path[i][0])
        if gender[i] == 0.0 and countf <= 5000:
            newtail = "FF/" + tail
            _path = "imdb_crop/" + str(full_path[i][0])
            t = cv2.imread(_path)
            tru = cv2.imwrite(newtail, t)
            if tru:  # imwrite returns False on failure; only count real copies
                countf += 1
        elif gender[i] == 1.0 and countm <= 5000:
            newtail = "MM/" + tail
            _path = "imdb_crop/" + str(full_path[i][0])
def __init__(self, source_path, cache_folder, destination, use_in_feeds,
             translations, default_lang, blog_url, messages):
    """Initialize post.

    The base path is the .txt post file. From it we calculate the
    meta file, as well as any translations available, and
    the .html fragment file path.

    `compile_html` is a function that knows how to compile this Post to
    html.
    """
    self.prev_post = None
    self.next_post = None
    self.blog_url = blog_url
    self.is_draft = False
    self.source_path = source_path  # posts/blah.txt
    self.post_name = os.path.splitext(source_path)[0]  # posts/blah
    # cache/posts/blah.html
    self.base_path = os.path.join(cache_folder, self.post_name + ".html")
    self.metadata_path = self.post_name + ".meta"  # posts/blah.meta
    self.folder = destination
    self.translations = translations
    self.default_lang = default_lang
    self.messages = messages
    # Metadata comes from a sidecar .meta file when present, otherwise it is
    # extracted from the source file itself.
    if os.path.isfile(self.metadata_path):
        with codecs.open(self.metadata_path, "r", "utf8") as meta_file:
            meta_data = meta_file.readlines()
        # pad to six fields: title, pagename, date, tags, link, description
        while len(meta_data) < 6:
            meta_data.append("")
        (default_title, default_pagename, self.date,
         self.tags, self.link, default_description) = \
            [x.strip() for x in meta_data][:6]
    else:
        (default_title, default_pagename, self.date,
         self.tags, self.link, default_description) = \
            utils.get_meta(self.source_path)
    if not default_title or not default_pagename or not self.date:
        raise OSError("You must set a title and slug and date!")
    self.date = utils.to_datetime(self.date)
    self.tags = [x.strip() for x in self.tags.split(',')]
    self.tags = filter(None, self.tags)
    # While draft comes from the tags, it's not really a tag
    self.use_in_feeds = use_in_feeds and "draft" not in self.tags
    self.is_draft = 'draft' in self.tags
    self.tags = [t for t in self.tags if t != 'draft']
    self.pagenames = {}
    self.titles = {}
    self.descriptions = {}
    # Load internationalized titles
    # TODO: this has gotten much too complicated. Rethink.
    for lang in translations:
        if lang == default_lang:
            self.titles[lang] = default_title
            self.pagenames[lang] = default_pagename
            self.descriptions[lang] = default_description
        else:
            metadata_path = self.metadata_path + "." + lang
            source_path = self.source_path + "." + lang
            try:
                if os.path.isfile(metadata_path):
                    with codecs.open(
                            metadata_path, "r", "utf8") as meta_file:
                        meta_data = [x.strip() for x in
                                     meta_file.readlines()]
                        while len(meta_data) < 6:
                            meta_data.append("")
                        # empty per-language fields fall back to the defaults
                        self.titles[lang] = meta_data[0] or default_title
                        self.pagenames[lang] = meta_data[1] or\
                            default_pagename
                        self.descriptions[lang] = meta_data[5] or\
                            default_description
                else:
                    ttitle, ppagename, tmp1, tmp2, tmp3, ddescription = \
                        utils.get_meta(source_path)
                    self.titles[lang] = ttitle or default_title
                    self.pagenames[lang] = ppagename or default_pagename
                    self.descriptions[lang] = ddescription or\
                        default_description
            except:
                # deliberate broad fallback: any failure reading a
                # translation falls back to the default-language metadata
                self.titles[lang] = default_title
                self.pagenames[lang] = default_pagename
                self.descriptions[lang] = default_description
Age -> age
dob -> date of birth (not needed for the pre-processing)
second_face_score -> images where more than one face was detected (discard)
'''
# NOTE(review): this chunk begins mid-docstring and ends mid-loop; the
# surrounding code is not visible here.
db = "imdb"
root_path = "{}_crop/".format(db)
mat_path = root_path + "{}.mat".format(db)
min_score = 1.00  # minimum face-detector confidence
img_size = 64  # minimum resolution
out_genders = []
out_ages = []
out_imgs = []
full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
    mat_path, db)
for i in tqdm(range(len(face_score))):
    if face_score[i] < min_score:
        continue
    # reject images where a second face was detected
    if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
        continue
    # NOTE(review): `~` acts as a logical not only on numpy bools
    if ~(0 <= age[i] <= 100):
        continue
    if np.isnan(gender[i]):
        continue
    out_genders.append(int(gender[i]))
def main():
    """Filter the dataset and write it out as a sequence of chunked .mat files.

    Indexes are pre-filtered by get_passed and shuffled; images are normalized
    to CHW RGB in [0, 1] and flushed via write_mat every `max_num_per_file`
    samples. The chunk counter resets when the train portion ends so the
    validation chunks start numbering from 0 again.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    # mat_path = args.input
    max_count = args.max_count
    img_size = args.img_size
    min_score = args.min_score
    max_num_per_file = args.max_num_per_file
    train_ratio = args.train_ratio
    root_path = "data/{}_crop/".format(db)
    mat_path = root_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)
    out_genders = []
    out_ages = []
    out_imgs = []
    length = len(face_score)
    # max_num_per_file = num_per_file
    file_count = 0
    # split the output path into directory prefix, base name and extension
    outpath_prefix = output_path.split('/')[:-1]
    outpath_prefix = path_concat(outpath_prefix)
    filename = output_path.split('/')[-1].split('.')[0]
    ext = output_path.split('/')[-1].split('.')[1]
    total_count = 0
    # filtering is done inside get_passed; the commented checks below are the
    # per-sample equivalents kept for reference
    indexes = get_passed(length, min_score, age, face_score, gender)
    random.shuffle(indexes)
    effective_length = len(indexes)
    train_length = int(max_count * train_ratio) if max_count is not None else int(
        effective_length * train_ratio)
    print('train_length=%s' % train_length)
    for i in tqdm(indexes):
        #print('total_count=%s' % total_count)
        # if face_score[i] < min_score:
        #     print('face score bad')
        #     continue
        # if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
        #     continue
        # if ~(0 <= age[i] <= 100):
        #     print('age bad')
        #     continue
        # if np.isnan(gender[i]):
        #     print('gender bad')
        #     continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        img = cv2.imread(root_path + str(full_path[i][0]), 1)
        img = cv2.resize(img, (img_size, img_size))
        img = img[..., ::-1]  # BGR -> RGB
        # HWC -> CHW and scale pixel values to [0, 1]
        img = np.around(np.transpose(img, (2, 0, 1)) / 255.0, decimals=12)
        out_imgs.append(img)
        total_count += 1
        if max_count is not None and total_count >= max_count:
            break
        # flush a chunk when it is full, or exactly at the train/val boundary
        if (len(out_imgs) % max_num_per_file == 0 and len(out_imgs) > 0) or \
                total_count == train_length:
            write_mat(out_imgs, out_genders, out_ages, db, img_size, min_score,
                      total_count, train_length, outpath_prefix, filename,
                      file_count, ext)
            file_count += 1
            if total_count == train_length:
                print('train range end')
                file_count = 0  # restart chunk numbering for the val split
            out_imgs = []
            out_genders = []
            out_ages = []
            output = {}
    # flush the remaining partial chunk
    write_mat(out_imgs, out_genders, out_ages, db, img_size, min_score,
              total_count, train_length, outpath_prefix, filename, file_count,
              ext)
def main():
    """Filter the dataset and build train/validation *path* lists (no pixel data).

    The last ~10000 raw samples (by pre-filter index) are routed to the
    validation split; earlier samples go to train. Only the gender counts are
    printed -- the savemat calls are intentionally disabled below.
    """
    args = get_args()
    output_path = args.train_out
    valout_path = args.val_out
    db = args.db
    img_size = args.img_size
    min_score = args.min_score
    root_path = "/data/{}_crop/".format(db)
    mat_path = "./data/{}_crop/".format(db)
    mat_path = mat_path + "{}.mat".format(db)
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)
    out_genders = []
    out_ages = []
    #out_imgs = []
    img_path = []
    val_genders = []
    val_ages = []
    val_path = []
    f_count = 0  # female samples in the train split
    m_count = 0  # male samples in the train split
    for i in tqdm(range(len(face_score))):
        if face_score[i] < min_score:
            continue
        # reject images where a second face was detected
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # NOTE(review): `~` acts as a logical not only on numpy bools
        if ~(0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        if i < (len(face_score) - 10000):
            out_genders.append(int(gender[i]))
            if int(gender[i]) == 0:
                f_count += 1
            elif int(gender[i]) == 1:
                m_count += 1
            out_ages.append(age[i])
            #img = cv2.imread(root_path + str(full_path[i][0]))
            img_path.append(root_path + str(full_path[i][0]))
            #print(full_path[i][0])
            #out_imgs.append(cv2.resize(img, (img_size, img_size)))
        else:
            val_genders.append(int(gender[i]))
            val_ages.append(age[i])
            val_path.append(root_path + str(full_path[i][0]))
    output = {
        "image_path": np.array(img_path),
        "gender": np.array(out_genders),
        "age": np.array(out_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    valout = {
        "image_path": np.array(val_path),
        "gender": np.array(val_genders),
        "age": np.array(val_ages),
        "db": db,
        "img_size": img_size,
        "min_score": min_score
    }
    #scipy.io.savemat(output_path, output)
    #scipy.io.savemat(valout_path, valout)
    print("the femal num ", f_count)
    print("the male num ", m_count)
def __init__(self, source_path, destination, use_in_feeds, translations,
             default_lang, blog_url, messages):
    """Initialize post.

    The base path is the .txt post file. From it we calculate the
    meta file, as well as any translations available, and
    the .html fragment file path.
    """
    self.prev_post = None
    self.next_post = None
    self.blog_url = blog_url
    self.is_draft = False
    self.source_path = source_path  # posts/blah.txt
    self.post_name = os.path.splitext(source_path)[0]  # posts/blah
    # cache/posts/blah.html
    self.base_path = os.path.join('cache', self.post_name + ".html")
    self.metadata_path = self.post_name + ".meta"  # posts/blah.meta
    self.folder = destination
    self.translations = translations
    self.default_lang = default_lang
    self.messages = messages

    if os.path.isfile(self.metadata_path):
        with codecs.open(self.metadata_path, "r", "utf8") as meta_file:
            meta_data = meta_file.readlines()
        # Pad to six fields so the unpacking below never fails.
        while len(meta_data) < 6:
            meta_data.append("")
        (default_title, default_pagename, self.date, self.tags,
         self.link, default_description) = \
            [x.strip() for x in meta_data][:6]
    else:
        (default_title, default_pagename, self.date, self.tags,
         self.link, default_description) = \
            utils.get_meta(self.source_path)

    if not default_title or not default_pagename or not self.date:
        raise OSError("You must set a title and slug and date!")

    self.date = utils.to_datetime(self.date)
    self.tags = [x.strip() for x in self.tags.split(',')]
    # Materialize the filter result: on Python 3 a lazy `filter` object
    # would be (partially) consumed by the membership tests below, making
    # `is_draft` and the final tag list wrong.
    self.tags = list(filter(None, self.tags))

    # While draft comes from the tags, it's not really a tag
    self.use_in_feeds = use_in_feeds and "draft" not in self.tags
    self.is_draft = 'draft' in self.tags
    self.tags = [t for t in self.tags if t != 'draft']

    self.pagenames = {}
    self.titles = {}
    self.descriptions = {}
    # Load internationalized titles
    # TODO: this has gotten much too complicated. Rethink.
    for lang in translations:
        if lang == default_lang:
            self.titles[lang] = default_title
            self.pagenames[lang] = default_pagename
            self.descriptions[lang] = default_description
        else:
            metadata_path = self.metadata_path + "." + lang
            source_path = self.source_path + "." + lang
            try:
                if os.path.isfile(metadata_path):
                    with codecs.open(
                            metadata_path, "r", "utf8") as meta_file:
                        meta_data = [x.strip() for x in
                                     meta_file.readlines()]
                    while len(meta_data) < 6:
                        meta_data.append("")
                    self.titles[lang] = meta_data[0] or default_title
                    self.pagenames[lang] = meta_data[1] or\
                        default_pagename
                    self.descriptions[lang] = meta_data[5] or\
                        default_description
                else:
                    ttitle, ppagename, tmp1, tmp2, tmp3, ddescription = \
                        utils.get_meta(source_path)
                    self.titles[lang] = ttitle or default_title
                    self.pagenames[lang] = ppagename or default_pagename
                    self.descriptions[lang] = ddescription or\
                        default_description
            except Exception:
                # Narrowed from a bare `except:` so KeyboardInterrupt /
                # SystemExit are not swallowed. Any failure reading a
                # translation falls back to the default language.
                self.titles[lang] = default_title
                self.pagenames[lang] = default_pagename
                self.descriptions[lang] = default_description
def __init__(self, source_path, destination, use_in_feeds, translations,
             default_lang, blog_url, compile_html):
    """Initialize post.

    The base path is the .txt post file. From it we calculate the
    meta file, as well as any translations available, and
    the .html fragment file path.

    `compile_html` is a function that knows how to compile this Post to
    html.
    """
    self.prev_post = None
    self.next_post = None
    self.use_in_feeds = use_in_feeds
    self.blog_url = blog_url
    self.source_path = source_path  # posts/blah.txt
    self.post_name = os.path.splitext(source_path)[0]  # posts/blah
    # cache/posts/blah.html
    self.base_path = os.path.join("cache", self.post_name + ".html")
    self.metadata_path = self.post_name + ".meta"  # posts/blah.meta
    self.folder = destination
    self.translations = translations
    self.default_lang = default_lang

    if os.path.isfile(self.metadata_path):
        with codecs.open(self.metadata_path, "r", "utf8") as meta_file:
            meta_data = meta_file.readlines()
        # Pad to five fields so the unpacking below never fails.
        while len(meta_data) < 5:
            meta_data.append("")
        default_title, default_pagename, self.date, self.tags, self.link = \
            [x.strip() for x in meta_data][:5]
    else:
        default_title, default_pagename, self.date, self.tags, self.link = \
            utils.get_meta(self.source_path)

    if not default_title or not default_pagename or not self.date:
        # raise OSError(...) is equivalent to the old Python-2-only
        # `raise OSError, "..."` statement and also valid on Python 3.
        raise OSError("You must set a title and slug and date!")

    self.date = utils.to_datetime(self.date)
    self.tags = [x.strip() for x in self.tags.split(",")]
    # Materialize so the result is a real list on Python 3 as well
    # (filter() already returns a list on Python 2).
    self.tags = list(filter(None, self.tags))
    self.compile_html = compile_html

    self.pagenames = {}
    self.titles = {}
    # Load internationalized titles
    for lang in translations:
        if lang == default_lang:
            self.titles[lang] = default_title
            self.pagenames[lang] = default_pagename
        else:
            metadata_path = self.metadata_path + "." + lang
            source_path = self.source_path + "." + lang
            try:
                if os.path.isfile(metadata_path):
                    with codecs.open(metadata_path, "r", "utf8") as meta_file:
                        meta_data = [x.strip() for x in
                                     meta_file.readlines()]
                    while len(meta_data) < 2:
                        meta_data.append("")
                    self.titles[lang] = meta_data[0] or default_title
                    self.pagenames[lang] = meta_data[1] or default_pagename
                else:
                    ttitle, ppagename, tmp1, tmp2, tmp3 = \
                        utils.get_meta(source_path)
                    self.titles[lang] = ttitle or default_title
                    self.pagenames[lang] = ppagename or default_pagename
            except Exception:
                # Narrowed from a bare `except:`; any failure reading a
                # translation falls back to the default language.
                self.titles[lang] = default_title
                self.pagenames[lang] = default_pagename
def main():
    """Filter the db's face crops and save them (with age/gender labels) to .h5 or .mat.

    Images are resized to ``img_size`` and stored in a pre-allocated uint8
    array; only the first ``valid_sample_num`` rows (the ones that passed the
    filters) are written out.
    """
    args = get_args()
    output_path = args.output
    db = args.db
    img_size = args.img_size
    min_score = args.min_score

    if args.db_path:
        root_path = args.db_path
    else:
        root_path = "data/{}_crop/".format(db)
    mat_path = os.path.join(root_path, "{}.mat".format(db))
    full_path, dob, gender, photo_taken, face_score, second_face_score, age = get_meta(
        mat_path, db)

    out_genders = []
    out_ages = []
    sample_num = len(face_score)
    # Pre-allocate for the worst case; trimmed to valid_sample_num on save.
    out_imgs = np.empty((sample_num, img_size, img_size, 3), dtype=np.uint8)
    valid_sample_num = 0

    print(f"root_path = {root_path}")
    for i in tqdm(range(sample_num)):
        # Quality filters: primary face must score above threshold, no second
        # face, a plausible age, and a known gender.
        if face_score[i] < min_score:
            continue
        if (~np.isnan(second_face_score[i])) and second_face_score[i] > 0.0:
            continue
        # `not` (instead of bitwise `~`) is correct for both builtin bools and
        # numpy scalars; `~True`/`~False` are -2/-1 (both truthy) on builtins.
        if not (0 <= age[i] <= 100):
            continue
        if np.isnan(gender[i]):
            continue
        out_genders.append(int(gender[i]))
        out_ages.append(age[i])
        img_path = os.path.join(root_path, str(full_path[i][0]))
        img = cv2.imread(img_path)
        out_imgs[valid_sample_num] = cv2.resize(img, (img_size, img_size))
        valid_sample_num += 1

    # Report the number of samples actually kept, not the pre-allocated size.
    print(f"Saving {valid_sample_num} items")
    if args.h5:
        base, ext = os.path.splitext(output_path)
        output_path = base + '.h5'
        # Context manager guarantees the file is closed even on error.
        with h5py.File(output_path, mode='w') as h5:
            h5.create_dataset('image', data=out_imgs[:valid_sample_num])
            h5.create_dataset('gender', data=np.array(out_genders))
            h5.create_dataset('age', data=np.array(out_ages))
            h5.attrs['db'] = db
            h5.attrs['img_size'] = img_size
            h5.attrs['min_score'] = min_score
        print(f"Data has been written to {output_path}.")
    else:
        output = {
            "image": out_imgs[:valid_sample_num],
            "gender": np.array(out_genders),
            "age": np.array(out_ages),
            "db": db,
            "img_size": img_size,
            "min_score": min_score
        }
        scipy.io.savemat(output_path, output, do_compression=True)