def calculate_fid_for_all_tasks(args, domains, step, mode):
    print('Calculating FID for all tasks...')
    fid_values = OrderedDict()
    for trg_domain in domains:
        src_domains = [x for x in domains if x != trg_domain]

        for src_domain in src_domains:
            task = '%s2%s' % (src_domain, trg_domain)
            path_real = os.path.join(args.train_img_dir, trg_domain)
            path_fake = os.path.join(args.eval_dir, task)
            print('Calculating FID for %s...' % task)
            fid_value = calculate_fid_given_paths(
                paths=[path_real, path_fake],
                img_size=args.img_size,
                batch_size=args.val_batch_size)
            fid_values['FID_%s/%s' % (mode, task)] = fid_value

    # calculate the average FID for all tasks
    fid_mean = 0
    for _, value in fid_values.items():
        fid_mean += value / len(fid_values)
    fid_values['FID_%s/mean' % mode] = fid_mean

    # report FID values
    filename = os.path.join(args.eval_dir, 'FID_%.5i_%s.json' % (step, mode))
    utils.save_json(fid_values, filename)

def _process_caption_data(phase, ann_file=None, max_length=None):
    if phase in ['val', 'train']:
        caption_data = load_json(ann_file)

        if phase == 'val':
            caption_data['type'] = 'caption'

        # id_to_filename is a dictionary such as {image_id: filename}
        id_to_filename = {
            image['id']: image['file_name']
            for image in caption_data['images']
        }

        # annotations is a list of dictionaries, each containing 'caption', 'file_name' and 'image_id' as keys.
        for i, annotation in enumerate(caption_data['annotations']):
            image_id = annotation['image_id']
            caption_data['annotations'][i]['file_name'] = id_to_filename[image_id]

        if phase == 'train':
            del_idx = []
            for i, annotation in enumerate(caption_data['annotations']):
                caption = annotation['caption']
                caption = caption.replace('.', '').replace(',', '').replace(
                    "'", '').replace('"', '')
                caption = caption.replace('&', 'and').replace('(', '').replace(
                    ')', '').replace('-', ' ')
                caption = ' '.join(caption.split())  # collapse multiple spaces
                caption_data['annotations'][i]['caption'] = caption.lower()
                if max_length is not None and len(caption.split(' ')) > max_length:
                    del_idx.append(i)

            # delete captions that are longer than max_length
            print("The number of captions before deletion: %d" %
                  len(caption_data['annotations']))
            for idx in sorted(del_idx, reverse=True):
                del caption_data['annotations'][idx]
            print("The number of captions after deletion: %d" %
                  len(caption_data['annotations']))

        save_json(caption_data,
                  os.path.join('data', phase, ann_file.split('/')[-1]))

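# _process_caption_data() above relies on load_json/save_json helpers that are
# not defined in this file. A minimal sketch of the assumed behaviour (plain
# JSON round-tripping) follows; the project's own helpers may differ.
import json

def load_json(path):
    # parse a JSON file into Python objects
    with open(path, 'r') as f:
        return json.load(f)

def save_json(data, path):
    # write Python objects back to disk as JSON
    with open(path, 'w') as f:
        json.dump(data, f)
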
def calculate_fid_for_all_tasks(args, domains, step, mode, dataset_dir=''):
    print('Calculating FID for all tasks...')
    fid_values = OrderedDict()
    for trg_domain in domains:
        task = '%s' % trg_domain
        path_real = args.val_img_dir
        print('Calculating FID for %s...' % task)
        fid_value = calculate_fid_given_paths(
            paths=[path_real, args.eval_dir],
            img_size=args.img_size,
            batch_size=args.val_batch_size,
            trg_domain=trg_domain,
            dataset_dir=dataset_dir)
        fid_values['FID_%s/%s' % (mode, task)] = fid_value

    # calculate the average FID for all tasks
    fid_mean = 0
    for _, value in fid_values.items():
        fid_mean += value / len(fid_values)
    fid_values['FID_%s/mean' % mode] = fid_mean

    # report FID values
    filename = os.path.join(args.eval_dir, 'FID_%.5i_%s.json' % (step, mode))
    utils.save_json(fid_values, filename)
    return fid_values, fid_mean

def calculate_metrics(nets, args, step, mode):
    print('Calculating evaluation metrics...')
    assert mode in ['latent', 'reference']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    domains = os.listdir(args.val_img_dir)
    domains.sort()
    num_domains = len(domains)
    print('Number of domains: %d' % num_domains)

    lpips_dict = OrderedDict()
    for trg_idx, trg_domain in enumerate(domains):
        src_domains = [x for x in domains if x != trg_domain]

        if mode == 'reference':
            path_ref = os.path.join(args.val_img_dir, trg_domain)
            loader_ref = get_eval_loader(root=path_ref,
                                         img_size=args.img_size,
                                         batch_size=args.val_batch_size,
                                         imagenet_normalize=False,
                                         drop_last=True)

        for src_idx, src_domain in enumerate(src_domains):
            path_src = os.path.join(args.val_img_dir, src_domain)
            loader_src = get_eval_loader(root=path_src,
                                         img_size=args.img_size,
                                         batch_size=args.val_batch_size,
                                         imagenet_normalize=False)

            task = '%s2%s' % (src_domain, trg_domain)
            path_fake = os.path.join(args.eval_dir, task)
            shutil.rmtree(path_fake, ignore_errors=True)
            os.makedirs(path_fake)

            lpips_values = []
            # only reference-guided synthesis needs the reference iterator
            if mode == 'reference':
                iter_ref = iter(loader_ref)
            print('Generating images and calculating LPIPS for %s...' % task)
            for i, x_src in enumerate(tqdm(loader_src, total=len(loader_src))):
                N = x_src.size(0)
                x_src = x_src.to(device)
                y_trg = torch.tensor([trg_idx] * N).to(device)
                masks = nets.fan.get_heatmap(x_src) if args.w_hpf > 0 else None

                # generate num_outs_per_domain outputs from the same input
                group_of_images = []
                for j in range(args.num_outs_per_domain):
                    if mode == 'latent':
                        z_trg = torch.randn(N, args.latent_dim).to(device)
                        s_trg = nets.mapping_network(z_trg, y_trg)
                    else:
                        try:
                            x_ref = next(iter_ref).to(device)
                        except (NameError, StopIteration):
                            iter_ref = iter(loader_ref)
                            x_ref = next(iter_ref).to(device)

                        if x_ref.size(0) > N:
                            x_ref = x_ref[:N]
                        s_trg = nets.style_encoder(x_ref, y_trg)

                    x_fake = nets.generator(x_src, s_trg, masks=masks)
                    group_of_images.append(x_fake)

                    # save generated images to calculate FID later
                    for k in range(N):
                        filename = os.path.join(
                            path_fake,
                            '%.4i_%.2i.png' % (i * args.val_batch_size + (k + 1), j + 1))
                        utils.save_image(x_fake[k], ncol=1, filename=filename)

                lpips_value = calculate_lpips_given_images(group_of_images)
                lpips_values.append(lpips_value)

            # calculate LPIPS for each task (e.g. cat2dog, dog2cat)
            lpips_mean = np.array(lpips_values).mean()
            lpips_dict['LPIPS_%s/%s' % (mode, task)] = lpips_mean

            # delete the source dataloader before moving to the next task
            del loader_src

        if mode == 'reference':
            del loader_ref
            del iter_ref

    # calculate the average LPIPS for all tasks
    lpips_mean = 0
    for _, value in lpips_dict.items():
        lpips_mean += value / len(lpips_dict)
    lpips_dict['LPIPS_%s/mean' % mode] = lpips_mean

    # report LPIPS values
    filename = os.path.join(args.eval_dir, 'LPIPS_%.5i_%s.json' % (step, mode))
    utils.save_json(lpips_dict, filename)

    # calculate and report FID values
    calculate_fid_for_all_tasks(args, domains, step=step, mode=mode)

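# Hypothetical call site for calculate_metrics() above, sketching how it is
# typically invoked from a training loop so that both latent-guided and
# reference-guided synthesis are scored; the names `nets_ema` and `resume_iter`
# are assumptions about the surrounding code, not definitions from this file.
#
#     calculate_metrics(nets_ema, args, step=resume_iter, mode='latent')
#     calculate_metrics(nets_ema, args, step=resume_iter, mode='reference')
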
def main():
    args = parser.parse_args()

    # phases to be processed.
    phases = [phase.strip() for phase in args.phases.split(',')]

    # annotation files to be processed
    if sorted(phases) == sorted(['train', 'val', 'test']) and args.ann_files == '':
        tmplt = 'data/annotations/captions_%s2017.json'
        ann_files = [tmplt % 'train', tmplt % 'val', '']
    else:
        ann_files = [ann_file.strip() for ann_file in args.ann_files.split(',')]

    # batch size for extracting feature vectors.
    batch_size = args.batch_size
    # maximum caption length in words; captions longer than max_length are deleted.
    max_length = args.max_length
    # words occurring fewer than word_count_threshold times in the training set
    # are mapped to the special unknown token.
    word_count_threshold = args.word_count_threshold
    vocab_size = args.vocab_size

    for phase, ann_file in zip(phases, ann_files):
        _process_caption_data(phase, ann_file=ann_file, max_length=max_length)

        if phase == 'train':
            captions_data = load_json('./data/train/captions_train2017.json')
            word_to_idx = _build_vocab(captions_data,
                                       threshold=word_count_threshold,
                                       vocab_size=vocab_size)
            save_json(word_to_idx, './data/word_to_idx.json')

            new_captions_data = _build_caption_vector(captions_data,
                                                      word_to_idx=word_to_idx,
                                                      max_length=max_length)
            save_json(new_captions_data, ann_file)
    print('Finished processing caption data')

    feature_extractor = FeatureExtractor(model_name='resnet101', layer=3)
    for phase in phases:
        if not os.path.isdir('./data/%s/feats/' % phase):
            os.makedirs('./data/%s/feats/' % phase)

        image_paths = os.listdir('./image/%s/' % phase)
        dataset = CocoImageDataset(root='./image/%s/' % phase,
                                   image_paths=image_paths)
        data_loader = torch.utils.data.DataLoader(dataset,
                                                  batch_size=batch_size,
                                                  num_workers=8)

        for batch_paths, batch_images in tqdm(data_loader):
            feats = feature_extractor(batch_images).data.cpu().numpy()
            feats = feats.reshape(-1, feats.shape[1] * feats.shape[2], feats.shape[-1])
            for j in range(len(feats)):
                np.save('./data/%s/feats/%s.npy' % (phase, batch_paths[j]), feats[j])

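# Entry point, assuming this preprocessing script is executed directly and that
# the argparse `parser` referenced inside main() is defined at module level
# elsewhere in the file.
if __name__ == '__main__':
    main()
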
def calculate_metrics(nets, args, step, mode, eval_loader):
    print('Calculating evaluation metrics...')
    assert mode in ['latent', 'reference']
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    domains = os.listdir(args.style_dir)
    domains.sort()
    num_domains = len(domains)
    print('Number of domains: %d' % num_domains)

    # generate_new = True
    # num_files = sum([len(files) for r, d, files in os.walk(args.eval_dir)])
    # print("num_files", num_files, len(eval_loader),
    #       (1 + args.num_outs_per_domain) * len(eval_loader) * args.batch_size)
    # if num_files != (1 + args.num_outs_per_domain) * len(eval_loader):
    #     shutil.rmtree(args.eval_dir, ignore_errors=True)
    #     os.makedirs(args.eval_dir)
    generate_new = True
    tcl_dict = {}

    # prepare output folders and TCL accumulators for every task
    for d in range(1, num_domains):
        src_domain = "style0"
        trg_domain = "style" + str(d)
        t1 = '%s2%s' % (src_domain, trg_domain)
        t2 = '%s2%s' % (trg_domain, src_domain)
        tcl_dict[t1] = []
        tcl_dict[t2] = []
        if generate_new:
            create_task_folders(args, t1)
            create_task_folders(args, t2)

    # generate
    for i, x_src_all in enumerate(tqdm(eval_loader, total=len(eval_loader))):
        # unpack the evaluation batch: two source frames, their domain, a
        # reference image and target domain, plus the mask and flow used for
        # the temporal consistency (TCL) term
        x_real, x_real2, y_org, x_ref, y_trg, mask, flow = x_src_all
        x_real = x_real.to(device)
        x_real2 = x_real2.to(device)
        y_org = y_org.to(device)
        x_ref = x_ref.to(device)
        y_trg = y_trg.to(device)
        mask = mask.to(device)
        flow = flow.to(device)

        N = x_real.size(0)
        masks = nets.fan.get_heatmap(x_real) if args.w_hpf > 0 else None

        for j in range(args.num_outs_per_domain):
            if mode == 'latent':
                z_trg = torch.randn(N, args.latent_dim).to(device)
                s_trg = nets.mapping_network(z_trg, y_trg)
            else:
                s_trg = nets.style_encoder(x_ref, y_trg)

            x_fake = nets.generator(x_real, s_trg, masks=masks)
            x_fake2 = nets.generator(x_real2, s_trg, masks=masks)

            # TCL: masked RMSE between the warped first output and the second output
            x_warp = warp(x_fake, flow)
            tcl_err = ((mask * (x_fake2 - x_warp)) ** 2).mean(dim=(1, 2, 3)) ** 0.5

            for k in range(N):
                src_domain = "style" + str(y_org[k].cpu().numpy())
                trg_domain = "style" + str(y_trg[k].cpu().numpy())
                if src_domain == trg_domain:
                    continue
                task = '%s2%s' % (src_domain, trg_domain)
                tcl_dict[task].append(tcl_err[k].cpu().numpy())

                path_ref = os.path.join(args.eval_dir, task + "/ref")
                path_fake = os.path.join(args.eval_dir, task + "/fake")
                # if not os.path.exists(path_ref):
                #     os.makedirs(path_ref)
                # if not os.path.exists(path_fake):
                #     os.makedirs(path_fake)
                if generate_new:
                    filename = os.path.join(
                        path_ref, '%.4i_%.2i.png' % (i * args.val_batch_size + (k + 1), j + 1))
                    utils.save_image(x_ref[k], ncol=1, filename=filename)
                    filename = os.path.join(
                        path_fake, '%.4i_%.2i.png' % (i * args.val_batch_size + (k + 1), j + 1))
                    utils.save_image(x_fake[k], ncol=1, filename=filename)
                # filename = os.path.join(args.eval_dir, task + "/tcl_losses.txt")
                # with open(filename, "a") as text_file:
                #     text_file.write(str(tcl_err[k].cpu().numpy()) + "\n")

    # evaluate
    print("computing fid, lpips and tcl")
    tasks = [task_dir for task_dir in os.listdir(args.eval_dir)
             if os.path.isdir(os.path.join(args.eval_dir, task_dir))]
    tasks.sort()

    # FID, LPIPS and TCL per task
    fid_values = OrderedDict()
    lpips_dict = OrderedDict()
    tcl_values = OrderedDict()
    for task in tasks:
        print(task)
        path_ref = os.path.join(args.eval_dir, task + "/ref")
        path_fake = os.path.join(args.eval_dir, task + "/fake")
        # path_tcl = os.path.join(args.eval_dir, task + "/tcl_losses.txt")
        fake_group = load_images(path_fake)
        # with open(path_tcl, "r") as text_file:
        #     tcl_data = text_file.read()
        # tcl_data = tcl_data.split("\n")[:-1]
        # tcl_data = [float(td) for td in tcl_data]
        tcl_data = tcl_dict[task]
        print("TCL", len(tcl_data))
        tcl_mean = np.array(tcl_data).mean()
        print(tcl_mean)
        tcl_values['TCL_%s/%s' % (mode, task)] = float(tcl_mean)

        lpips_values = []
        fake_chunks = chunks(fake_group, args.num_outs_per_domain)
        for cidx in range(len(fake_chunks)):
            lpips_value = calculate_lpips_given_images(fake_chunks[cidx])
            lpips_values.append(lpips_value)

        print("LPIPS")
        # calculate LPIPS for each task (e.g. cat2dog, dog2cat)
        lpips_mean = np.array(lpips_values).mean()
        lpips_dict['LPIPS_%s/%s' % (mode, task)] = lpips_mean

        print("FID")
        fid_value = calculate_fid_given_paths(paths=[path_ref, path_fake],
                                              img_size=args.img_size,
                                              batch_size=args.val_batch_size)
        fid_values['FID_%s/%s' % (mode, task)] = fid_value

    # calculate the average LPIPS for all tasks
    lpips_mean = 0
    for _, value in lpips_dict.items():
        lpips_mean += value / len(lpips_dict)
    lpips_dict['LPIPS_%s/mean' % mode] = lpips_mean

    # report LPIPS values
    filename = os.path.join(args.eval_dir, 'LPIPS_%.5i_%s.json' % (step, mode))
    utils.save_json(lpips_dict, filename)

    # calculate the average FID for all tasks
    fid_mean = 0
    for _, value in fid_values.items():
        fid_mean += value / len(fid_values)
    fid_values['FID_%s/mean' % mode] = fid_mean

    # report FID values
    filename = os.path.join(args.eval_dir, 'FID_%.5i_%s.json' % (step, mode))
    utils.save_json(fid_values, filename)

    # calculate the average TCL for all tasks
    tcl_mean = 0
    for _, value in tcl_values.items():
        print(value, len(tcl_values))
        tcl_mean += value / len(tcl_values)
        print(tcl_mean)
    tcl_values['TCL_%s/mean' % mode] = float(tcl_mean)

    # report TCL values
    filename = os.path.join(args.eval_dir, 'TCL_%.5i_%s.json' % (step, mode))
    utils.save_json(tcl_values, filename)

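# The evaluation above assumes several helpers that are not defined in this
# file: create_task_folders, load_images, chunks, and warp. Two minimal
# sketches follow, assuming create_task_folders recreates the per-task ref/
# and fake/ output folders under args.eval_dir and chunks splits the loaded
# fake images into groups of num_outs_per_domain; load_images (folder -> list
# of image tensors) and the flow-based warp are left to the project's own code,
# which may behave differently.
import os
import shutil

def create_task_folders(args, task):
    # recreate <eval_dir>/<task>/ref and <eval_dir>/<task>/fake from scratch
    for sub in ('ref', 'fake'):
        path = os.path.join(args.eval_dir, task, sub)
        shutil.rmtree(path, ignore_errors=True)
        os.makedirs(path)

def chunks(lst, n):
    # split a list into consecutive chunks of length n
    return [lst[i:i + n] for i in range(0, len(lst), n)]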