def main():
    """Report mean resampling weights for CelebA training samples with and
    without a target attribute.

    Loads the phase-1 discriminator logits, turns them into per-sample
    scores over the last 5000 steps before ``p1_step``, then compares the
    mean weight of attribute-positive vs attribute-negative samples.
    """
    args = parse_option()
    print(args)

    output_dir = f'{args.work_dir}/{args.exp_name}'
    save_path = Path(output_dir)

    logit_path = save_path / 'logits_netD_eval.pkl'
    print(f'Use logit from: {logit_path}')
    # Use a context manager so the pickle file handle is closed deterministically
    # (the original `pickle.load(open(...))` leaked the handle).
    with open(logit_path, "rb") as logit_file:
        logits = pickle.load(logit_file)

    # Scores are aggregated over the final 5000-step window of phase 1.
    score_start_step = args.p1_step - 5000
    score_end_step = args.p1_step
    score_dict = calculate_scores(logits,
                                  start_epoch=score_start_step,
                                  end_epoch=score_end_step)
    sample_weights = score_dict[args.resample_score]
    print(f'sample_weights mean: {sample_weights.mean()}, var: {sample_weights.var()}, max: {sample_weights.max()}, min: {sample_weights.min()}')

    # CelebA convention: the first 162770 images form the training split.
    train_num = 162770
    attr_index, not_attr_index = get_celeba_index_with_attr(args.root, args.attr)
    attr_index = np.array(attr_index)
    not_attr_index = np.array(not_attr_index)
    # Keep only indices that fall inside the training split.
    attr_index = attr_index[attr_index < train_num]
    not_attr_index = not_attr_index[not_attr_index < train_num]

    attr_weights = sample_weights[attr_index]
    not_attr_weights = sample_weights[not_attr_index]
    print(f'attr weights mean: {attr_weights.mean()}')
    print(f'not attr weights mean: {not_attr_weights.mean()}')
def main():
    """Phase-2 colour-MNIST GAN training with logit-based resampling.

    Loads the phase-1 (baseline) generator/discriminator checkpoints and the
    saved discriminator logits, derives per-sample weights from the logits,
    builds a weighted dataloader, resumes training with a DRS discriminator,
    and plots generated samples before/after phase 2.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", "-d", default="color_mnist", type=str)
    parser.add_argument("--root", "-r", default="./dataset/colour_mnist", type=str, help="dataset dir")
    parser.add_argument("--work_dir", default="./exp_results", type=str, help="output dir")
    parser.add_argument("--exp_name", default="colour_mnist", type=str, help="exp name")
    parser.add_argument("--baseline_exp_name", default="colour_mnist", type=str, help="exp name")
    parser.add_argument("--model", default="mnistgan", type=str, help="network model")
    parser.add_argument('--gpu', default='0', type=str, help='id(s) for CUDA_VISIBLE_DEVICES')
    parser.add_argument('--num_pack', default=1, type=int)
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--seed', default=1, type=int)
    parser.add_argument('--num_steps', default=20000, type=int)
    parser.add_argument('--logit_save_steps', default=100, type=int)
    parser.add_argument('--decay', default='None', type=str)
    parser.add_argument('--n_dis', default=1, type=int)
    parser.add_argument('--p1_step', default=10000, type=int)
    parser.add_argument('--major_ratio', default=0.99, type=float)
    parser.add_argument('--num_data', default=10000, type=int)
    parser.add_argument('--resample_score', type=str)
    parser.add_argument("--loss_type", default="hinge", type=str, help="loss type")
    parser.add_argument('--use_eval_logits', type=int)
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    output_dir = f'{args.work_dir}/{args.exp_name}'
    save_path = Path(output_dir)
    save_path.mkdir(parents=True, exist_ok=True)

    baseline_output_dir = f'{args.work_dir}/{args.baseline_exp_name}'
    baseline_save_path = Path(baseline_output_dir)
    # Short experiment tag used in output file names.
    prefix = args.exp_name.split('/')[-1]

    set_seed(args.seed)
    if torch.cuda.is_available():
        device = "cuda"
        cudnn.benchmark = True
    else:
        device = "cpu"

    netG, netD, netD_drs, optG, optD, optD_drs = get_gan_model(
        dataset_name=args.dataset,
        model=args.model,
        drs=True,
        loss_type=args.loss_type,
    )

    netG_ckpt_path = baseline_save_path / f'checkpoints/netG/netG_{args.p1_step}_steps.pth'
    netD_ckpt_path = baseline_save_path / f'checkpoints/netD/netD_{args.p1_step}_steps.pth'
    # NOTE: the DRS discriminator is warm-started from the same netD checkpoint
    # as the training discriminator — this mirrors the other scripts in the file.
    netD_drs_ckpt_path = baseline_save_path / f'checkpoints/netD/netD_{args.p1_step}_steps.pth'

    logit_path = baseline_save_path / ('logits_netD_eval.pkl' if args.use_eval_logits == 1 else 'logits_netD_train.pkl')
    print(f'Use logit from: {logit_path}')
    # Context manager fixes the leaked file handle from `pickle.load(open(...))`.
    with open(logit_path, "rb") as logit_file:
        logits = pickle.load(logit_file)

    # Aggregate scores over the final 5000-step window of phase 1.
    score_start_step = args.p1_step - 5000
    score_end_step = args.p1_step
    score_dict = calculate_scores(logits, start_epoch=score_start_step, end_epoch=score_end_step)
    sample_weights = score_dict[args.resample_score]
    print(f'sample_weights mean: {sample_weights.mean()}, var: {sample_weights.var()}, max: {sample_weights.max()}, min: {sample_weights.min()}')

    print_num_params(netG, netD)

    ds_train = get_predefined_dataset(
        dataset_name=args.dataset,
        root=args.root,
        weights=None,
        major_ratio=args.major_ratio,
        num_data=args.num_data
    )
    # Weighted sampling only when a resample score was requested.
    dl_train = get_dataloader(
        ds_train,
        batch_size=args.batch_size,
        weights=sample_weights if args.resample_score is not None else None)
    # The DRS dataloader always samples uniformly.
    dl_drs = get_dataloader(ds_train, batch_size=args.batch_size, weights=None)

    # Visual sanity checks of the (re)sampled training data and the scores.
    data_iter = iter(dl_train)
    imgs, _, _, _ = next(data_iter)
    plot_data(imgs, num_per_side=8, save_path=save_path,
              file_name=f'{prefix}_resampled_train_data_p2', vis=None)
    plot_score_sort(ds_train, score_dict, save_path=save_path,
                    phase=f'{prefix}_{score_start_step}-{score_end_step}_score',
                    plot_metric_name=args.resample_score)
    # plot_score_box(ds_train, score_dict, save_path=save_path, phase=f'{prefix}_{score_start_step}-{score_end_step}_box')

    print(args, netG_ckpt_path, netD_ckpt_path, netD_drs_ckpt_path)

    # Start training
    trainer = LogTrainer(
        output_path=save_path,
        logit_save_steps=args.logit_save_steps,
        netD=netD,
        netG=netG,
        optD=optD,
        optG=optG,
        netG_ckpt_file=netG_ckpt_path,
        netD_ckpt_file=netD_ckpt_path,
        netD_drs_ckpt_file=netD_drs_ckpt_path,
        netD_drs=netD_drs,
        optD_drs=optD_drs,
        dataloader_drs=dl_drs,
        n_dis=args.n_dis,
        num_steps=args.num_steps,
        save_steps=1000,
        vis_steps=100,
        lr_decay=args.decay,
        dataloader=dl_train,
        log_dir=output_dir,
        print_steps=10,
        device=device,
        save_logits=False,
    )
    trainer.train()

    plot_color_mnist_generator(netG, save_path=save_path, file_name=f'{prefix}-eval_p2')

    # Sample through discriminator-rejection sampling at a fixed percentile.
    netG_drs = drs.DRS(netG, netD_drs, device=device)
    # for percentile in np.arange(50, 100, 5):
    #     netG_drs.percentile = percentile
    percentile = 80
    plot_color_mnist_generator(netG_drs, save_path=save_path,
                               file_name=f'{prefix}-eval_drs_percent{percentile}_p2')

    # Restore the phase-1 generator for a before/after comparison plot.
    netG.restore_checkpoint(ckpt_file=netG_ckpt_path)
    netG.to(device)
    plot_color_mnist_generator(netG, save_path=save_path, file_name=f'{prefix}-eval_generated_p1')
def main():
    """Phase-2 CIFAR-10 / CelebA GAN training with logit-based resampling.

    Supports three phase-2 strategies: weighted resampling from the saved
    discriminator logits (default), GOLD reweighting (``--gold``), and top-k
    training (``--topk``). Loads phase-1 checkpoints from the baseline
    experiment directory and resumes training with a DRS discriminator.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", "-d", default="cifar10", type=str)
    parser.add_argument("--root", "-r", default="./dataset/cifar10", type=str, help="dataset dir")
    parser.add_argument("--work_dir", default="./exp_results", type=str, help="output dir")
    parser.add_argument("--exp_name", type=str, help="exp name")
    parser.add_argument("--baseline_exp_name", type=str, help="exp name")
    parser.add_argument('--p1_step', default=40000, type=int)
    parser.add_argument("--model", default="sngan", type=str, help="network model")
    parser.add_argument("--loss_type", default="hinge", type=str, help="loss type")
    parser.add_argument('--gpu', default='0', type=str, help='id(s) for CUDA_VISIBLE_DEVICES')
    parser.add_argument('--num_steps', default=80000, type=int)
    parser.add_argument('--batch_size', default=64, type=int)
    parser.add_argument('--seed', default=1, type=int)
    parser.add_argument('--decay', default='linear', type=str)
    parser.add_argument('--n_dis', default=5, type=int)
    parser.add_argument('--resample_score', type=str)
    parser.add_argument('--gold', action='store_true')
    parser.add_argument('--topk', action='store_true')
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    output_dir = f'{args.work_dir}/{args.exp_name}'
    save_path = Path(output_dir)
    save_path.mkdir(parents=True, exist_ok=True)

    baseline_output_dir = f'{args.work_dir}/{args.baseline_exp_name}'
    baseline_save_path = Path(baseline_output_dir)

    set_seed(args.seed)
    if torch.cuda.is_available():
        device = "cuda"
        cudnn.benchmark = True
    else:
        device = "cpu"

    prefix = args.exp_name.split('/')[-1]

    # Score-aggregation window before p1_step. The original per-dataset
    # if/elif chain assigned 5000 in every branch, so it collapses to one
    # assignment; reintroduce a branch here if datasets ever need to differ.
    window = 5000

    if not args.gold:
        logit_path = baseline_save_path / 'logits_netD_eval.pkl'
        print(f'Use logit from: {logit_path}')
        # Context manager fixes the leaked file handle from `pickle.load(open(...))`.
        with open(logit_path, "rb") as logit_file:
            logits = pickle.load(logit_file)
        score_start_step = args.p1_step - window
        score_end_step = args.p1_step
        score_dict = calculate_scores(logits, start_epoch=score_start_step, end_epoch=score_end_step)
        sample_weights = score_dict[args.resample_score]
        print(f'sample_weights mean: {sample_weights.mean()}, var: {sample_weights.var()}, max: {sample_weights.max()}, min: {sample_weights.min()}')
    else:
        # GOLD reweighting happens inside the trainer; no sampling weights.
        sample_weights = None

    netG_ckpt_path = baseline_save_path / f'checkpoints/netG/netG_{args.p1_step}_steps.pth'
    netD_ckpt_path = baseline_save_path / f'checkpoints/netD/netD_{args.p1_step}_steps.pth'
    # The DRS discriminator is warm-started from the same netD checkpoint.
    netD_drs_ckpt_path = baseline_save_path / f'checkpoints/netD/netD_{args.p1_step}_steps.pth'

    netG, netD, netD_drs, optG, optD, optD_drs = get_gan_model(
        dataset_name=args.dataset,
        model=args.model,
        loss_type=args.loss_type,
        drs=True,
        topk=args.topk,
        gold=args.gold,
    )

    print(f'model: {args.model} - netD_drs_ckpt_path: {netD_drs_ckpt_path}')
    print_num_params(netG, netD)

    ds_train = get_predefined_dataset(dataset_name=args.dataset, root=args.root, weights=None)
    dl_train = get_dataloader(ds_train, batch_size=args.batch_size, weights=sample_weights)

    # Separate, uniformly-sampled dataset/loader for the DRS discriminator.
    ds_drs = get_predefined_dataset(dataset_name=args.dataset, root=args.root, weights=None)
    dl_drs = get_dataloader(ds_drs, batch_size=args.batch_size, weights=None)

    if not args.gold:
        show_sorted_score_samples(ds_train, score=sample_weights, save_path=save_path,
                                  score_name=args.resample_score, plot_name=prefix)

    print(args)

    # Start training
    trainer = LogTrainer(
        output_path=save_path,
        netD=netD,
        netG=netG,
        optD=optD,
        optG=optG,
        netG_ckpt_file=str(netG_ckpt_path),
        netD_ckpt_file=str(netD_ckpt_path),
        netD_drs_ckpt_file=str(netD_drs_ckpt_path),
        netD_drs=netD_drs,
        optD_drs=optD_drs,
        dataloader_drs=dl_drs,
        n_dis=args.n_dis,
        num_steps=args.num_steps,
        save_steps=1000,
        lr_decay=args.decay,
        dataloader=dl_train,
        log_dir=output_dir,
        print_steps=10,
        device=device,
        topk=args.topk,
        gold=args.gold,
        gold_step=args.p1_step,
        save_logits=False,
    )
    trainer.train()
# Build the training dataset and inspect the logit-derived sample weights.
# NOTE(review): this fragment references `args` defined outside the visible
# chunk — it appears to be the interior of a script main; all assigned names
# are preserved for any code that follows.
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5), inplace=True),
])
dataset = MultiResolutionDataset(args.root, transform, args.size)

logit_path = f'./exp_results/{args.baseline_exp_name}/logits_netD.pkl'
print(f'Use logit from: {logit_path}')
# Context manager fixes the leaked file handle from `pickle.load(open(...))`.
with open(logit_path, "rb") as logit_file:
    logits = pickle.load(logit_file)

window = 5000
score_start_step = args.p1_step - window
# NOTE(review): the end step is inclusive here (+1), unlike the sibling
# scripts which use `args.p1_step` directly — confirm this is intentional.
score_end_step = args.p1_step + 1
score_dict = calculate_scores(logits, start_epoch=score_start_step, end_epoch=score_end_step)
sample_weights = score_dict[args.resample_score]


def print_stats(sw):
    # Summary statistics for a weight vector (max/min/mean/var).
    print(f'weight_list max: {sw.max()} min: {sw.min()} mean: {sw.mean()} var: {sw.var()}')


print_stats(sample_weights)
# for k, v in score_dict.items():
#     print(k)
#     print_stats(v)
# import ipdb; ipdb.set_trace()
def main():
    """Evaluate generator FID against the real-data subsets with the highest
    and lowest resampling weights.

    Loads a trained generator checkpoint, derives per-sample weights from the
    baseline discriminator logits, selects the top/bottom ``index_num``
    samples by weight, and runs FID against each subset.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", "-d", default="cifar10", type=str)
    parser.add_argument("--work_dir", default="./exp_results", type=str, help="output dir")
    parser.add_argument("--exp_name", default="mimicry_pretrained-seed1", type=str, help="exp name")
    parser.add_argument("--baseline_exp_name", type=str, help="exp name")
    parser.add_argument('--p1_step', default=40000, type=int)
    parser.add_argument("--model", default="sngan", type=str, help="network model")
    parser.add_argument("--loss_type", default="hinge", type=str, help="loss type")
    parser.add_argument('--gpu', default='0', type=str, help='id(s) for CUDA_VISIBLE_DEVICES')
    parser.add_argument('--batch_size', default=128, type=int)
    parser.add_argument('--seed', default=1, type=int)
    parser.add_argument("--netG_ckpt_step", type=int)
    parser.add_argument("--netG_train_mode", action='store_true')
    parser.add_argument('--resample_score', type=str)
    parser.add_argument('--gold', action='store_true')
    parser.add_argument('--topk', action='store_true')
    parser.add_argument("--index_num", default=100, type=int, help="number of index to use for FID score")
    args = parser.parse_args()

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    output_dir = f'{args.work_dir}/{args.exp_name}'
    save_path = Path(output_dir)
    save_path.mkdir(parents=True, exist_ok=True)

    baseline_output_dir = f'{args.work_dir}/{args.baseline_exp_name}'
    baseline_save_path = Path(baseline_output_dir)

    set_seed(args.seed)
    if torch.cuda.is_available():
        device = "cuda"
        cudnn.benchmark = True
    else:
        device = "cpu"

    # load model
    # NOTE(review): `assert` is stripped under `python -O`; kept for
    # behavioral parity, but an explicit check would be more robust.
    assert args.netG_ckpt_step
    print(f'load model from {save_path} step: {args.netG_ckpt_step}')
    netG, _, _, _ = get_gan_model(
        dataset_name=args.dataset,
        model=args.model,
        loss_type=args.loss_type,
        topk=args.topk,
        gold=args.gold,
    )
    netG.to(device)
    if not args.netG_train_mode:
        netG.eval()

    # Map dataset name for the evaluator; CelebA is evaluated at 64x64.
    if args.dataset == 'celeba':
        dataset = 'celeba_64'
        window = 5000
    else:
        dataset = args.dataset
        window = 5000

    logit_path = baseline_save_path / 'logits_netD_eval.pkl'
    print(f'Use logit from: {logit_path}')
    # Context manager fixes the leaked file handle from `pickle.load(open(...))`.
    with open(logit_path, "rb") as logit_file:
        logits = pickle.load(logit_file)

    score_start_step = args.p1_step - window
    score_end_step = args.p1_step
    score_dict = calculate_scores(logits, start_epoch=score_start_step, end_epoch=score_end_step)
    sample_weights = score_dict[args.resample_score]
    print(f'sample_weights mean: {sample_weights.mean()}, var: {sample_weights.var()}, max: {sample_weights.max()}, min: {sample_weights.min()}')
    print(args)

    # Indices of the `index_num` highest- and lowest-weight real samples.
    sort_index = np.argsort(sample_weights)
    high_index = sort_index[-args.index_num:]
    low_index = sort_index[:args.index_num]

    # Evaluate fid with index of high weight
    evaluate_with_index(
        metric='fid',
        index=high_index,
        log_dir=save_path,
        netG=netG,
        dataset=dataset,
        num_fake_samples=50000,
        evaluate_step=args.netG_ckpt_step,
        num_runs=1,
        device=device,
        stats_file=None,
        name=f'high_{args.resample_score}',
    )

    # Evaluate fid with index of low weight
    evaluate_with_index(
        metric='fid',
        index=low_index,
        log_dir=save_path,
        netG=netG,
        dataset=dataset,
        num_fake_samples=50000,
        evaluate_step=args.netG_ckpt_step,
        num_runs=1,
        device=device,
        stats_file=None,
        name=f'low_{args.resample_score}',
    )
def main():
    """Compare conditional-autoencoder reconstruction losses between a
    baseline and a resampled run, broken down by sample group, and append
    the results to a per-dataset CSV report.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset", "-d", default="color_mnist", type=str)
    parser.add_argument("--root", "-r", default="./dataset/colour_mnist", type=str, help="dataset dir")
    parser.add_argument("--baseline_exp_path", default="color_mnist", type=str)
    parser.add_argument("--resample_exp_path", default="color_mnist", type=str)
    parser.add_argument('--p1_step', default=15000, type=int)
    parser.add_argument('--p2_step', default=20000, type=int)
    parser.add_argument('--resample_score', type=str)
    parser.add_argument("--use_loss", action='store_true')
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--major_ratio', default=0.99, type=float)
    parser.add_argument('--num_data', default=10000, type=int)
    parser.add_argument('--name', type=str)
    args = parser.parse_args()

    baseline_exp_path = Path(args.baseline_exp_path)
    resample_exp_path = Path(args.resample_exp_path)

    # NOTE(review): baseline_ae / resample_ae are only bound when --use_loss
    # is given; without it the comparison loop below raises NameError —
    # confirm whether a non-loss branch was intended here.
    if args.use_loss:
        baseline_ae_loss = np.load(
            baseline_exp_path / f'cae_checkpoints/{args.p2_step}_steps_seed{args.seed}/cae_training_loss.npy'
        )
        resample_ae_loss = np.load(
            resample_exp_path / f'cae_checkpoints/{args.p2_step}_steps_seed{args.seed}/cae_training_loss.npy'
        )
        # Last column holds the final recorded loss per sample.
        baseline_ae = baseline_ae_loss[:, -1]
        resample_ae = resample_ae_loss[:, -1]

    # Context manager fixes the leaked file handle from `pickle.load(open(...))`.
    with open(baseline_exp_path / 'logits_netD_eval.pkl', "rb") as logit_file:
        logits = pickle.load(logit_file)

    # Scores are aggregated over the final 5000-step window of phase 1.
    score_start_step = args.p1_step - 5000
    score_end_step = args.p1_step
    score_dict = calculate_scores(logits, start_epoch=score_start_step, end_epoch=score_end_step)
    sample_weights = score_dict[args.resample_score]
    weight_sort_index = np.argsort(sample_weights)

    ds_train = get_predefined_dataset(dataset_name=args.dataset,
                                      root=args.root,
                                      weights=None,
                                      major_ratio=args.major_ratio,
                                      num_data=args.num_data)

    # Groups to report: everything, plus the dataset-specific minority group.
    test_dict = dict()
    test_dict['all'] = weight_sort_index
    if args.dataset == 'color_mnist':
        # np.where(...) returns a tuple of index arrays; valid as a fancy index.
        test_dict['green'] = np.where(ds_train.dataset.biased_targets == 1)
    elif args.dataset == 'mnist_fmnist':
        test_dict['fmnist'] = np.where(ds_train.dataset.mixed_targets == 1)

    csv_file = f'./re_{args.dataset}_{args.name}.csv'
    # Append mode creates the file when missing, so one open covers both the
    # "new file + header" and "append rows" cases, and `with` guarantees the
    # handle is closed even if a comparison below raises.
    write_header = not os.path.exists(csv_file)
    with open(csv_file, 'a', newline='') as f:
        wr = csv.writer(f)
        if write_header:
            wr.writerow(['Ratio', 'Seed', 'Type', 'Baseline', 'Resample', 'Difference(%)'])

        for idx_name, index in test_dict.items():
            baseline_mean = baseline_ae[index].mean()
            resample_mean = resample_ae[index].mean()
            # Relative change of the resampled run vs the baseline, in percent.
            baseline_resample_diff = (resample_mean - baseline_mean) / baseline_mean * 100
            print(f'{idx_name}, baseline_mean: {baseline_mean}, resample_mean: {resample_mean} diff: {baseline_resample_diff}%')
            wr.writerow([
                args.major_ratio, args.seed, idx_name, baseline_mean,
                resample_mean, baseline_resample_diff
            ])