def collect_args_generate():
    """Parse command-line options for the image-generation experiments.

    Returns:
        dict: parsed options augmented with derived entries — human-readable
        attribute names resolved via ``utils.get_all_attr()``, a torch device
        and dtype, and (for the 'pair' experiment) default save/latent paths
        derived from the attribute name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment', choices=['orig', 'pair'], default='orig')
    parser.add_argument('--attribute', type=int, default=31)
    parser.add_argument('--save_dir', type=str, default='_')
    parser.add_argument('--latent_file', type=str, default='_')
    parser.add_argument('--random_seed', type=int, default=0)
    parser.add_argument('--num_images', type=int, default=175000)
    parser.add_argument('--number', type=int, default=0)
    parser.add_argument('--protected_attribute', type=int, default=20)
    parser.add_argument('--protected_val', type=int, default=0)
    parser.add_argument('--attr_val', type=int, default=0)
    parser.set_defaults(cuda=True)
    opt = vars(parser.parse_args())

    attr_list = utils.get_all_attr()
    opt['attr_name'] = attr_list[opt['attribute']]
    opt['prot_attr_name'] = attr_list[opt['protected_attribute']]
    # Bug fix: the original unconditionally selected 'cuda' when opt['cuda']
    # was set and would crash on machines without CUDA. Fall back to CPU when
    # CUDA is unavailable, matching the device selection used by the other
    # collect_args_* helpers in this file.
    opt['device'] = torch.device(
        'cuda' if opt['cuda'] and torch.cuda.is_available() else 'cpu')
    opt['dtype'] = torch.float32

    # '_' is the sentinel for "derive the default path from the attribute name".
    if opt['experiment'] == 'pair' and opt['save_dir'] == '_':
        opt['save_dir'] = 'data/fake_images/{}/'.format(opt['attr_name'])
    if opt['experiment'] == 'pair' and opt['latent_file'] == '_':
        opt['latent_file'] = 'record/GAN_model/latent_vectors_{}.pkl'.format(
            opt['attr_name'])
    return opt
def str2bool(value):
    """Convert a command-line string to a bool.

    argparse's ``type=bool`` is a well-known trap: ``bool('False')`` is True
    because any non-empty string is truthy. This helper parses the usual
    textual spellings instead and passes real bools through unchanged.
    """
    if isinstance(value, bool):
        return value
    return str(value).strip().lower() in ('true', '1', 'yes', 'y', 't')


def collect_args_full_skew():
    """Parse options for the fully-skewed training experiment.

    Returns:
        dict: parsed options augmented with attribute names, torch device and
        dtype, epoch count, dataloader settings, and a created save folder.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--attribute1', type=int, default=31)
    parser.add_argument('--attribute2', type=int, default=20)
    parser.add_argument('--real_data_dir', type=str, default='data/celeba')
    parser.add_argument('--random_seed', type=int, default=0)
    # Bug fix: these used `type=bool`, so '--test_mode False' parsed as True
    # (any non-empty string is truthy). str2bool parses the text correctly.
    parser.add_argument('--test_mode', type=str2bool, default=False)
    parser.add_argument('--opp', type=str2bool, default=False)
    parser.set_defaults(cuda=True)
    opt = vars(parser.parse_args())

    attr_list = utils.get_all_attr()
    opt['attr_name1'] = attr_list[opt['attribute1']]
    opt['attr_name2'] = attr_list[opt['attribute2']]
    if torch.cuda.is_available():
        opt['device'] = torch.device('cuda')
    else:
        opt['device'] = torch.device('cpu')
    opt['dtype'] = torch.float32
    opt['total_epochs'] = 20

    params_real_train = {'batch_size': 32, 'shuffle': True, 'num_workers': 0}
    params_real_val = {'batch_size': 64, 'shuffle': False, 'num_workers': 0}
    opt['data_setting'] = {
        'path': opt['real_data_dir'],
        'params_real_train': params_real_train,
        'params_real_val': params_real_val,
        'attribute1': opt['attribute1'],
        'attribute2': opt['attribute2'],
        'augment': True
    }

    # 'opp' runs get their own folder so they don't clobber regular runs.
    if opt['opp']:
        opt['save_folder'] = 'record/full_skew/attr_{}_{}_opp/'.format(
            opt['attribute1'], opt['attribute2'])
    else:
        opt['save_folder'] = 'record/full_skew/attr_{}_{}/'.format(
            opt['attribute1'], opt['attribute2'])
    utils.make_dir('record/full_skew')
    utils.make_dir(opt['save_folder'])
    return opt
def collect_args_linear():
    """Collect options for the linear-model experiments.

    All command-line flags are integers; CUDA is used automatically when
    available. Returns a dict with resolved attribute names plus torch
    device and dtype.
    """
    parser = argparse.ArgumentParser()
    # All flags share type=int, so register them table-style.
    int_flags = [
        ('--attribute', 31),
        ('--protected_attribute', 20),
        ('--random_seed', 0),
        ('--attr_val', 0),
        ('--protected_val', 0),
        ('--number', 0),
    ]
    for flag, default_value in int_flags:
        parser.add_argument(flag, type=int, default=default_value)
    parser.set_defaults(cuda=True)

    opt = vars(parser.parse_args())
    names = utils.get_all_attr()
    opt['attr_name'] = names[opt['attribute']]
    opt['prot_attr_name'] = names[opt['protected_attribute']]
    opt['device'] = torch.device(
        'cuda' if torch.cuda.is_available() else 'cpu')
    opt['dtype'] = torch.float32
    return opt
def collect_args_scores():
    """Collect options for scoring generated images with a trained classifier.

    Returns a dict with the resolved attribute name, torch device/dtype,
    and an output-file path (derived from the attribute when left as '_').
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--attribute', default=31, type=int)
    parser.add_argument('--model_dir', default='record/baseline', type=str)
    parser.add_argument('--out_file', default='_', type=str)
    parser.add_argument('--random_seed', default=0, type=int)
    parser.add_argument('--num_images', default=175000, type=int)
    parser.add_argument('--number', default=0, type=int)
    opt = vars(parser.parse_args())

    opt['attr_name'] = utils.get_all_attr()[opt['attribute']]
    use_cuda = torch.cuda.is_available()
    opt['device'] = torch.device('cuda' if use_cuda else 'cpu')
    opt['dtype'] = torch.float32

    # '_' is the sentinel for "derive the output path from the attribute name".
    if opt['out_file'] == '_':
        opt['out_file'] = 'data/fake_images/all_{}_scores.pkl'.format(
            opt['attr_name'])
    return opt
def _report_split(label, split, targets, scores, pred, f1_thresh, cal_thresh,
                  opt):
    """Bootstrap the fairness metrics for one evaluation split, print them,
    and pickle the results dict to '<save_folder>/<split>_results.pkl'.

    targets is assumed to be a 2-column array: column 0 is the task label,
    column 1 the protected attribute (that is how the callers index it) —
    TODO confirm against get_scores().
    """
    ap, ap_std = utils.bootstrap_ap(targets[:, 0], scores)
    deo, deo_std = utils.bootstrap_deo(targets[:, 1], targets[:, 0], pred)
    ba, ba_std = utils.bootstrap_bias_amp(targets[:, 1], targets[:, 0], pred)
    kl, kl_std = utils.bootstrap_kl(targets[:, 1], targets[:, 0], scores)
    results = {
        'AP': ap,
        'AP_std': ap_std,
        'DEO': deo,
        'DEO_std': deo_std,
        'BA': ba,
        'BA_std': ba_std,
        'KL': kl,
        'KL_std': kl_std,
        'f1_thresh': f1_thresh,
        'cal_thresh': cal_thresh,
        'opt': opt
    }
    # Report mean +- 2 std; AP/DEO/BA as percentages (hence 100x / 200x).
    # Bug fixes vs. the original: the format strings were never applied
    # (the values were passed as extra print() arguments), and the KL line
    # printed `2 * kl` where `2 * kl_std` was clearly intended.
    print('{} results: '.format(label))
    print('AP : {:.1f} +- {:.1f}'.format(100 * ap, 200 * ap_std))
    print('DEO : {:.1f} +- {:.1f}'.format(100 * deo, 200 * deo_std))
    print('BA : {:.1f} +- {:.1f}'.format(100 * ba, 200 * ba_std))
    print('KL : {:.1f} +- {:.1f}'.format(kl, 2 * kl_std))
    with open(opt['save_folder'] + '/{}_results.pkl'.format(split),
              'wb+') as handle:
        pickle.dump(results, handle)


def main(opt):
    """Train (unless opt['test_mode']) and evaluate an attribute classifier.

    Builds train/val/test datasets according to opt['experiment']
    ('baseline', 'model', 'model_inv', or 'fake_only'), trains up to
    opt['total_epochs'] epochs keeping the checkpoint with the best average
    precision, then dumps per-split scores/targets and bootstrapped fairness
    metrics under opt['save_folder'].
    """
    print(opt)
    setting = opt['data_setting']
    if opt['experiment'] == 'baseline':
        train = create_dataset_actual(
            setting['path'], setting['attribute'],
            setting['protected_attribute'], setting['params_real_train'],
            setting['augment'], CelebaDataset, number=opt['number'])
        val = create_dataset_actual(
            setting['path'], setting['attribute'],
            setting['protected_attribute'], setting['params_real_val'],
            False, CelebaDataset, split='valid')
        val_weight = None
        test = create_dataset_actual(
            setting['path'], setting['attribute'],
            setting['protected_attribute'], setting['params_real_val'],
            False, CelebaDataset, split='test')
    elif opt['experiment'] == 'model':
        train = create_dataset_all(
            setting['real_params'], setting['fake_params'],
            setting['params_train'], setting['augment'], CelebaDataset,
            split='train')
    elif opt['experiment'] == 'model_inv':
        train = create_dataset_inv(
            setting['real_params'], setting['fake_params'],
            setting['params_train'], setting['augment'], CelebaDataset,
            split='train')
    elif opt['experiment'] == 'fake_only':
        train = create_dataset_reflections(
            setting['fake_params'], setting['params_train'],
            setting['augment'], CelebaDataset)

    # The fake-image experiments all validate/test on the real data and
    # reweight validation AP by class frequency.
    if opt['experiment'] in ['model', 'model_inv', 'fake_only']:
        real = setting['real_params']
        val = create_dataset_actual(
            real['path'], real['attribute'], real['protected_attribute'],
            setting['params_val'], False, CelebaDataset, split='valid')
        val_weight = utils.compute_class_weight(
            val, opt['device'], opt['dtype']).cpu().numpy()
        test = create_dataset_actual(
            real['path'], real['attribute'], real['protected_attribute'],
            setting['params_val'], False, CelebaDataset, split='test')

    # Train the attribute classifier, checkpointing best and current weights.
    save_path = opt['save_folder'] + '/best.pth'
    save_path_curr = opt['save_folder'] + '/current.pth'
    if not opt['test_mode']:
        print('Starting to train model...')
        model_path = None
        if path.exists(save_path_curr):
            print('Model exists, resuming training')
            model_path = save_path_curr
        AC = attribute_classifier(opt['device'], opt['dtype'],
                                  modelpath=model_path)
        for i in range(AC.epoch, opt['total_epochs']):
            AC.train(train)
            acc = AC.check_avg_precision(val, weights=val_weight)
            if acc > AC.best_acc:
                AC.best_acc = acc
                AC.save_model(save_path)
            AC.save_model(save_path_curr)

    # Reload the best checkpoint and score both evaluation splits.
    AC = attribute_classifier(opt['device'], opt['dtype'], modelpath=save_path)
    val_targets, val_scores = AC.get_scores(val)
    test_targets, test_scores = AC.get_scores(test)
    for tag, obj in [('val_scores', val_scores), ('val_targets', val_targets),
                     ('test_scores', test_scores),
                     ('test_targets', test_targets)]:
        with open(opt['save_folder'] + '/{}.pkl'.format(tag),
                  'wb+') as handle:
            pickle.dump(obj, handle)

    # Thresholds are chosen on validation and reused for the test split.
    cal_thresh = utils.calibrated_threshold(val_targets[:, 0], val_scores)
    f1_score, f1_thresh = utils.get_threshold(val_targets[:, 0], val_scores)
    val_pred = np.where(val_scores > cal_thresh, 1, 0)
    test_pred = np.where(test_scores > cal_thresh, 1, 0)

    _report_split('Validation', 'val', val_targets, val_scores, val_pred,
                  f1_thresh, cal_thresh, opt)
    _report_split('Test', 'test', test_targets, test_scores, test_pred,
                  f1_thresh, cal_thresh, opt)
def create_experiment_setting(opt):
    """Fill in derived experiment settings (device, paths, dataloader and
    optimizer configuration) on the parsed-options dict and create the
    output directories.

    Mutates and returns `opt`.
    """
    # Uncomment if deterministic run required.
    #torch.backends.cudnn.deterministic = True
    #torch.backends.cudnn.benchmark = False
    #torch.manual_seed(opt['random_seed'])
    #np.random.seed(opt['random_seed'])
    attr_list = utils.get_all_attr()
    attr_name = attr_list[opt['attribute']]
    # NOTE(review): unlike the other helpers here, this trusts opt['cuda']
    # without checking torch.cuda.is_available() — confirm intended.
    opt['device'] = torch.device('cuda' if opt['cuda'] else 'cpu')
    opt['dtype'] = torch.float32
    opt['print_freq'] = 100
    opt['total_epochs'] = 20
    # Results go under record/, in a protected-attribute-specific subtree
    # when the protected attribute is not the default (20).
    orig_save = 'record/'
    if opt['protected_attribute'] != 20:
        orig_save += 'protected' + attr_list[opt['protected_attribute']] + '/'
    utils.make_dir('record')
    utils.make_dir(orig_save)
    # '_' means "no explicit experiment name": save under the experiment type.
    if opt['experiment_name'] == '_':
        opt['save_folder'] = os.path.join(orig_save + opt['experiment'],
                                          attr_name)
        utils.make_dir(orig_save + opt['experiment'])
        utils.make_dir(opt['save_folder'])
    else:
        opt['save_folder'] = orig_save + opt[
            'experiment_name'] + '/' + attr_name
        utils.make_dir(orig_save + opt['experiment_name'])
        utils.make_dir(opt['save_folder'])
    optimizer_setting = {
        'optimizer': torch.optim.Adam,
        'lr': 1e-4,
        'weight_decay': 0,
    }
    opt['optimizer_setting'] = optimizer_setting
    opt['dropout'] = 0.5
    # NOTE(review): 'model_inv' is handled by main() but gets no
    # data_setting from either branch below — verify that is intended.
    if opt['experiment'] == 'baseline':
        params_real_train = {
            'batch_size': 32,
            'shuffle': True,
            'num_workers': 0
        }
        params_real_val = {
            'batch_size': 64,
            'shuffle': False,
            'num_workers': 0
        }
        data_setting = {
            'path': opt['real_data_dir'],
            'params_real_train': params_real_train,
            'params_real_val': params_real_val,
            'protected_attribute': opt['protected_attribute'],
            'attribute': opt['attribute'],
            'augment': True
        }
        opt['data_setting'] = data_setting
    elif opt['experiment'] == 'model' or opt['experiment'] == 'fake_only':
        # Fake-image locations; '_' sentinels mean "derive the default path
        # from the attribute (and protected attribute) names".
        if opt['fake_data_dir_new'] == '_':
            if opt['protected_attribute'] != 20:
                input_path_new = 'data/fake_images/protected' + attr_list[
                    opt['protected_attribute']] + '/' + attr_name + '/'
            else:
                input_path_new = 'data/fake_images/{}/'.format(attr_name)
        else:
            input_path_new = opt['fake_data_dir_new']
        input_path_orig = opt['fake_data_dir_orig']
        #scores = 'data/fake_images/' + attr_name+'_scores.pkl'
        if opt['fake_scores_target'] == '_':
            scores = 'data/fake_images/{}_scores.pkl'.format(attr_name)
        else:
            scores = opt['fake_scores_target']
        if opt['fake_scores_protected'] == '_':
            domain = 'data/fake_images/all_' + attr_list[
                opt['protected_attribute']] + '_scores.pkl'
        else:
            domain = opt['fake_scores_protected']
        params_train = {'batch_size': 32, 'shuffle': True, 'num_workers': 0}
        params_val = {'batch_size': 64, 'shuffle': False, 'num_workers': 0}
        real_params = {
            'path': opt['real_data_dir'],
            'attribute': opt['attribute'],
            'protected_attribute': opt['protected_attribute'],
            'number': 0
        }
        # Index ranges partitioning the fake-image pool — presumably image
        # indices into the generated set; verify against the dataset loader.
        fake_params = {
            'path_new': input_path_new,
            'path_orig': input_path_orig,
            'attr_path': scores,
            'dom_path': domain,
            'range_orig_image': (15000, 175000),
            'range_orig_label': (160000, 320000),
            'range_new': (0, 160000),
        }
        data_setting = {
            'real_params': real_params,
            'fake_params': fake_params,
            'augment': True,
            'params_train': params_train,
            'params_val': params_val
        }
        opt['data_setting'] = data_setting
    return opt
def main(opt):
    """Train a classifier on the fully-skewed pairing of two attributes,
    then report validation AP for each attribute separately.

    Does nothing when the two attributes are identical. Checkpoints the best
    model (by weighted average precision) plus one checkpoint per epoch, and
    pickles per-attribute validation scores under opt['save_folder'].
    """
    attr_list = utils.get_all_attr()
    attr_name1 = attr_list[opt['attribute1']]
    attr_name2 = attr_list[opt['attribute2']]
    print(attr_name1, attr_name2)
    print(opt)
    # Skewing an attribute against itself is meaningless; bail out early.
    if opt['attribute1'] == opt['attribute2']:
        return

    train = create_dataset_full_skew(opt['data_setting']['path'],
                                     opt['attribute1'], opt['attribute2'],
                                     opt['data_setting']['params_real_train'],
                                     opt['data_setting']['augment'],
                                     CelebaDataset, opp=opt['opp'])
    val = create_dataset_full_skew(opt['data_setting']['path'],
                                   opt['attribute1'], opt['attribute2'],
                                   opt['data_setting']['params_real_val'],
                                   False, CelebaDataset, split='valid',
                                   opp=opt['opp'])

    save_path = opt['save_folder'] + '/best.pth'
    save_path_curr = opt['save_folder'] + '/epoch'
    if not opt['test_mode']:
        model_path = None
        AC = attribute_classifier(opt['device'], opt['dtype'],
                                  modelpath=model_path)
        val_weight = None
        for i in range(AC.epoch, opt['total_epochs']):
            AC.train(train)
            acc = AC.check_avg_precision(val, weights=val_weight)
            if acc > AC.best_acc:
                AC.best_acc = acc
                AC.save_model(save_path)
            # Keep a per-epoch snapshot in addition to the best checkpoint.
            AC.save_model(save_path_curr + str(i) + '.pth')

    # Reload the best checkpoint and score validation data for each attribute.
    AC = attribute_classifier(opt['device'], opt['dtype'], modelpath=save_path)
    for attr in [opt['attribute1'], opt['attribute2']]:
        val = create_dataset_actual(opt['data_setting']['path'], attr, 20,
                                    opt['data_setting']['params_real_val'],
                                    False, CelebaDataset, split='valid')
        val_targets, val_scores = AC.get_scores(val)
        with open(opt['save_folder'] + '/val_scores_{}.pkl'.format(attr),
                  'wb+') as handle:
            pickle.dump(val_scores, handle)
        if opt['opp'] and attr == opt['attribute2']:
            # In the 'opp' setting the second attribute's labels are flipped,
            # so invert the targets before scoring.
            val_targets = 1 - val_targets
        # Bug fix: the original passed the format string and the values as
        # separate print() arguments, so '{}' was never substituted.
        print('AP for attribute {}: {}'.format(
            attr, 100 * average_precision_score(val_targets, val_scores)))