Esempio n. 1
0
def collect_args_generate():
    """Parse command-line options for the image-generation experiments.

    Returns a plain dict (``vars()`` of the argparse namespace) augmented
    with derived entries: the human-readable attribute names, the torch
    device/dtype, and — for the 'pair' experiment — default save/latent
    paths derived from the attribute name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--experiment',
                        choices=[
                            'orig',
                            'pair',
                        ],
                        default='orig')

    parser.add_argument('--attribute', type=int, default=31)
    parser.add_argument('--save_dir', type=str, default='_')
    parser.add_argument('--latent_file', type=str, default='_')
    parser.add_argument('--random_seed', type=int, default=0)
    parser.add_argument('--num_images', type=int, default=175000)
    parser.add_argument('--number', type=int, default=0)
    parser.add_argument('--protected_attribute', type=int, default=20)
    parser.add_argument('--protected_val', type=int, default=0)
    parser.add_argument('--attr_val', type=int, default=0)
    parser.set_defaults(cuda=True)

    opt = vars(parser.parse_args())

    attr_list = utils.get_all_attr()
    opt['attr_name'] = attr_list[opt['attribute']]
    opt['prot_attr_name'] = attr_list[opt['protected_attribute']]
    # Fall back to CPU when CUDA is requested but unavailable; 'cuda'
    # defaults to True, and the other collect_args_* helpers in this file
    # already guard with torch.cuda.is_available().
    use_cuda = opt['cuda'] and torch.cuda.is_available()
    opt['device'] = torch.device('cuda' if use_cuda else 'cpu')
    opt['dtype'] = torch.float32

    # '_' is a sentinel meaning "derive the path from the attribute name".
    if opt['experiment'] == 'pair' and opt['save_dir'] == '_':
        opt['save_dir'] = 'data/fake_images/{}/'.format(opt['attr_name'])
    if opt['experiment'] == 'pair' and opt['latent_file'] == '_':
        opt['latent_file'] = 'record/GAN_model/latent_vectors_{}.pkl'.format(
            opt['attr_name'])
    return opt
Esempio n. 2
0
def collect_args_full_skew():
    """Parse options for the full-skew training experiment.

    Returns an option dict containing the two target attributes, device,
    dataloader parameter dicts, data settings, and a save folder whose name
    encodes the attribute pair (with an '_opp' suffix when --opp is set).
    The required record directories are created as a side effect.
    """

    def _str2bool(value):
        # argparse's type=bool is broken: bool('False') is True, so ANY
        # explicit command-line value used to enable the flag. Accept the
        # usual truthy spellings instead; everything else is False.
        return str(value).lower() in ('1', 'true', 'yes', 'y')

    parser = argparse.ArgumentParser()
    parser.add_argument('--attribute1', type=int, default=31)
    parser.add_argument('--attribute2', type=int, default=20)
    parser.add_argument('--real_data_dir', type=str, default='data/celeba')
    parser.add_argument('--random_seed', type=int, default=0)
    parser.add_argument('--test_mode', type=_str2bool, default=False)
    parser.add_argument('--opp', type=_str2bool, default=False)
    parser.set_defaults(cuda=True)

    opt = vars(parser.parse_args())

    attr_list = utils.get_all_attr()
    opt['attr_name1'] = attr_list[opt['attribute1']]
    opt['attr_name2'] = attr_list[opt['attribute2']]
    if torch.cuda.is_available():
        opt['device'] = torch.device('cuda')
    else:
        opt['device'] = torch.device('cpu')
    opt['dtype'] = torch.float32
    opt['total_epochs'] = 20
    params_real_train = {'batch_size': 32, 'shuffle': True, 'num_workers': 0}

    params_real_val = {'batch_size': 64, 'shuffle': False, 'num_workers': 0}

    data_setting = {
        'path': opt['real_data_dir'],
        'params_real_train': params_real_train,
        'params_real_val': params_real_val,
        'attribute1': opt['attribute1'],
        'attribute2': opt['attribute2'],
        'augment': True
    }
    opt['data_setting'] = data_setting
    # '_opp' marks the run where the second attribute's labels are inverted.
    if opt['opp']:
        opt['save_folder'] = 'record/full_skew/attr_{}_{}_opp/'.format(
            opt['attribute1'], opt['attribute2'])
    else:
        opt['save_folder'] = 'record/full_skew/attr_{}_{}/'.format(
            opt['attribute1'], opt['attribute2'])
    utils.make_dir('record/full_skew')
    utils.make_dir(opt['save_folder'])
    return opt
Esempio n. 3
0
def collect_args_linear():
    """Collect command-line options for the linear-classifier experiment.

    All flags are integers; the returned dict also carries the resolved
    attribute names, the torch device (GPU when available), and the dtype.
    """
    parser = argparse.ArgumentParser()
    int_flags = [
        ('--attribute', 31),
        ('--protected_attribute', 20),
        ('--random_seed', 0),
        ('--attr_val', 0),
        ('--protected_val', 0),
        ('--number', 0),
    ]
    for flag, default in int_flags:
        parser.add_argument(flag, type=int, default=default)
    parser.set_defaults(cuda=True)

    opt = vars(parser.parse_args())

    attr_list = utils.get_all_attr()
    opt['attr_name'] = attr_list[opt['attribute']]
    opt['prot_attr_name'] = attr_list[opt['protected_attribute']]
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    opt['device'] = torch.device(device_name)
    opt['dtype'] = torch.float32

    return opt
Esempio n. 4
0
def collect_args_scores():
    """Collect command-line options for scoring generated images.

    When --out_file is left at its '_' sentinel, a default path under
    data/fake_images/ is derived from the attribute name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--attribute', type=int, default=31)
    parser.add_argument('--model_dir', type=str, default='record/baseline')
    parser.add_argument('--out_file', type=str, default='_')
    parser.add_argument('--random_seed', type=int, default=0)
    parser.add_argument('--num_images', type=int, default=175000)
    parser.add_argument('--number', type=int, default=0)
    #parser.set_defaults(cuda=True)

    opt = vars(parser.parse_args())
    opt['attr_name'] = utils.get_all_attr()[opt['attribute']]
    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
    opt['device'] = torch.device(device_name)
    opt['dtype'] = torch.float32
    # '_' is a sentinel meaning "derive the output path from the attribute".
    if opt['out_file'] == '_':
        opt['out_file'] = 'data/fake_images/all_{}_scores.pkl'.format(
            opt['attr_name'])

    return opt
Esempio n. 5
0
def main(opt):
    """Train and evaluate an attribute classifier for one experiment setting.

    Builds train/val/test datasets according to opt['experiment'] ('baseline',
    'model', 'model_inv', or 'fake_only'), trains unless opt['test_mode'] is
    set (resuming from a 'current.pth' checkpoint when one exists), then
    scores the best checkpoint and writes raw scores plus bootstrap fairness
    metrics (AP, DEO, BA, KL) for both validation and test splits into
    opt['save_folder'].
    """
    attr_list = utils.get_all_attr()
    attr_name = attr_list[opt['attribute']]

    #print(attr_name)
    print(opt)

    # --- Dataset construction, one branch per experiment type -------------
    if opt['experiment'] == 'baseline':
        train = create_dataset_actual(
            opt['data_setting']['path'],
            opt['data_setting']['attribute'],
            opt['data_setting']['protected_attribute'],
            opt['data_setting']['params_real_train'],
            opt['data_setting']['augment'],
            CelebaDataset,
            number=opt['number'])

        val = create_dataset_actual(opt['data_setting']['path'],
                                    opt['data_setting']['attribute'],
                                    opt['data_setting']['protected_attribute'],
                                    opt['data_setting']['params_real_val'],
                                    False,
                                    CelebaDataset,
                                    split='valid')
        val_weight = None

        test = create_dataset_actual(
            opt['data_setting']['path'],
            opt['data_setting']['attribute'],
            opt['data_setting']['protected_attribute'],
            opt['data_setting']['params_real_val'],
            False,
            CelebaDataset,
            split='test')

    elif opt['experiment'] == 'model':
        train = create_dataset_all(opt['data_setting']['real_params'],
                                   opt['data_setting']['fake_params'],
                                   opt['data_setting']['params_train'],
                                   opt['data_setting']['augment'],
                                   CelebaDataset,
                                   split='train')

    elif opt['experiment'] == 'model_inv':
        train = create_dataset_inv(opt['data_setting']['real_params'],
                                   opt['data_setting']['fake_params'],
                                   opt['data_setting']['params_train'],
                                   opt['data_setting']['augment'],
                                   CelebaDataset,
                                   split='train')

    elif opt['experiment'] == 'fake_only':
        train = create_dataset_reflections(opt['data_setting']['fake_params'],
                                           opt['data_setting']['params_train'],
                                           opt['data_setting']['augment'],
                                           CelebaDataset)

    # Non-baseline experiments always evaluate against the real data, with
    # class weights computed from the validation split.
    if opt['experiment'] in ['model', 'model_inv', 'fake_only']:
        val = create_dataset_actual(
            opt['data_setting']['real_params']['path'],
            opt['data_setting']['real_params']['attribute'],
            opt['data_setting']['real_params']['protected_attribute'],
            opt['data_setting']['params_val'],
            False,
            CelebaDataset,
            split='valid')

        val_weight = utils.compute_class_weight(val, opt['device'],
                                                opt['dtype']).cpu().numpy()

        test = create_dataset_actual(
            opt['data_setting']['real_params']['path'],
            opt['data_setting']['real_params']['attribute'],
            opt['data_setting']['real_params']['protected_attribute'],
            opt['data_setting']['params_val'],
            False,
            CelebaDataset,
            split='test')

    # --- Train the attribute classifier -----------------------------------
    save_path = opt['save_folder'] + '/best.pth'
    save_path_curr = opt['save_folder'] + '/current.pth'
    if not opt['test_mode']:
        print('Starting to train model...')
        model_path = None
        if path.exists(save_path_curr):
            print('Model exists, resuming training')
            model_path = save_path_curr
        AC = attribute_classifier(opt['device'],
                                  opt['dtype'],
                                  modelpath=model_path)
        for i in range(AC.epoch, opt['total_epochs']):
            AC.train(train)
            acc = AC.check_avg_precision(val, weights=val_weight)
            # Keep 'best.pth' at the highest validation AP; 'current.pth'
            # always tracks the latest epoch so runs can resume.
            if (acc > AC.best_acc):
                AC.best_acc = acc
                AC.save_model(save_path)
            AC.save_model(save_path_curr)

    # --- Score the best checkpoint and persist raw outputs ----------------
    AC = attribute_classifier(opt['device'], opt['dtype'], modelpath=save_path)
    val_targets, val_scores = AC.get_scores(val)
    test_targets, test_scores = AC.get_scores(test)

    with open(opt['save_folder'] + '/val_scores.pkl', 'wb+') as handle:
        pickle.dump(val_scores, handle)
    with open(opt['save_folder'] + '/val_targets.pkl', 'wb+') as handle:
        pickle.dump(val_targets, handle)
    with open(opt['save_folder'] + '/test_scores.pkl', 'wb+') as handle:
        pickle.dump(test_scores, handle)
    with open(opt['save_folder'] + '/test_targets.pkl', 'wb+') as handle:
        pickle.dump(test_targets, handle)

    # Targets column 0 is the attribute label, column 1 the protected label.
    cal_thresh = utils.calibrated_threshold(val_targets[:, 0], val_scores)
    f1_score, f1_thresh = utils.get_threshold(val_targets[:, 0], val_scores)
    val_pred = np.where(val_scores > cal_thresh, 1, 0)
    test_pred = np.where(test_scores > cal_thresh, 1, 0)

    # --- Validation metrics (bootstrap mean and std) ----------------------
    ap, ap_std = utils.bootstrap_ap(val_targets[:, 0], val_scores)
    deo, deo_std = utils.bootstrap_deo(val_targets[:, 1], val_targets[:, 0],
                                       val_pred)
    ba, ba_std = utils.bootstrap_bias_amp(val_targets[:, 1], val_targets[:, 0],
                                          val_pred)
    kl, kl_std = utils.bootstrap_kl(val_targets[:, 1], val_targets[:, 0],
                                    val_scores)

    val_results = {
        'AP': ap,
        'AP_std': ap_std,
        'DEO': deo,
        'DEO_std': deo_std,
        'BA': ba,
        'BA_std': ba_std,
        'KL': kl,
        'KL_std': kl_std,
        'f1_thresh': f1_thresh,
        'cal_thresh': cal_thresh,
        'opt': opt
    }

    # Percent metrics are scaled by 100; the +- band is 2 standard
    # deviations (hence 200 * std). KL is reported unscaled.
    print('Validation results: ')
    print('AP : {:.1f} +- {:.1f}'.format(100 * ap, 200 * ap_std))
    print('DEO : {:.1f} +- {:.1f}'.format(100 * deo, 200 * deo_std))
    print('BA : {:.1f} +- {:.1f}'.format(100 * ba, 200 * ba_std))
    print('KL : {:.1f} +- {:.1f}'.format(kl, 2 * kl_std))

    with open(opt['save_folder'] + '/val_results.pkl', 'wb+') as handle:
        pickle.dump(val_results, handle)

    # --- Test metrics ------------------------------------------------------
    ap, ap_std = utils.bootstrap_ap(test_targets[:, 0], test_scores)
    deo, deo_std = utils.bootstrap_deo(test_targets[:, 1], test_targets[:, 0],
                                       test_pred)
    ba, ba_std = utils.bootstrap_bias_amp(test_targets[:, 1],
                                          test_targets[:, 0], test_pred)
    kl, kl_std = utils.bootstrap_kl(test_targets[:, 1], test_targets[:, 0],
                                    test_scores)

    test_results = {
        'AP': ap,
        'AP_std': ap_std,
        'DEO': deo,
        'DEO_std': deo_std,
        'BA': ba,
        'BA_std': ba_std,
        'KL': kl,
        'KL_std': kl_std,
        'f1_thresh': f1_thresh,
        'cal_thresh': cal_thresh,
        'opt': opt
    }

    print('Test results: ')
    print('AP : {:.1f} +- {:.1f}'.format(100 * ap, 200 * ap_std))
    print('DEO : {:.1f} +- {:.1f}'.format(100 * deo, 200 * deo_std))
    print('BA : {:.1f} +- {:.1f}'.format(100 * ba, 200 * ba_std))
    print('KL : {:.1f} +- {:.1f}'.format(kl, 2 * kl_std))

    with open(opt['save_folder'] + '/test_results.pkl', 'wb+') as handle:
        pickle.dump(test_results, handle)
Esempio n. 6
0
def create_experiment_setting(opt):
    """Fill in derived experiment settings on the parsed option dict.

    Resolves device/dtype, training hyperparameters, save folders (created
    as a side effect), the optimizer configuration, and the per-experiment
    data settings for 'baseline' vs. 'model'/'fake_only' runs. Returns the
    same dict, mutated in place.
    """

    # Uncomment if deterministic run required.
    #torch.backends.cudnn.deterministic = True
    #torch.backends.cudnn.benchmark = False
    #torch.manual_seed(opt['random_seed'])
    #np.random.seed(opt['random_seed'])

    attr_list = utils.get_all_attr()
    attr_name = attr_list[opt['attribute']]
    opt['device'] = torch.device('cuda' if opt['cuda'] else 'cpu')
    opt['dtype'] = torch.float32
    opt['print_freq'] = 100
    opt['total_epochs'] = 20
    # Attribute index 20 appears to be the default protected attribute;
    # other choices get their own 'protected<name>' subdirectory.
    orig_save = 'record/'
    if opt['protected_attribute'] != 20:
        orig_save += 'protected' + attr_list[opt['protected_attribute']] + '/'
    utils.make_dir('record')
    utils.make_dir(orig_save)
    # '_' is a sentinel: derive the folder from the experiment type;
    # otherwise use the user-supplied experiment name.
    if opt['experiment_name'] == '_':
        opt['save_folder'] = os.path.join(orig_save + opt['experiment'],
                                          attr_name)
        utils.make_dir(orig_save + opt['experiment'])
        utils.make_dir(opt['save_folder'])
    else:
        opt['save_folder'] = orig_save + opt[
            'experiment_name'] + '/' + attr_name

        utils.make_dir(orig_save + opt['experiment_name'])
        utils.make_dir(opt['save_folder'])

    optimizer_setting = {
        'optimizer': torch.optim.Adam,
        'lr': 1e-4,
        'weight_decay': 0,
    }
    opt['optimizer_setting'] = optimizer_setting
    opt['dropout'] = 0.5

    if opt['experiment'] == 'baseline':

        # Dataloader parameters for the real-image splits.
        params_real_train = {
            'batch_size': 32,
            'shuffle': True,
            'num_workers': 0
        }

        params_real_val = {
            'batch_size': 64,
            'shuffle': False,
            'num_workers': 0
        }
        data_setting = {
            'path': opt['real_data_dir'],
            'params_real_train': params_real_train,
            'params_real_val': params_real_val,
            'protected_attribute': opt['protected_attribute'],
            'attribute': opt['attribute'],
            'augment': True
        }
        opt['data_setting'] = data_setting

    elif opt['experiment'] == 'model' or opt['experiment'] == 'fake_only':

        # Resolve the GAN-generated image/score paths, each with a '_'
        # sentinel meaning "derive the default path from the attribute name".
        if opt['fake_data_dir_new'] == '_':
            if opt['protected_attribute'] != 20:
                input_path_new = 'data/fake_images/protected' + attr_list[
                    opt['protected_attribute']] + '/' + attr_name + '/'
            else:
                input_path_new = 'data/fake_images/{}/'.format(attr_name)
        else:
            input_path_new = opt['fake_data_dir_new']

        input_path_orig = opt['fake_data_dir_orig']
        #scores = 'data/fake_images/' + attr_name+'_scores.pkl'
        if opt['fake_scores_target'] == '_':
            scores = 'data/fake_images/{}_scores.pkl'.format(attr_name)
        else:
            scores = opt['fake_scores_target']
        if opt['fake_scores_protected'] == '_':
            domain = 'data/fake_images/all_' + attr_list[
                opt['protected_attribute']] + '_scores.pkl'
        else:
            domain = opt['fake_scores_protected']
        params_train = {'batch_size': 32, 'shuffle': True, 'num_workers': 0}

        params_val = {'batch_size': 64, 'shuffle': False, 'num_workers': 0}
        real_params = {
            'path': opt['real_data_dir'],
            'attribute': opt['attribute'],
            'protected_attribute': opt['protected_attribute'],
            'number': 0
        }
        # Index ranges partition the generated images into original-image,
        # original-label, and newly generated spans — presumably matching
        # how the GAN outputs are laid out on disk; verify against the
        # dataset-construction code.
        fake_params = {
            'path_new': input_path_new,
            'path_orig': input_path_orig,
            'attr_path': scores,
            'dom_path': domain,
            'range_orig_image': (15000, 175000),
            'range_orig_label': (160000, 320000),
            'range_new': (0, 160000),
        }
        data_setting = {
            'real_params': real_params,
            'fake_params': fake_params,
            'augment': True,
            'params_train': params_train,
            'params_val': params_val
        }
        opt['data_setting'] = data_setting
    return opt
Esempio n. 7
0
def main(opt):
    """Train a classifier on the fully-skewed attribute pair and report AP.

    Skips degenerate runs where both attributes are the same. Unless
    opt['test_mode'] is set, trains for opt['total_epochs'] epochs, keeping
    the best-AP checkpoint plus one checkpoint per epoch. Then evaluates the
    best checkpoint separately on each attribute's real validation split,
    dumping the scores and printing per-attribute average precision.
    """
    attr_list = utils.get_all_attr()
    attr_name1 = attr_list[opt['attribute1']]
    attr_name2 = attr_list[opt['attribute2']]

    print(attr_name1, attr_name2)
    print(opt)
    # A pair of identical attributes is meaningless for this experiment.
    if opt['attribute1'] == opt['attribute2']:
        return

    train = create_dataset_full_skew(opt['data_setting']['path'],
                                     opt['attribute1'],
                                     opt['attribute2'],
                                     opt['data_setting']['params_real_train'],
                                     opt['data_setting']['augment'],
                                     CelebaDataset,
                                     opp=opt['opp'])

    val = create_dataset_full_skew(opt['data_setting']['path'],
                                   opt['attribute1'],
                                   opt['attribute2'],
                                   opt['data_setting']['params_real_val'],
                                   False,
                                   CelebaDataset,
                                   split='valid',
                                   opp=opt['opp'])

    save_path = opt['save_folder'] + '/best.pth'
    save_path_curr = opt['save_folder'] + '/epoch'
    if not opt['test_mode']:
        model_path = None
        AC = attribute_classifier(opt['device'],
                                  opt['dtype'],
                                  modelpath=model_path)
        val_weight = None
        for i in range(AC.epoch, opt['total_epochs']):
            AC.train(train)
            acc = AC.check_avg_precision(val, weights=val_weight)
            if acc > AC.best_acc:
                AC.best_acc = acc
                AC.save_model(save_path)
            # One checkpoint per epoch: '<save_folder>/epoch<i>.pth'.
            AC.save_model(save_path_curr + str(i) + '.pth')

    AC = attribute_classifier(opt['device'], opt['dtype'], modelpath=save_path)

    # Evaluate the trained model on each attribute's (unskewed) real
    # validation data. 20 is the default protected-attribute index used
    # elsewhere in this file.
    for attr in [opt['attribute1'], opt['attribute2']]:
        val = create_dataset_actual(opt['data_setting']['path'],
                                    attr,
                                    20,
                                    opt['data_setting']['params_real_val'],
                                    False,
                                    CelebaDataset,
                                    split='valid')

        val_targets, val_scores = AC.get_scores(val)

        with open(opt['save_folder'] + '/val_scores_{}.pkl'.format(attr),
                  'wb+') as handle:
            pickle.dump(val_scores, handle)

        # In the 'opp' setting the second attribute was trained with
        # inverted labels, so invert the targets before computing AP.
        if opt['opp'] and attr == opt['attribute2']:
            val_targets = 1 - val_targets

        print('AP for attribute {}: {}'.format(
            attr, 100 * average_precision_score(val_targets, val_scores)))