Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save_dir',
                        type=str,
                        default='./natural_leakage',
                        help='path for saving checkpoints')

    parser.add_argument('--num_rounds', type=int, default=5)

    parser.add_argument('--balanced', action='store_true')
    parser.add_argument('--ratio', type=str, default='0')

    parser.add_argument('--num_verb', type=int, default=211)

    parser.add_argument('--annotation_dir',
                        type=str,
                        default='./data',
                        help='annotation files path')
    parser.add_argument('--image_dir',
                        default='./data/of500_images_resized',
                        help='image directory')

    parser.add_argument('--hid_size', type=int, default=300)

    parser.add_argument('--num_epochs', type=int, default=150)
    parser.add_argument('--learning_rate', type=float, default=0.00005)
    parser.add_argument('--print_every', type=int, default=500)

    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)

    args = parser.parse_args()

    args.gender_balanced = True  # always True as we want to compute the leakage
    args.finetune = False
    args.no_image = True

    args.blur = False
    args.blackout_face = False
    args.blackout = False
    args.blackout_box = False
    args.grayscale = False
    args.edges = False

    args.save_dir = os.path.join(args.save_dir, args.ratio)
    if not os.path.exists(args.save_dir): os.makedirs(args.save_dir)

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])

    test_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(), normalize
    ])

    acc_f1 = dict()

    for round_id in range(args.num_rounds):

        print('round id is: {}'.format(round_id))

        train_data_ori = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'train', transform = train_transform)

        val_data_ori = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'val', transform = test_transform)

        test_data_ori = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'test', transform = test_transform)

        acc_list = list()
        f1_list = list()

        #for p in [0.08, 0.0, 0.0001, 0.0003, 0.0005, 0.0007, 0.001, 0.0012, 0.0015, 0.0018, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005, 0.006, \
        #0.0075, 0.009, 0.011, 0.013, 0.015, 0.017, 0.019, 0.021, 0.025, 0.04, 0.055, 0.07]:
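        # For each corruption rate p, flip that fraction of ground-truth verb
        # labels to a random class, then record both the resulting macro F1 and
        # the accuracy returned by compute_acc, calibrating leakage against label noise.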
        for p in [0.08, 0.1, 0.12, 0.13, 0.15]:
            val_p = copy.deepcopy(val_data_ori.verb_ann)
            for i in range(len(val_data_ori)):
                if random.random() < p:
                    gt_id = np.argmax(val_p[i, :])
                    val_p[i, gt_id] = 0
                    val_p[i, random.randrange(len(verb2id))] = 1  # may re-pick gt_id, leaving the label unchanged

            f1_list.append(
                f1_score(val_data_ori.verb_ann, val_p, average='macro'))

            acc_list.append(
                compute_acc(p, args, train_data_ori, val_data_ori,
                            test_data_ori))

        print('f1 scores: ', f1_list)
        print('accuracy: ', acc_list)

        acc_f1[round_id] = {'f1_scores': f1_list, 'accuracy': acc_list}

    print(acc_f1)
    all_f1s = []
    all_acc = []
    for i in range(args.num_rounds):
        all_f1s += acc_f1[i]['f1_scores']
        all_acc += acc_f1[i]['accuracy']
    print(all_f1s)
    print(all_acc)
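
Note on the snippet above: `verb2id`, `compute_acc`, and `ImSituVerbGender` are assumed to be defined in the surrounding module, and `f1_score` matches sklearn.metrics.f1_score. A minimal, self-contained sketch of the label-corruption step, assuming `verb_ann` is a one-hot NumPy annotation matrix (as the indexing above suggests):

import random

import numpy as np

def corrupt_labels(verb_ann, p, num_classes):
    """Flip each row's one-hot verb label to a random class with probability p."""
    corrupted = verb_ann.copy()
    for i in range(corrupted.shape[0]):
        if random.random() < p:
            gt_id = np.argmax(corrupted[i, :])
            corrupted[i, gt_id] = 0
            corrupted[i, random.randrange(num_classes)] = 1
    return corrupted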
Example #2
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_id', type=str,
            help='experiment id, e.g. conv4_300_1.0_0.2_1')

    parser.add_argument('--num_rounds', type=int,
            default = 5)

    parser.add_argument('--annotation_dir', type=str,
            default='../data',
            help='annotation files path')
    parser.add_argument('--image_dir',
            default = '../data/of500_images_resized',
            help='image directory')

    parser.add_argument('--gender_balanced', action='store_true',
            help='use gender balanced subset for training')

    parser.add_argument('--balanced', action='store_true',
            help='use balanced subset for training')
    parser.add_argument('--ratio', type=str,
            default = '0')

    parser.add_argument('--num_verb', type=int,
            default = 211)

    parser.add_argument('--adv_on', action='store_true',
            help='start adv training')
    parser.add_argument('--layer', type=str,
            help='extract image feature for adv at this layer')

    parser.add_argument('--adv_capacity', type=int, default=300,
            help='linear layer dimension for adv component')
    parser.add_argument('--adv_conv', action='store_true',
            help='add conv layers to adv component')
    parser.add_argument('--adv_lambda', type=float, default=1.0,
            help='weight assigned to adv loss')
    parser.add_argument('--no_avgpool', action='store_true',
            help='remove avgpool layer for adv component')
    parser.add_argument('--adv_dropout', type=float, default=0.2,
            help='parameter for dropout layer in adv component')

    parser.add_argument('--blackout', action='store_true')
    parser.add_argument('--blackout_box', action='store_true')
    parser.add_argument('--blur', action='store_true')
    parser.add_argument('--grayscale', action='store_true')
    parser.add_argument('--edges', action='store_true')
    parser.add_argument('--blackout_face', action='store_true')

    parser.add_argument('--noise', action='store_true',
            help='add noise to image features')
    parser.add_argument('--noise_scale', type=float, default=0.2,
            help='std in gaussian noise')
    parser.add_argument('--no_image', action='store_true')

    parser.add_argument('--attacker_capacity', type=int, default=300,
            help='linear layer dimension for attacker')
    parser.add_argument('--attacker_conv', action='store_true',
            help='add conv layers to attacker')
    parser.add_argument('--attacker_dropout', type=float, default=0.2,
            help='parameter for dropout layer in attacker')

    parser.add_argument('--hid_size', type=int, default=300,
            help='linear layer dimension for attacker')

    ## training setting for attacker
    parser.add_argument('--finetune', action='store_true')
    parser.add_argument('--autoencoder_finetune', action='store_true')
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--learning_rate', type=float, default=0.00001,
            help='attacker learning rate')

    args = parser.parse_args()


    normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize])
    test_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        normalize])

    #Build the encoder from adv model
    adv_model_path = os.path.join('./models', args.exp_id)
    verb_num = len(id2verb)
    adv_model = VerbClassificationAdv(args, verb_num, args.adv_capacity, args.adv_dropout, args.adv_lambda).cuda()

    if os.path.isfile(os.path.join(adv_model_path, 'model_best.pth.tar')):
        print("=> loading encoder from '{}'".format(adv_model_path))
        checkpoint = torch.load(os.path.join(adv_model_path, 'model_best.pth.tar'))
        best_performance = checkpoint['best_performance']
        adv_model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(adv_model_path))

    # Data samplers.
    val_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir,split = 'val', transform = test_transform)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size = 16, \
            shuffle = False, num_workers = 4,pin_memory = True)

    test_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir,split = 'test', transform = test_transform)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size = 16, \
            shuffle = False, num_workers = 4,pin_memory = True)

    print('val set performance:')
    test(args, adv_model, val_loader)

    print('test set performance:')
    test(args, adv_model, test_loader)

    acc_list = {}
    acc_list['potential'] = []

    args.gender_balanced = True  # leakage is measured on gender-balanced splits, so chance accuracy is 0.5
    for i in range(args.num_rounds):

        train_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir,split = 'train', transform = train_transform)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size = 16,
                    shuffle = True, num_workers = 6, pin_memory = True)

        # Data samplers for val set.
        val_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'val', transform = test_transform)
        val_loader = torch.utils.data.DataLoader(val_data, batch_size = 16, \
                shuffle = False, num_workers = 4,pin_memory = True)

        # Data samplers for test set.
        test_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'test', transform = test_transform)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size = 16, \
                shuffle = False, num_workers = 4,pin_memory = True)

        image_features_path = os.path.join(adv_model_path, 'image_features')
        if not os.path.exists(image_features_path):
            os.makedirs(image_features_path)

        # get image features from encoder
        generate_image_feature('train', image_features_path, train_loader, adv_model)
        generate_image_feature('val', image_features_path, val_loader, adv_model)
        generate_image_feature('test', image_features_path, test_loader, adv_model)
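        # features are cached to disk once per round; the attacker below reads
        # them back through ImSituVerbGenderFeature instead of decoding raw images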

        train_data = ImSituVerbGenderFeature(args, image_features_path, split = 'train')
        train_loader = torch.utils.data.DataLoader(train_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)

        val_data = ImSituVerbGenderFeature(args, image_features_path, split = 'val')
        val_loader = torch.utils.data.DataLoader(val_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)

        test_data = ImSituVerbGenderFeature(args, image_features_path, split = 'test')
        test_loader = torch.utils.data.DataLoader(test_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)


        model_save_dir = './attacker'
        model_save_dir = os.path.join(model_save_dir, str(args.exp_id))

        if not os.path.exists(model_save_dir): os.makedirs(model_save_dir)

        lr = args.learning_rate

        for feature_type in acc_list.keys():

            attacker = GenderClassifier(args, args.num_verb)

            attacker = attacker.cuda()

            optimizer = optim.Adam(attacker.parameters(), lr=lr, weight_decay = 1e-5)

            train_attacker(args.num_epochs, optimizer, attacker, adv_model, train_loader, val_loader, \
               model_save_dir, feature_type)

            # evaluate best attacker on balanced test split
            best_attacker = torch.load(model_save_dir + '/best_attacker.pth.tar')
            attacker.load_state_dict(best_attacker['state_dict'])
            _, val_acc = epoch_pass(0, val_loader, attacker, adv_model, None, False, feature_type)
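            # fold accuracy around chance (0.5): an attacker that is systematically
            # wrong reveals as much gender information as one that is systematically right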
            val_acc = 0.5 + abs(val_acc - 0.5)
            _, test_acc = epoch_pass(0, test_loader, attacker, adv_model, None, False, feature_type)
            test_acc = 0.5 + abs(test_acc - 0.5)
            acc_list[feature_type].append(test_acc)
            print('round {} feature type: {}, test acc: {}, val acc: {}'.format(i, feature_type, test_acc, val_acc))

    for feature_type in acc_list.keys():
        print(acc_list[feature_type], np.std(np.array(acc_list[feature_type])))
        print('{} average leakage: {}'.format(feature_type, np.mean(np.array(acc_list[feature_type]))))
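
`generate_image_feature` is not defined in these snippets; below is a hypothetical sketch under two assumptions: the adversarial model returns `(task_pred, adv_pred, encoded_images)` as in Example #6, and `ImSituVerbGenderFeature` reads the cached file back. The real implementation lives in the surrounding module.

import os

import torch

def generate_image_feature(split, out_dir, loader, model):
    """Sketch: run the frozen encoder over a split and cache features to disk."""
    model.eval()
    features, genders = [], []
    with torch.no_grad():
        for images, targets, g, image_ids in loader:
            _, _, encoded = model(images.cuda())
            features.append(encoded.cpu())
            genders.append(g)
    torch.save({'features': torch.cat(features), 'genders': torch.cat(genders)},
               os.path.join(out_dir, '{}_features.pth'.format(split)))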
Example #3
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save_dir', type=str,
            help='path for saving checkpoints')
    parser.add_argument('--log_dir', type=str,
            help='path for saving log files')

    parser.add_argument('--ratio', type=str,
            default = '0')
    parser.add_argument('--num_verb', type=int,
            default = 211)

    parser.add_argument('--annotation_dir', type=str,
            default='./data',
            help='annotation files path')
    parser.add_argument('--image_dir',
            default = './data/of500_images_resized',
            help='image directory')

    parser.add_argument('--balanced', action='store_true',
            help='use balanced subset for training')
    parser.add_argument('--gender_balanced', action='store_true',
            help='use balanced subset for training, ratio will be 1/2/3')
    parser.add_argument('--batch_balanced', action='store_true',
            help='in every batch, gender balanced')

    parser.add_argument('--no_image', action='store_true',
            help='do not load image in dataloaders')

    parser.add_argument('--blackout', action='store_true')
    parser.add_argument('--blackout_box', action='store_true')
    parser.add_argument('--blackout_face', action='store_true')
    parser.add_argument('--blur', action='store_true')
    parser.add_argument('--grayscale', action='store_true')
    parser.add_argument('--edges', action='store_true')

    parser.add_argument('--resume',action='store_true')
    parser.add_argument('--learning_rate', type=float, default=0.0001)
    parser.add_argument('--finetune', action='store_true')
    parser.add_argument('--num_epochs', type=int, default=50)
    parser.add_argument('--batch_size', type=int, default=64)

    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)
    parser.add_argument('--start_epoch', type=int, default=1)
    parser.add_argument('--seed', type=int, default=1)

    args = parser.parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # create model save directory
    args.save_dir = os.path.join('./models', args.save_dir)
    if os.path.exists(args.save_dir) and not args.resume:
        print('Path {} already exists and --resume is not set!'.format(args.save_dir))
        return
    if not os.path.exists(args.save_dir): os.makedirs(args.save_dir)

    # create log save directory for train and val
    args.log_dir = os.path.join('./logs', args.log_dir)
    train_log_dir = os.path.join(args.log_dir, 'train')
    val_log_dir = os.path.join(args.log_dir, 'val')
    if not os.path.exists(train_log_dir): os.makedirs(train_log_dir)
    if not os.path.exists(val_log_dir): os.makedirs(val_log_dir)
    train_logger = Logger(train_log_dir)
    val_logger = Logger(val_log_dir)

    #save all hyper-parameters for training
    with open(os.path.join(args.log_dir, "arguments.txt"), "a") as f:
        f.write(str(args)+'\n')

    # image preprocessing
    normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize])
    val_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        normalize])

    # Data samplers.
    train_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir, split = 'train', transform = train_transform)

    val_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir, split = 'val', transform = val_transform)

    # Data loaders / batch assemblers.
    train_loader = torch.utils.data.DataLoader(train_data, batch_size = args.batch_size,
            shuffle = True, num_workers = 6, pin_memory = True)

    val_loader = torch.utils.data.DataLoader(val_data, batch_size = args.batch_size,
            shuffle = False, num_workers = 4, pin_memory = True)

    # build the models
    model = VerbClassification(args, args.num_verb).cuda()

    # build loss
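    # per-verb weights, presumably to counter the long-tailed verb distribution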
    verb_weights = torch.FloatTensor(train_data.getVerbWeights())
    criterion = nn.CrossEntropyLoss(weight=verb_weights, reduction='mean').cuda()

    # build optimizer for trainable loss
    def trainable_params():
        for param in model.parameters():
            if param.requires_grad:
                yield param

    num_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('num_trainable_params:', num_trainable_params)
    optimizer = torch.optim.Adam(trainable_params(), args.learning_rate, weight_decay = 1e-5)

    best_performance = 0
    if args.resume:
        if os.path.isfile(os.path.join(args.save_dir, 'checkpoint.pth.tar')):
            print("=> loading checkpoint '{}'".format(args.save_dir))
            checkpoint = torch.load(os.path.join(args.save_dir, 'checkpoint.pth.tar'))
            args.start_epoch = checkpoint['epoch']
            best_performance = checkpoint['best_performance']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.save_dir))

    print('before training, evaluate the model')
    test(args, 0, model, criterion, val_loader, val_logger, logging=False)

    for epoch in range(args.start_epoch, args.num_epochs + 1):
        train(args, epoch, model, criterion, train_loader, optimizer, \
                train_logger, logging = True)
        current_performance = test(args, epoch, model, criterion, val_loader, \
                val_logger, logging = True)
        is_best = current_performance > best_performance
        best_performance = max(current_performance, best_performance)
        model_state = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_performance': best_performance}
        save_checkpoint(args, model_state, is_best, os.path.join(args.save_dir, \
                'checkpoint.pth.tar'))

        # at the end of every run, save the model
        if epoch == args.num_epochs:
            torch.save(model_state, os.path.join(args.save_dir, \
                'checkpoint_%s.pth.tar' % str(args.num_epochs)))
Example #4
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_id', type=str, help='experiment id')
    parser.add_argument('--log_dir',
                        type=str,
                        help='path for saving log files')

    parser.add_argument('--annotation_dir',
                        type=str,
                        default='../data',
                        help='annotation files path')
    parser.add_argument('--image_dir',
                        default='../data/of500_images_resized',
                        help='image directory')

    parser.add_argument('--ratio', type=str, default='0')
    parser.add_argument('--num_verb', type=int, default=211)

    parser.add_argument('--no_image',
                        action='store_true',
                        help='do not load image in dataloaders')

    parser.add_argument('--balanced',
                        action='store_true',
                        help='use balanced subset for training')
    parser.add_argument('--gender_balanced',
                        action='store_true',
                        help='use gender balanced subset for training')
    parser.add_argument('--batch_balanced',
                        action='store_true',
                        help='in every batch, gender balanced')

    parser.add_argument('--beta',
                        type=float,
                        default=1.0,
                        help='autoencoder l1 loss weight')

    parser.add_argument('--adv_on',
                        action='store_true',
                        help='start adv training')
    parser.add_argument('--layer',
                        type=str,
                        help='extract image feature for adv at this layer')
    parser.add_argument('--adv_conv',
                        action='store_true',
                        help='add conv layers to adv component')
    parser.add_argument('--no_avgpool',
                        action='store_true',
                        help='remove avgpool layer for adv component')
    parser.add_argument('--adv_capacity',
                        type=int,
                        help='linear layer dimension for adv component')
    parser.add_argument('--adv_lambda',
                        type=float,
                        help='weight assigned to adv loss')
    parser.add_argument('--dropout',
                        type=float,
                        help='parameter for dropout layer in adv component')

    parser.add_argument('--blackout', action='store_true')
    parser.add_argument('--blackout_box', action='store_true')
    parser.add_argument('--blackout_face', action='store_true')
    parser.add_argument('--blur', action='store_true')
    parser.add_argument('--grayscale', action='store_true')
    parser.add_argument('--edges', action='store_true')

    parser.add_argument('--resume', action='store_true')
    parser.add_argument('--finetune', action='store_true')
    parser.add_argument('--autoencoder_finetune', action='store_true')
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--start_epoch', type=int, default=1)
    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--learning_rate', type=float, default=0.00001)

    args = parser.parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    args.save_dir = os.path.join('./models', args.layer + '_' + str(args.adv_lambda) + '_' + \
            str(args.beta) + '_' + args.exp_id)

    if os.path.exists(args.save_dir) and not args.resume:
        print('Path {} already exists and --resume is not set!'.format(args.save_dir))
        return
    if not os.path.exists(args.save_dir): os.makedirs(args.save_dir)

    args.log_dir = os.path.join('./logs', args.layer + '_' + str(args.adv_lambda) + '_' + \
            str(args.beta) + '_' + args.exp_id)

    train_log_dir = os.path.join(args.log_dir, 'train')
    val_log_dir = os.path.join(args.log_dir, 'val')
    if not os.path.exists(train_log_dir): os.makedirs(train_log_dir)
    if not os.path.exists(val_log_dir): os.makedirs(val_log_dir)
    train_logger = Logger(train_log_dir)
    val_logger = Logger(val_log_dir)

    #save all parameters for training
    with open(os.path.join(args.log_dir, "arguments.txt"), "a") as f:
        f.write(str(args) + '\n')

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    # Image preprocessing
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(), normalize
    ])
    val_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(), normalize
    ])

    # Data samplers.
    train_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir, split = 'train', transform = train_transform)

    val_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir, split = 'val', transform = val_transform)

    args.gender_balanced = True
    val_data_gender_balanced = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir, split = 'val', transform = val_transform)
    args.gender_balanced = False

    # Data loaders / batch assemblers.
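    # with batch_balanced, draw a 2.5x larger raw batch, presumably so that enough
    # samples of each gender remain once every batch is balanced downstream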
    if args.batch_balanced:
        train_batch_size = int(2.5 * args.batch_size)
    else:
        train_batch_size = int(args.batch_size)

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=train_batch_size,
                                               shuffle=True,
                                               num_workers=6,
                                               pin_memory=True)

    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True)

    val_loader_gender_balanced = torch.utils.data.DataLoader(val_data_gender_balanced, \
        batch_size = args.batch_size, shuffle = False, num_workers = 4, pin_memory = True)

    # Build the models
    model = VerbClassificationAdv(args, args.num_verb, args.adv_capacity,
                                  args.dropout, args.adv_lambda).cuda()

    checkpoint = torch.load('./origin/model_best.pth.tar')
    # load partial weights
    model_dict = model.state_dict()
    pretrained_dict = {
        k: v
        for k, v in checkpoint['state_dict'].items() if k in model_dict
    }
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)

    verb_weights = torch.FloatTensor(train_data.getVerbWeights())
    criterion = nn.CrossEntropyLoss(weight=verb_weights,
                                    reduction='mean').cuda()
    criterionL1 = torch.nn.L1Loss(reduction='mean')

    # print model
    def trainable_params():
        for param in model.parameters():
            if param.requires_grad:
                yield param

    num_trainable_params = sum(p.numel() for p in model.parameters()
                               if p.requires_grad)
    print('num_trainable_params:', num_trainable_params)

    optimizer = torch.optim.Adam(trainable_params(),
                                 args.learning_rate,
                                 weight_decay=1e-5)

    best_performance = 0
    if args.resume:
        if os.path.isfile(os.path.join(args.save_dir, 'checkpoint.pth.tar')):
            print("=> loading checkpoint '{}'".format(args.save_dir))
            checkpoint = torch.load(
                os.path.join(args.save_dir, 'checkpoint.pth.tar'))
            args.start_epoch = checkpoint['epoch']
            best_performance = checkpoint['best_performance']
            # load partial weights
            model_dict = model.state_dict()
            pretrained_dict = {
                k: v
                for k, v in checkpoint['state_dict'].items() if k in model_dict
            }
            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
            print("=> loaded checkpoint (epoch {})".format(
                checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.save_dir))

    print('before training, evaluate the model')
    test_balanced(args,
                  0,
                  model,
                  criterion,
                  criterionL1,
                  val_loader_gender_balanced,
                  val_logger,
                  logging=False)
    test(args,
         0,
         model,
         criterion,
         criterionL1,
         val_loader,
         val_logger,
         logging=False)

    for epoch in range(args.start_epoch, args.num_epochs + 1):
        train(args, epoch, model, criterion, criterionL1, train_loader, optimizer, \
                train_logger, logging=True)
        test_balanced(args,
                      epoch,
                      model,
                      criterion,
                      criterionL1,
                      val_loader_gender_balanced,
                      val_logger,
                      logging=True)

        current_performance = test(args, epoch, model, criterion, criterionL1,  val_loader, \
                val_logger, logging = True)
        is_best = current_performance > best_performance
        best_performance = max(current_performance, best_performance)
        model_state = {
            'epoch': epoch + 1,
            'state_dict': model.state_dict(),
            'best_performance': best_performance
        }
        save_checkpoint(args, model_state, is_best, os.path.join(args.save_dir, \
                'checkpoint.pth.tar'))
Example #5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--save_dir', type=str,
            default='./dataset_leakage',
            help='path for saving checkpoints')

    parser.add_argument('--num_rounds', type=int,
            default = 5)

    parser.add_argument('--balanced', action='store_true')
    parser.add_argument('--ratio', type=str,
            default = '0')

    parser.add_argument('--num_verb', type=int,
            default = 211)

    parser.add_argument('--annotation_dir', type=str,
            default='./data',
            help='annotation files path')
    parser.add_argument('--image_dir',
            default = './data/of500_images_resized',
            help='image directory')

    parser.add_argument('--hid_size', type=int,
            default = 300)

    parser.add_argument('--no_image', action='store_true')

    parser.add_argument('--num_epochs', type=int, default=150)
    parser.add_argument('--learning_rate', type=float, default=0.00005)
    parser.add_argument('--print_every', type=int, default=500)

    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)

    args = parser.parse_args()

    args.gender_balanced = True # always True as we want to compute the leakage
    args.finetune = False
    args.no_image = True

    args.blur = False
    args.blackout_face = False
    args.blackout = False
    args.blackout_box = False
    args.grayscale = False
    args.edges = False


    normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize])

    test_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        normalize])

    acc_list = []
    for i in range(args.num_rounds):

        train_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'train', transform = train_transform)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)

        # Data samplers for val set.
        val_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'val', transform = test_transform)
        val_loader = torch.utils.data.DataLoader(val_data, batch_size = args.batch_size, \
                shuffle = False, num_workers = 4,pin_memory = True)

        # Data samplers for test set.
        test_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'test', transform = test_transform)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size = args.batch_size, \
                shuffle = False, num_workers = 4,pin_memory = True)

        # initialize gender classifier
        model = GenderClassifier(args, args.num_verb)
        model = model.cuda()

        optimizer = optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay = 1e-5)

        if args.balanced:
            model_save_dir = os.path.join(args.save_dir, 'ratio_'+args.ratio)
        else:
            model_save_dir = os.path.join(args.save_dir, 'origin')

        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)

        train_genderclassifier(model, args.num_epochs, optimizer, train_loader, val_loader, \
            model_save_dir, args.print_every)

        model.load_state_dict(torch.load(model_save_dir+'/model_best.pth.tar')['state_dict'])
        loss, acc = epoch_pass(-1, test_loader, model, None, False, print_every=500)
        loss, val_acc = epoch_pass(-1, val_loader, model, None, False, print_every=500)
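        # fold accuracy around chance (0.5); systematically wrong predictions
        # leak as much gender information as systematically right ones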
        acc = 0.5 + abs(acc - 0.5)
        val_acc = 0.5 + abs(val_acc - 0.5)
        print('round {} acc on test set: {}, val acc: {}'.format(i, acc*100, val_acc*100))
        acc_list.append(acc)

    print(acc_list)
    acc_ = np.array(acc_list)
    mean_acc = np.mean(acc_)
    std_acc = np.std(acc_)
    print(mean_acc, std_acc)
Example #6
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_id', type=str,
            help='experiment id, e.g. conv4_300_1.0_0.2_1')

    parser.add_argument('--num_rounds', type=int,
            default = 5)

    parser.add_argument('--annotation_dir', type=str,
            default='../data',
            help='annotation files path')
    parser.add_argument('--image_dir',
            default = '../data/of500_images_resized',
            help='image directory')

    parser.add_argument('--gender_balanced', action='store_true',
            help='use gender balanced subset for training')
    parser.add_argument('--balanced', action='store_true',
            help='use balanced subset for training')
    parser.add_argument('--ratio', type=str,
            default = '0')

    parser.add_argument('--num_verb', type=int,
            default = 211)

    parser.add_argument('--adv_on', action='store_true',
            help='start adv training')
    parser.add_argument('--layer', type=str,
            help='extract image feature for adv at this layer')

    parser.add_argument('--adv_capacity', type=int, default=300,
            help='linear layer dimension for adv component')
    parser.add_argument('--adv_conv', action='store_true',
            help='add conv layers to adv component')
    parser.add_argument('--adv_lambda', type=float, default=1.0,
            help='weight assigned to adv loss')
    parser.add_argument('--no_avgpool', action='store_true',
            help='remove avgpool layer for adv component')
    parser.add_argument('--adv_dropout', type=float, default=0.2,
            help='parameter for dropout layer in adv component')

    parser.add_argument('--hid_size', type=int, default=300,
            help='linear layer dimension for attacker')

    parser.add_argument('--attacker_capacity', type=int, default=300,
            help='linear layer dimension for attacker')
    parser.add_argument('--attacker_conv', action='store_true',
            help='add conv layers to attacker')
    parser.add_argument('--attacker_dropout', type=float, default=0.2,
            help='parameter for dropout layer in attacker')

    parser.add_argument('--blackout', action='store_true')
    parser.add_argument('--blackout_box', action='store_true')
    parser.add_argument('--blur', action='store_true')
    parser.add_argument('--grayscale', action='store_true')
    parser.add_argument('--edges', action='store_true')
    parser.add_argument('--blackout_face', action='store_true')

    parser.add_argument('--noise', action='store_true',
            help='add noise to image features')
    parser.add_argument('--noise_scale', type=float, default=0.2,
            help='std in gaussian noise')
    parser.add_argument('--no_image', action='store_true')

    ## training setting for attacker
    parser.add_argument('--finetune', action='store_true')
    parser.add_argument('--autoencoder_finetune', action='store_true')
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--batch_size', type=int, default=128)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)
    parser.add_argument('--seed', type=int, default=1)
    parser.add_argument('--learning_rate', type=float, default=0.00005,
            help='attacker learning rate')

    args = parser.parse_args()

    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    test_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        normalize])

    #Build the encoder from adv model
    adv_model_path = os.path.join('./models', args.exp_id)
    verb_num = len(id2verb)
    adv_model = VerbClassificationAdv(args, verb_num, args.adv_capacity, args.adv_dropout, args.adv_lambda).cuda()

    if os.path.isfile(os.path.join(adv_model_path, 'model_best.pth.tar')):
        print("=> loading encoder from '{}'".format(adv_model_path))
        checkpoint = torch.load(os.path.join(adv_model_path, 'model_best.pth.tar'))
        best_performance = checkpoint['best_performance']
        adv_model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(adv_model_path))
        return  # checkpoint['epoch'] is used below, so bail out if loading failed

    adv_model.eval()

    # Data samplers.

    test_data = ImSituVerbGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir,split = 'test', transform = test_transform)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size = args.batch_size, \
            shuffle = False, num_workers = 4,pin_memory = True)

    save_dir = os.path.join('./sample_images/auto_debias', args.exp_id+'_'+str(checkpoint['epoch']))
    if not os.path.exists(save_dir): os.makedirs(save_dir)

    results = list()
    for batch_idx, (images, targets, genders, image_ids) in enumerate(test_loader):
        if batch_idx == 10: break # constrain epoch size
        images = images.cuda()

        # save original images
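        # NOTE: `transf` is not defined in this snippet; a plausible definition is
        # sketched after this example. './sample_images/origin/' must also exist.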
        for i in range(len(images)):
            image = transf(images[i].clone().cpu())
            image.save('./sample_images/origin/{}.jpg'.format(image_ids[i].item()))

        # Forward, Backward and Optimizer
        task_pred, adv_pred, encoded_images = adv_model(images)
        for i in range(len(encoded_images)):
            image = transf(encoded_images[i].clone().cpu())
            imageID = image_ids[i].item()
            image.save('{}/{}.jpg'.format(save_dir, imageID))
            results.append({'imageID': imageID,
                           'original_image_path': './origin/{}.jpg'.format(imageID),
                           'auto_debias_image_path': './auto_debias/{}/{}.jpg'.format( \
                           args.exp_id+'_'+str(checkpoint['epoch']), imageID)})
    # render result
    import jinja2
    templateLoader = jinja2.FileSystemLoader(searchpath='.')
    templateEnv = jinja2.Environment(loader = templateLoader)
    template = templateEnv.get_template('vis_template.html')
    txt = template.render({'results': results, 'exp_id': args.exp_id})
    out_path = os.path.join('./sample_images/',
            '{}_epoch_{}_predictions.html'.format(args.exp_id, checkpoint['epoch']))
    with open(out_path, 'w') as fh:
        fh.write(txt)
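
`transf` above is assumed to be a module-level tensor-to-PIL conversion. One plausible definition, which also inverts the ImageNet normalization applied by `test_transform` before converting back to a PIL image:

import torchvision.transforms as transforms

# invert Normalize(mean, std): x_orig = x_norm * std + mean
inv_normalize = transforms.Normalize(
    mean=[-0.485 / 0.229, -0.456 / 0.224, -0.406 / 0.225],
    std=[1 / 0.229, 1 / 0.224, 1 / 0.225])
transf = transforms.Compose([inv_normalize, transforms.ToPILImage()])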