Exemple #1
0
def _flip_annotations(data, p, num_objects):
    """Flip entries of ``data.object_ann`` in place, each with probability ``p``.

    Visits every (sample, object) cell in row-major order and replaces the
    value ``v`` by ``1 - v`` whenever a fresh ``random.random()`` draw is
    below ``p``.
    """
    for i in range(len(data)):
        for j in range(num_objects):
            if random.random() < p:
                data.object_ann[i, j] = 1 - data.object_ann[i, j]


def compute_acc(p, args, train_data_ori, val_data_ori, test_data_ori):
    """Train a gender classifier on label-flipped data and return its test accuracy.

    Args:
        p: probability with which each object annotation is flipped.
        args: namespace read for ``batch_size``, ``num_object``,
            ``learning_rate``, ``save_dir``, ``num_epochs`` and ``print_every``.
        train_data_ori, val_data_ori, test_data_ori: datasets exposing
            ``__len__`` and an ``object_ann`` array indexable as ``[i, j]``.
            They are deep-copied, so the caller's objects are not modified.

    Returns:
        The test accuracy reflected about 0.5 (``0.5 + abs(acc - 0.5)``).
    """
    train_data = copy.deepcopy(train_data_ori)
    val_data = copy.deepcopy(val_data_ori)
    test_data = copy.deepcopy(test_data_ori)

    # Randomly flip the ground truth with probability p.  The splits are
    # processed in the order train, val, test so the sequence of random
    # draws stays the same as in the previous, unrolled implementation.
    num_objects = len(object2id)
    for split in (train_data, val_data, test_data):
        _flip_annotations(split, p, num_objects)

    # Data samplers
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               num_workers=6,
                                               pin_memory=True)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             num_workers=4,
                                             pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=args.batch_size,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)

    model = GenderClassifier(args, args.num_object)
    model = model.cuda()

    optimizer = optim.Adam(model.parameters(),
                           lr=args.learning_rate,
                           weight_decay=1e-5)

    model_save_dir = args.save_dir
    train_genderclassifier(model, args.num_epochs, optimizer, train_loader,
                           val_loader, model_save_dir, args.print_every)

    # Reload the checkpoint named model_best.pth.tar from the save directory
    # (presumably written by train_genderclassifier -- confirm) before testing.
    model.load_state_dict(
        torch.load(model_save_dir + '/model_best.pth.tar')['state_dict'])
    # NOTE(review): the positional None/False look like "no optimizer" and
    # "not training" -- confirm against epoch_pass's signature.
    _, acc = epoch_pass(0, test_loader, model, None, False, print_every=500)
    # Reflect about 0.5 so a classifier that is wrong more often than right
    # is scored by its distance from chance.
    acc = 0.5 + abs(acc - 0.5)
    print(' when p is {}, gender acc on test set: {}'.format(p, acc * 100))

    return acc
Exemple #2
0
def main():
    """Train and evaluate a gender classifier for several rounds and report accuracy.

    Parses the command-line options, then for ``--num_rounds`` rounds rebuilds
    the train/val/test ``CocoObjectGender`` datasets, trains a fresh
    ``GenderClassifier``, reloads ``model_best.pth.tar`` from the save
    directory and evaluates it on the test and validation loaders.  The list
    of per-round test accuracies, followed by their mean and standard
    deviation, is printed at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--save_dir', type=str,
            default='./dataset_leakage',
            help='path for saving checkpoints')

    parser.add_argument('--num_rounds', type=int,
            default = 5)

    parser.add_argument('--balanced', action='store_true')
    parser.add_argument('--ratio', type=str,
            default = '0')
    parser.add_argument('--num_object', type=int,
            default = 79)

    parser.add_argument('--annotation_dir', type=str,
            default='./data',
            help='annotation files path')
    parser.add_argument('--image_dir',
            default = './data',
            help='image directory')

    parser.add_argument('--hid_size', type=int,
            default = 300)

    parser.add_argument('--no_image', action='store_true')

    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--learning_rate', type=float, default=0.00005)
    parser.add_argument('--print_every', type=int, default=500)

    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)

    args = parser.parse_args()

    args.gender_balanced = True # always True as we want to compute the leakage
    # Forced on regardless of what was passed for --no_image.
    args.no_image = True

    # Options the parser above does not expose; fixed to False here
    # (presumably read by CocoObjectGender -- confirm).
    args.blur = False
    args.blackout_face = False
    args.blackout = False
    args.blackout_box = False
    args.grayscale = False
    args.edges = False

    normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize])

    test_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        normalize])

    # The checkpoint directory does not change between rounds, so resolve
    # and create it once up front.
    model_save_dir = os.path.join(args.save_dir, 'ratio_'+args.ratio)
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)

    acc_list = []
    for i in range(args.num_rounds):

        # Data samplers for train set.
        train_data = CocoObjectGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'train', transform = train_transform)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)

        # Data samplers for val set.
        val_data = CocoObjectGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'val', transform = test_transform)
        val_loader = torch.utils.data.DataLoader(val_data, batch_size = args.batch_size, \
                shuffle = False, num_workers = 4,pin_memory = True)

        # Data samplers for test set.
        test_data = CocoObjectGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'test', transform = test_transform)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size = args.batch_size, \
                shuffle = False, num_workers = 4,pin_memory = True)

        # initialize gender classifier
        model = GenderClassifier(args, args.num_object)
        model = model.cuda()

        optimizer = optim.Adam(model.parameters(), lr=args.learning_rate, weight_decay = 1e-5)

        train_genderclassifier(model, args.num_epochs, optimizer, train_loader, val_loader, \
            model_save_dir, args.print_every)

        # Evaluate the checkpoint stored as model_best.pth.tar in the save
        # directory (presumably written by train_genderclassifier -- confirm).
        model.load_state_dict(torch.load(model_save_dir+'/model_best.pth.tar')['state_dict'])
        _, acc = epoch_pass(0, test_loader, model, None, False, print_every=500)
        _, val_acc = epoch_pass(0, val_loader, model, None, False, print_every=500)
        # Reflect accuracies about 0.5 so they are scored by distance from chance.
        acc = 0.5 + abs(acc - 0.5)
        val_acc = 0.5 + abs(val_acc - 0.5)
        print('round {} acc on test set: {}, val acc: {}'.format(i, acc*100, val_acc*100))
        acc_list.append(acc)

    # Single-argument print calls produce the same text under Python 2 and
    # Python 3; the former print statements were a SyntaxError on Python 3.
    print(acc_list)
    acc_ = np.array(acc_list)
    mean_acc = np.mean(acc_)
    std_acc = np.std(acc_)
    print('{} {}'.format(mean_acc, std_acc))
Exemple #3
0
def main():
    """Measure how well an attacker predicts gender from a trained encoder.

    Parses the command-line options, loads the encoder checkpoint
    ``./models/<exp_id>/model_best.pth.tar`` when it exists, and reports the
    encoder's performance on the val and test splits via ``test``.  Then, for
    ``--num_rounds`` rounds, it rebuilds the datasets with
    ``args.gender_balanced`` forced to True, dumps image features with
    ``generate_image_feature``, trains a ``GenderClassifier`` attacker on
    those features and records its test accuracy.  The per-round accuracies,
    their standard deviation and their mean are printed at the end.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_id', type=str,
            help='experiment id, e.g. conv4_300_1.0_0.2_1')

    parser.add_argument('--num_rounds', type=int,
            default = 5)

    parser.add_argument('--annotation_dir', type=str,
            default='./data',
            help='annotation files path')
    parser.add_argument('--image_dir',
            default = './data',
            help='image directory')

    parser.add_argument('--gender_balanced', action='store_true',
            help='use gender balanced subset for training')

    parser.add_argument('--balanced', action='store_true',
            help='use balanced subset for training')
    parser.add_argument('--ratio', type=str,
            default = '0')

    parser.add_argument('--num_object', type=int,
            default = 79)

    parser.add_argument('--no_image', action='store_true')

    parser.add_argument('--blackout', action='store_true')
    parser.add_argument('--blackout_face', action='store_true')
    parser.add_argument('--blackout_box', action='store_true')
    parser.add_argument('--blur', action='store_true')
    parser.add_argument('--grayscale', action='store_true')
    parser.add_argument('--edges', action='store_true')

    parser.add_argument('--noise', action='store_true',
            help='add noise to image features')
    parser.add_argument('--noise_scale', type=float, default=0.2,
            help='std in gaussian noise')

    parser.add_argument('--hid_size', type=int, default=300,
            help='linear layer dimension for attacker')

    ## training setting for attacker
    parser.add_argument('--num_epochs', type=int, default=100)
    parser.add_argument('--finetune', action='store_true')
    parser.add_argument('--learning_rate', type=float, default=0.00005,
            help='attacker learning rate')

    parser.add_argument('--batch_size', type=int, default=64)
    parser.add_argument('--crop_size', type=int, default=224)
    parser.add_argument('--image_size', type=int, default=256)
    # NOTE(review): --seed is parsed but not used anywhere in this function.
    parser.add_argument('--seed', type=int, default=1)

    args = parser.parse_args()

    normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406],
        std = [0.229, 0.224, 0.225])
    train_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.RandomCrop(args.crop_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize])

    test_transform = transforms.Compose([
        transforms.Resize(args.image_size),
        transforms.CenterCrop(args.crop_size),
        transforms.ToTensor(),
        normalize])

    # Build the encoder
    encoder = ObjectMultiLabelEncoder(args, args.num_object).cuda()
    model_path = os.path.join('./models', args.exp_id)
    checkpoint_path = os.path.join(model_path, 'model_best.pth.tar')
    if os.path.isfile(checkpoint_path):
        print("=> loading encoder from '{}'".format(model_path))
        checkpoint = torch.load(checkpoint_path)
        encoder.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
    else:
        # NOTE(review): execution continues with an encoder whose weights
        # were not loaded from a checkpoint.
        print("=> no checkpoint found at '{}'".format(model_path))

    encoder.eval()

    # Data samplers.
    val_data = CocoObjectGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir,split = 'val', transform = test_transform)
    val_loader = torch.utils.data.DataLoader(val_data, batch_size = args.batch_size, \
            shuffle = False, num_workers = 4,pin_memory = True)

    test_data = CocoObjectGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir,split = 'test', transform = test_transform)
    test_loader = torch.utils.data.DataLoader(test_data, batch_size = args.batch_size, \
            shuffle = False, num_workers = 4,pin_memory = True)

    print('val set performance:')
    test(args, encoder, val_loader)

    print('test set performance:')
    test(args, encoder, test_loader)

    acc_list = {}
    #acc_list['image_feature'] = []
    acc_list['potential'] = []

    # From here on the datasets are built with gender_balanced forced on,
    # whatever was passed on the command line.
    args.gender_balanced = True

    # Directories are identical for every round, so resolve and create them once.
    image_features_path = os.path.join(model_path, 'image_features')
    if not os.path.exists(image_features_path):
        os.makedirs(image_features_path)

    # The noise suffix used to be appended to args.exp_id inside the round
    # loop, so each round added another '_noise<scale>' and saved its attacker
    # under a different, ever longer directory name.  It is now applied
    # exactly once, to a local name, and args.exp_id is left untouched.
    attacker_exp_id = str(args.exp_id)
    if args.noise:
        attacker_exp_id += '_noise' + str(args.noise_scale)
    model_save_dir = os.path.join('./attacker', attacker_exp_id)
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)

    for i in range(args.num_rounds):

        train_data = CocoObjectGender(args, annotation_dir = args.annotation_dir, \
            image_dir = args.image_dir,split = 'train', transform = train_transform)
        train_loader = torch.utils.data.DataLoader(train_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)

        # Data samplers for val set.
        val_data = CocoObjectGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'val', transform = test_transform)
        val_loader = torch.utils.data.DataLoader(val_data, batch_size = args.batch_size, \
                shuffle = False, num_workers = 4,pin_memory = True)

        # Data samplers for test set.
        test_data = CocoObjectGender(args, annotation_dir = args.annotation_dir, \
                image_dir = args.image_dir,split = 'test', transform = test_transform)
        test_loader = torch.utils.data.DataLoader(test_data, batch_size = args.batch_size, \
                shuffle = False, num_workers = 4,pin_memory = True)

        # get image features from encoder
        generate_image_feature('train', image_features_path, train_loader, encoder)
        generate_image_feature('val', image_features_path, val_loader, encoder)
        generate_image_feature('test', image_features_path, test_loader, encoder)

        # Replace the image loaders with loaders over the features stored
        # under image_features_path.
        train_data = CocoObjectGenderFeature(args, image_features_path, split = 'train')
        train_loader = torch.utils.data.DataLoader(train_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)

        val_data = CocoObjectGenderFeature(args, image_features_path, split = 'val')
        val_loader = torch.utils.data.DataLoader(val_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)

        test_data = CocoObjectGenderFeature(args, image_features_path, split = 'test')
        test_loader = torch.utils.data.DataLoader(test_data, batch_size = args.batch_size,
                    shuffle = True, num_workers = 6, pin_memory = True)

        for feature_type in acc_list:

            attacker = GenderClassifier(args, args.num_object)
            attacker = attacker.cuda()

            optimizer = optim.Adam(attacker.parameters(), lr=args.learning_rate, weight_decay = 1e-5)

            train_attacker(args.num_epochs, optimizer, attacker, encoder, train_loader, val_loader, \
               model_save_dir, feature_type)

            # evaluate best attacker on balanced test split
            best_attacker = torch.load(model_save_dir + '/best_attacker.pth.tar')
            attacker.load_state_dict(best_attacker['state_dict'])
            _, val_acc = epoch_pass(0, val_loader, attacker, encoder, None, False, feature_type)
            # Reflect accuracies about 0.5 so they are scored by distance from chance.
            val_acc = 0.5 + abs(val_acc - 0.5)
            _, test_acc = epoch_pass(0, test_loader, attacker, encoder, None, False, feature_type)
            test_acc = 0.5 + abs(test_acc - 0.5)
            acc_list[feature_type].append(test_acc)
            print('round {} feature type: {}, test acc: {}, val acc: {}'.format(i, feature_type, \
                    test_acc, val_acc))

    for feature_type in acc_list:
        print(acc_list[feature_type], np.std(np.array(acc_list[feature_type])))
        print('{} average leakage: {}'.format(feature_type, np.mean(np.array(acc_list[feature_type]))))