Example #1
def main(dataset,
         augment=False,
         use_scattering=False,
         size=None,
         batch_size=2048,
         mini_batch_size=256,
         sample_batches=False,
         lr=1,
         optim="SGD",
         momentum=0.9,
         nesterov=False,
         noise_multiplier=1,
         max_grad_norm=0.1,
         epochs=100,
         input_norm=None,
         num_groups=None,
         bn_noise_multiplier=None,
         max_epsilon=None,
         logdir=None,
         early_stop=True,
         seed=0):
    torch.manual_seed(seed)
    logger = Logger(logdir)
    device = get_device()

    train_data, test_data = get_data(dataset, augment=augment)

    if use_scattering:
        scattering, K, _ = get_scatter_transform(dataset)
        scattering.to(device)
    else:
        scattering = None
        K = 3 if len(train_data.data.shape) == 4 else 1

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size

    # Batch accumulation and data augmentation with Poisson sampling aren't implemented
    if sample_batches:
        assert n_acc_steps == 1
        assert not augment

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=mini_batch_size,
                                               shuffle=True,
                                               num_workers=1,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=mini_batch_size,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True)

    rdp_norm = 0
    if input_norm == "BN":
        # compute noisy data statistics or load from disk if pre-computed
        save_dir = f"bn_stats/{dataset}"
        os.makedirs(save_dir, exist_ok=True)
        bn_stats, rdp_norm = scatter_normalization(
            train_loader,
            scattering,
            K,
            device,
            len(train_data),
            len(train_data),
            noise_multiplier=bn_noise_multiplier,
            orders=ORDERS,
            save_dir=save_dir)
        model = CNNS[dataset](K, input_norm="BN", bn_stats=bn_stats, size=size)
    else:
        model = CNNS[dataset](K,
                              input_norm=input_norm,
                              num_groups=num_groups,
                              size=size)

    model.to(device)

    if use_scattering and augment:
        model = nn.Sequential(scattering, model)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=mini_batch_size,
                                                   shuffle=True,
                                                   num_workers=1,
                                                   pin_memory=True,
                                                   drop_last=True)
    else:
        # pre-compute the scattering transform if necessary
        train_loader = get_scattered_loader(train_loader,
                                            scattering,
                                            device,
                                            drop_last=True,
                                            sample_batches=sample_batches)
        test_loader = get_scattered_loader(test_loader, scattering, device)

    print(f"model has {get_num_params(model)} parameters")

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum,
                                    nesterov=nesterov)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        batch_size=bs,
        sample_size=len(train_data),
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    best_acc = 0
    flat_count = 0

    results = dict(train_zeon=[],
                   train_xent=[],
                   test_zeon=[],
                   test_xent=[],
                   epoch=[])
    for epoch in range(0, epochs):
        print(f"\nEpoch: {epoch}")

        train_loss, train_acc = train(model,
                                      train_loader,
                                      optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)

        results['train_zeon'].append(train_acc)
        results['train_xent'].append(train_loss)
        results['test_zeon'].append(test_acc)
        results['test_xent'].append(test_loss)
        results['epoch'].append(epoch)

        if noise_multiplier > 0:
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_norm + rdp_sgd)
            epsilon2, _ = get_privacy_spent(rdp_sgd)
            print(f"ε = {epsilon:.3f} (sgd only: ε = {epsilon2:.3f})")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
        logger.log_scalar("epsilon/train", epsilon, epoch)

        # stop if we're not making progress
        if test_acc > best_acc:
            best_acc = test_acc
            flat_count = 0
        else:
            flat_count += 1
            if flat_count >= 20 and early_stop:
                print("plateau...")
                break

    # Write to file.
    record = {
        **results,
        **{
            'best_acc': best_acc,
            'seed': seed,
            'dataset': dataset
        }
    }
    record_path = os.path.join('.', 'record', f'{dataset}-{seed}.json')
    os.makedirs(os.path.dirname(record_path), exist_ok=True)
    with open(record_path, 'w') as f:
        json.dump(record, f, indent=4)
    import logging
    logging.warning(f'Wrote to file: {record_path}')
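
The `train` helper used above is not shown in this snippet. It runs one epoch with gradient accumulation, so the logical DP-SGD batch (`batch_size`) can be much larger than the physical `mini_batch_size`. Below is a minimal sketch of such a loop, assuming an Opacus-style DP optimizer that exposes `virtual_step()` to clip and accumulate per-sample gradients without adding noise; the actual helper in the source may differ:

import torch.nn.functional as F

def train(model, loader, optimizer, n_acc_steps=1):
    """One epoch of DP-SGD with gradient accumulation (sketch)."""
    model.train()
    device = next(model.parameters()).device
    total_loss, correct, seen = 0.0, 0, 0
    for i, (data, target) in enumerate(loader):
        data, target = data.to(device), target.to(device)
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        if (i + 1) % n_acc_steps == 0:
            optimizer.step()          # add noise and apply the accumulated update
            optimizer.zero_grad()
        else:
            optimizer.virtual_step()  # clip and accumulate only, no noise yet
        total_loss += loss.item() * len(data)
        correct += (output.argmax(dim=1) == target).sum().item()
        seen += len(data)
    return total_loss / seen, correct / seen
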
Example #2
def main(tiny_images=None,
         model="cnn",
         augment=False,
         use_scattering=False,
         batch_size=2048,
         mini_batch_size=256,
         lr=1,
         lr_start=None,
         optim="SGD",
         momentum=0.9,
         noise_multiplier=1,
         max_grad_norm=0.1,
         epochs=100,
         bn_noise_multiplier=None,
         max_epsilon=None,
         data_size=550000,
         delta=1e-6,
         logdir=None):
    logger = Logger(logdir)

    device = get_device()

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size

    train_data, test_data = get_data("cifar10", augment=augment)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=100,
                                               shuffle=False,
                                               num_workers=4,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=100,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)

    if isinstance(tiny_images, torch.utils.data.Dataset):
        train_data_aug = tiny_images
    else:
        print("loading tiny images...")
        train_data_aug, _ = get_data("cifar10_500K",
                                     augment=augment,
                                     aux_data_filename=tiny_images)

    scattering, K, (h, w) = None, None, (None, None)
    pre_scattered = False
    if use_scattering:
        scattering, K, (h, w) = get_scatter_transform("cifar10_500K")
        scattering.to(device)

    # if the whole data fits in memory, pre-compute the scattering
    if use_scattering and data_size <= 50000:
        loader = torch.utils.data.DataLoader(train_data_aug,
                                             batch_size=100,
                                             shuffle=False,
                                             num_workers=4)
        train_data_aug = get_scattered_dataset(loader, scattering, device,
                                               data_size)
        pre_scattered = True

    assert data_size <= len(train_data_aug)
    num_sup = min(data_size, 50000)
    num_batches = int(np.ceil(50000 / mini_batch_size))  # cifar-10 equivalent

    train_batch_sampler = SemiSupervisedSampler(data_size, num_batches,
                                                mini_batch_size)
    train_loader_aug = torch.utils.data.DataLoader(
        train_data_aug,
        batch_sampler=train_batch_sampler,
        num_workers=0 if pre_scattered else 4,
        pin_memory=not pre_scattered)

    rdp_norm = 0
    if model == "cnn":
        if use_scattering:
            save_dir = f"bn_stats/cifar10_500K"
            os.makedirs(save_dir, exist_ok=True)
            bn_stats, rdp_norm = scatter_normalization(
                train_loader,
                scattering,
                K,
                device,
                data_size,
                num_sup,
                noise_multiplier=bn_noise_multiplier,
                orders=ORDERS,
                save_dir=save_dir)
            model = CNNS["cifar10"](K, input_norm="BN", bn_stats=bn_stats)
            model = model.to(device)

            if not pre_scattered:
                model = nn.Sequential(scattering, model)
        else:
            model = CNNS["cifar10"](in_channels=3, internal_norm=False)

    elif model == "linear":
        save_dir = f"bn_stats/cifar10_500K"
        os.makedirs(save_dir, exist_ok=True)
        bn_stats, rdp_norm = scatter_normalization(
            train_loader,
            scattering,
            K,
            device,
            data_size,
            num_sup,
            noise_multiplier=bn_noise_multiplier,
            orders=ORDERS,
            save_dir=save_dir)
        model = ScatterLinear(K, (h, w), input_norm="BN", bn_stats=bn_stats)
        model = model.to(device)

        if not pre_scattered:
            model = nn.Sequential(scattering, model)
    else:
        raise ValueError(f"Unknown model {model}")
    model.to(device)

    if pre_scattered:
        test_loader = get_scattered_loader(test_loader, scattering, device)

    print(f"model has {get_num_params(model)} parameters")

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        bs,
        data_size,
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    best_acc = 0
    flat_count = 0

    for epoch in range(0, epochs):

        print(f"\nEpoch: {epoch} ({privacy_engine.steps} steps)")
        train_loss, train_acc = train(model,
                                      train_loader_aug,
                                      optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)

        if noise_multiplier > 0:
            print(f"sample_rate={privacy_engine.sample_rate}, "
                  f"mul={privacy_engine.noise_multiplier}, "
                  f"steps={privacy_engine.steps}")
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_norm + rdp_sgd,
                                           target_delta=delta)
            epsilon2, _ = get_privacy_spent(rdp_sgd, target_delta=delta)
            print(f"ε = {epsilon:.3f} (sgd only: ε = {epsilon2:.3f})")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
        logger.log_scalar("epsilon/train", epsilon, epoch)
        logger.log_scalar("cifar10k_loss/train", train_loss, epoch)
        logger.log_scalar("cifar10k_acc/train", train_acc, epoch)

        if test_acc > best_acc:
            best_acc = test_acc
            flat_count = 0
        else:
            flat_count += 1
            if flat_count >= 20:
                print("plateau...")
                return
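
Example #2 trains on CIFAR-10 plus an auxiliary pool of Tiny Images, and `SemiSupervisedSampler` controls how mini-batches are drawn from the combined dataset. Its implementation is not shown here; below is a minimal sketch of a batch sampler with this constructor signature, assuming batches are drawn uniformly from the full pool (the original may instead guarantee a fixed fraction of labeled examples per batch):

import numpy as np
from torch.utils.data import Sampler

class SemiSupervisedSampler(Sampler):
    """Sketch: yields `num_batches` random index batches per epoch,
    drawn with replacement from a pool of `data_size` examples."""

    def __init__(self, data_size, num_batches, batch_size):
        self.data_size = data_size
        self.num_batches = num_batches
        self.batch_size = batch_size

    def __iter__(self):
        for _ in range(self.num_batches):
            yield np.random.randint(0, self.data_size,
                                    self.batch_size).tolist()

    def __len__(self):
        return self.num_batches
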
Example #3

MAX_GRAD_NORM = 0.1
MAX_EPS = 5

BATCH_SIZES = [512, 1024, 2048, 4096, 8192, 16384]
BASE_LRS = [0.125, 0.25, 0.5, 1.0]

TARGET_EPS = 3
TARGET_EPOCHS = [30, 60, 120]

BN_MULS = [6, 8]
GROUPS = [9, 27, 81]

for target_epoch in TARGET_EPOCHS:
    for bs in BATCH_SIZES:
        for bn_mul in BN_MULS:
            rdp_norm = 2 * get_renyi_divergence(1.0, bn_mul)
            mul = get_noise_mul(50000,
                                bs,
                                TARGET_EPS,
                                target_epoch,
                                rdp_init=rdp_norm)

            for base_lr in BASE_LRS:
                lr = (bs // 512) * base_lr

                print(
                    f"epoch={target_epoch}, bs={bs}, bn_mul={bn_mul}, lr={base_lr}*{bs//512}={lr}, mul={mul}"
                )
                logdir = f"logs/baselines/cifar10/bs={bs}_lr={lr}_mul={mul:.2f}_bn={bn_mul}"

                main(dataset="cifar10",
Example #4
def main(feature_path=None,
         batch_size=2048,
         mini_batch_size=256,
         lr=1,
         optim="SGD",
         momentum=0.9,
         nesterov=False,
         noise_multiplier=1,
         max_grad_norm=0.1,
         max_epsilon=None,
         epochs=100,
         logdir=None):

    logger = Logger(logdir)

    device = get_device()

    # get pre-computed features
    x_train = np.load(f"{feature_path}_train.npy")
    x_test = np.load(f"{feature_path}_test.npy")

    train_data, test_data = get_data("cifar10", augment=False)
    y_train = np.asarray(train_data.targets)
    y_test = np.asarray(test_data.targets)

    trainset = torch.utils.data.TensorDataset(torch.from_numpy(x_train),
                                              torch.from_numpy(y_train))
    testset = torch.utils.data.TensorDataset(torch.from_numpy(x_test),
                                             torch.from_numpy(y_test))

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size
    train_loader = torch.utils.data.DataLoader(trainset,
                                               batch_size=mini_batch_size,
                                               shuffle=True,
                                               num_workers=1,
                                               pin_memory=True,
                                               drop_last=True)
    test_loader = torch.utils.data.DataLoader(testset,
                                              batch_size=mini_batch_size,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True)

    n_features = x_train.shape[-1]
    try:
        mean = np.load(f"{feature_path}_mean.npy")
        var = np.load(f"{feature_path}_var.npy")
    except FileNotFoundError:
        mean = np.zeros(n_features, dtype=np.float32)
        var = np.ones(n_features, dtype=np.float32)

    bn_stats = (torch.from_numpy(mean).to(device),
                torch.from_numpy(var).to(device))

    model = nn.Sequential(StandardizeLayer(bn_stats),
                          nn.Linear(n_features, 10)).to(device)

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum,
                                    nesterov=nesterov)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        sample_rate=bs / len(train_data),
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    for epoch in range(0, epochs):
        print(f"\nEpoch: {epoch}")

        train_loss, train_acc = train(model,
                                      train_loader,
                                      optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)

        if noise_multiplier > 0:
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_sgd)
            print(f"ε = {epsilon:.3f}")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
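
Example #4 trains a DP linear classifier on pre-computed features, using `StandardizeLayer` to normalize each feature with the stored (mean, var) statistics before the linear layer. A minimal sketch of such a layer, assuming it applies a fixed, non-learnable standardization (the source implementation may differ):

import torch
import torch.nn as nn

class StandardizeLayer(nn.Module):
    """Sketch: standardize features with precomputed (mean, var);
    no learnable parameters."""

    def __init__(self, bn_stats, eps=1e-5):
        super().__init__()
        mean, var = bn_stats
        self.register_buffer("mean", mean)
        self.register_buffer("std", torch.sqrt(var + eps))

    def forward(self, x):
        return (x - self.mean) / self.std
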
Example #5
def main(dataset,
         augment=False,
         batch_size=2048,
         mini_batch_size=256,
         sample_batches=False,
         lr=1,
         optim="SGD",
         momentum=0.9,
         nesterov=False,
         noise_multiplier=1,
         max_grad_norm=0.1,
         epochs=100,
         input_norm=None,
         num_groups=None,
         bn_noise_multiplier=None,
         max_epsilon=None,
         logdir=None):
    logger = Logger(logdir)
    device = get_device()

    train_data, test_data = get_data(dataset, augment=augment)
    scattering, K, (h, w) = get_scatter_transform(dataset)
    scattering.to(device)

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size

    # Batch accumulation and data augmentation with Poisson sampling aren't implemented
    if sample_batches:
        assert n_acc_steps == 1
        assert not augment

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=mini_batch_size,
                                               shuffle=True,
                                               num_workers=1,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=mini_batch_size,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True)

    rdp_norm = 0
    if input_norm == "BN":
        # compute noisy data statistics or load from disk if pre-computed
        save_dir = f"bn_stats/{dataset}"
        os.makedirs(save_dir, exist_ok=True)
        bn_stats, rdp_norm = scatter_normalization(
            train_loader,
            scattering,
            K,
            device,
            len(train_data),
            len(train_data),
            noise_multiplier=bn_noise_multiplier,
            orders=ORDERS,
            save_dir=save_dir)
        model = ScatterLinear(K, (h, w), input_norm="BN", bn_stats=bn_stats)
    else:
        model = ScatterLinear(K, (h, w),
                              input_norm=input_norm,
                              num_groups=num_groups)
    model.to(device)

    trainable_params = sum(p.numel() for p in model.parameters()
                           if p.requires_grad)
    print(f'model: {model}\n')
    print(f'has {trainable_params / 1e6:.4f} million trainable parameters')

    if augment:
        model = nn.Sequential(scattering, model)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=mini_batch_size,
                                                   shuffle=True,
                                                   num_workers=1,
                                                   pin_memory=True,
                                                   drop_last=True)
        preprocessor = None
    else:
        preprocessor = lambda x, y: (scattering(x), y)

    # baseline Logistic Regression without privacy
    if optim == "LR":
        assert not augment
        X_train = []
        y_train = []
        X_test = []
        y_test = []
        for data, target in train_loader:
            with torch.no_grad():
                # apply the scattering transform: the raw loader yields
                # unscattered images here
                data = scattering(data.to(device))
                X_train.append(data.cpu().numpy().reshape(len(data), -1))
                y_train.extend(target.cpu().numpy())

        for data, target in test_loader:
            with torch.no_grad():
                data = scattering(data.to(device))
                X_test.append(data.cpu().numpy().reshape(len(data), -1))
                y_test.extend(target.cpu().numpy())

        import numpy as np
        X_train = np.concatenate(X_train, axis=0)
        X_test = np.concatenate(X_test, axis=0)
        y_train = np.asarray(y_train)
        y_test = np.asarray(y_test)

        print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

        for idx, C in enumerate([0.01, 0.1, 1.0, 10, 100]):
            clf = LogisticRegression(C=C, fit_intercept=True)
            clf.fit(X_train, y_train)

            train_acc = 100 * clf.score(X_train, y_train)
            test_acc = 100 * clf.score(X_test, y_test)
            print(f"C={C}, "
                  f"Acc train = {train_acc: .2f}, "
                  f"Acc test = {test_acc: .2f}")

            logger.log_epoch(idx, 0, train_acc, 0, test_acc, None)
        return

    print(f"model has {get_num_params(model)} parameters")

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum,
                                    nesterov=nesterov)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        batch_size=bs,
        sample_size=len(train_data),
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    for epoch in range(0, epochs):
        print(f"\nEpoch: {epoch}")
        train_loss, train_acc = train(model,
                                      train_loader,
                                      optimizer,
                                      n_acc_steps=n_acc_steps,
                                      preprocessor=preprocessor)
        test_loss, test_acc = test(model,
                                   test_loader,
                                   preprocessor=preprocessor)

        if noise_multiplier > 0:
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_norm + rdp_sgd)
            epsilon2, _ = get_privacy_spent(rdp_sgd)
            print(f"ε = {epsilon:.3f} (sgd only: ε = {epsilon2:.3f})")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
        logger.log_scalar("epsilon/train", epsilon, epoch)
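
All five examples account for privacy the same way: per-step Rényi divergences of the subsampled Gaussian mechanism are summed (plus any one-off cost such as `rdp_norm`), and `get_privacy_spent` converts the composed RDP curve into an (ε, δ) guarantee. The standard conversion takes ε(α) = RDP(α) + log(1/δ)/(α − 1) and minimizes over the orders in `ORDERS`; a minimal sketch (the repo's helper may implement a tighter bound):

import numpy as np

def rdp_to_eps(rdp, orders, delta=1e-5):
    """Sketch: eps(alpha) = rdp(alpha) + log(1/delta)/(alpha - 1),
    minimized over the RDP orders."""
    orders = np.asarray(orders, dtype=float)
    rdp = np.asarray(rdp, dtype=float)
    eps = rdp + np.log(1.0 / delta) / (orders - 1.0)
    best = int(np.argmin(eps))
    return eps[best], orders[best]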