Example #1
    def test_poisson_sampling(self):
        B = 1
        N = 10
        d = 10
        dataset = [(i, torch.randn(d), torch.randn(d)) for i in range(N)]

        model = nn.Linear(d, d)
        optimizer = optim.SGD(model.parameters(), lr=0.1)
        engine = PrivacyEngine(
            model,
            sample_rate=B / N,
            target_epsilon=1.0,
            epochs=10,
            poisson=True,
            max_grad_norm=1,
            sample_size=N,
        )
        engine.attach(optimizer)

        generator = torch.Generator()
        generator.manual_seed(7)
        sampler = UniformWithReplacementSampler(
            num_samples=N, sample_rate=B / N, generator=generator
        )
        dataloader = torch.utils.data.DataLoader(dataset, batch_sampler=sampler)

        # Sampler with seed=7 should generate [], [7], [], [], [9], [0], [], [], [1], [4]
        for (_, x, y) in dataloader:
            prediction = model(x)
            loss = torch.mean((prediction - y) ** 2)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
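With Poisson sampling each record is included independently with probability sample_rate, so batch sizes vary from step to step and may be empty, which is what the expected index lists in the comment above reflect. A minimal sketch of the sampler on its own, assuming the pre-1.0 Opacus layout where UniformWithReplacementSampler lives in opacus.utils.uniform_sampler:

import torch
from opacus.utils.uniform_sampler import UniformWithReplacementSampler  # import path assumed (Opacus 0.x)

generator = torch.Generator()
generator.manual_seed(7)
sampler = UniformWithReplacementSampler(num_samples=10, sample_rate=0.1, generator=generator)

for step, indices in enumerate(sampler):
    # each index was drawn with probability sample_rate; an empty list means
    # no example was selected for this step
    print(step, indices)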
Example #2
def train_model(net, trainloader, trainset, device, dp):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=0.001, momentum=0.9)
    if dp:
        print('adding privacy engine')
        # if we are training with differential privacy, create the engine
        # (the positional arguments map to batch_size=4 and sample_size in the
        # pre-1.0 Opacus signature; note that len(trainloader) is the number of
        # batches, not the number of samples)
        privacy_engine = PrivacyEngine(
            net,
            4,
            len(trainloader),
            alphas=[1, 10, 100],
            noise_multiplier=1.3,
            max_grad_norm=1.0,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(5):  # currently training for 5 epochs
        print(f'epoch: {epoch}')
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data[0].to(device), data[1].to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
Example #3
    def test_privacy_engine_virtual_step_example(self):
        # IMPORTANT: When changing this code you also need to update
        # the docstring for opacus.privacy_engine.PrivacyEngine.virtual_step()
        model = nn.Linear(16, 2)
        dataloader = []
        batch_size = 64
        sample_size = 256
        sample_rate = batch_size / sample_size

        for _ in range(64):
            data = torch.randn(4, 16)
            labels = torch.randint(0, 2, (4, ))
            dataloader.append((data, labels))

        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

        privacy_engine = PrivacyEngine(
            model,
            sample_rate=sample_rate,
            noise_multiplier=0.8,
            max_grad_norm=0.5,
        )
        privacy_engine.attach(optimizer)

        for i, (X, y) in enumerate(dataloader):
            logits = model(X)
            loss = criterion(logits, y)
            loss.backward()
            if i % 16 == 15:
                optimizer.step()  # this will call privacy engine's step()
                optimizer.zero_grad()
            else:
                optimizer.virtual_step()  # this will call privacy engine's virtual_step()
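For reference, the accumulation arithmetic behind the test above: each dataloader entry holds 4 samples and one real optimizer step is taken every 16 entries, so the logical batch the engine accounts for matches the declared sample rate. A small informational sketch (not part of the test):

batch_size = 64        # logical batch size the privacy engine accounts for
sample_size = 256
mini_batch = 4         # samples per dataloader entry in the test
print(batch_size // mini_batch)   # 16 virtual steps per real step, matching `i % 16 == 15`
print(batch_size / sample_size)   # 0.25, the sample_rate passed to PrivacyEngine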
Example #4
def main():
    run_results = []
    for _ in range(N_RUNS):
        model = Inception3(num_classes=10).to(DEVICE)

        optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0)
        if not DISABLE_DP:
            privacy_engine = PrivacyEngine(
                model,
                batch_size=BATCH_SIZE,
                sample_size=len(train_loader.dataset),
                alphas=[
                    1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=SIGMA,
                max_grad_norm=GRAD_NORM,
                secure_rng=SECURE_RNG,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, EPOCHS + 1):
            train(model, DEVICE, train_loader, optimizer, epoch)
        run_results.append(test(model, DEVICE, test_loader))

    if len(run_results) > 1:
        print(
            "Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
                len(run_results), np.mean(run_results) * 100, np.std(run_results) * 100
            )
        )
Example #5
def initialize_training(parameters, learning_rate, local_dp):
    """
    Initializes the model, optimizer and scheduler and shares the parameters
    with all the workers in the group.
    This should be sent from server to all nodes.
    Args:
        learning_rate: The learning rate for training.
        local_dp: bool whether to apply local_dp or not.
    Returns:
        Returns the device, model, optimizer and scheduler.
    """

    # Determine the device to train on
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    # Initialize model and send parameters of server to all workers
    model = Net().to(device)

    # initialize the optimizer
    optimizer = optim.SGD(parameters, lr=learning_rate, momentum=0.5)

    if local_dp:
        privacy_engine = PrivacyEngine(
            model,
            batch_size=64,
            sample_size=60000,
            alphas=range(2, 32),
            noise_multiplier=1.3,
            max_grad_norm=1.0,
        )
        privacy_engine.attach(optimizer)

    # return device, optimizer and model, which are needed in train and test
    return device, optimizer, model
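Once the engine is attached, the privacy spent so far can be queried through the optimizer, as several of the later examples do. A hypothetical follow-up after some training steps, assuming a target delta of 1e-5:

# hypothetical follow-up once training with local_dp=True has run for a while
epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(1e-5)  # delta assumed to be 1e-5
print(f"(ε = {epsilon:.2f}, δ = 1e-5) for α = {best_alpha}")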
Example #6
def main(args):
    print(args)
    assert args.dpsgd
    torch.backends.cudnn.benchmark = True

    mdict = model_dict.copy()
    mdict['lstm'] = LSTMNet

    train_data, train_labels = get_data(args)
    model = mdict[args.experiment](vocab_size=args.max_features,
                                   batch_size=args.batch_size).cuda()
    optimizer = optim.SGD(model.parameters(),
                          lr=args.learning_rate,
                          momentum=0)
    loss_function = nn.CrossEntropyLoss(
    ) if args.experiment != 'logreg' else nn.BCELoss()

    privacy_engine = PrivacyEngine(
        model,
        batch_size=args.batch_size,
        sample_size=len(train_data),
        alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
        noise_multiplier=args.sigma,
        max_grad_norm=args.max_per_sample_grad_norm,
    )
    privacy_engine.attach(optimizer)

    timings = []
    for epoch in range(1, args.epochs + 1):
        start = time.perf_counter()
        dataloader = data.dataloader(train_data, train_labels, args.batch_size)
        for batch_idx, (x, y) in enumerate(dataloader):
            x, y = x.cuda(non_blocking=True), y.cuda(non_blocking=True)
            model.zero_grad()
            outputs = model(x)
            loss = loss_function(outputs, y)
            loss.backward()
            optimizer.step()
        torch.cuda.synchronize()
        duration = time.perf_counter() - start
        print("Time Taken for Epoch: ", duration)
        timings.append(duration)

        if args.dpsgd:
            epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                args.delta)
            print(
                f"Train Epoch: {epoch} \t"
                # f"Loss: {np.mean(losses):.6f} "
                f"(ε = {epsilon:.2f}, δ = {args.delta}) for α = {best_alpha}")
        else:
            # unreachable while the `assert args.dpsgd` above holds; `losses` is not tracked in this snippet
            print(f"Train Epoch: {epoch} \t Loss: {np.mean(losses):.6f}")

    if not args.no_save:
        utils.save_runtimes(__file__.split('.')[0], args, timings)
    else:
        print('Not saving!')
    print('Done!')
Example #7
def demo_basic(rank,
               world_size,
               weight,
               dp,
               noise_multiplier=0,
               max_grad_norm=1e8):
    # We don't want the 2 GPUs to work on the same examples/labels in parallel
    torch.manual_seed(rank)
    batch_size = 32
    withdp = "with" + ("out " if not dp else "")
    print(
        f"Running basic DDP {withdp} differential privacy example on rank {rank}."
    )

    device = setup_and_get_device(rank, world_size)

    # create model and move it to GPU with id rank
    model = ToyModel().to(device)
    print(f"Initial weight: {model.net1.weight.data}")

    # Freeze all the parameters except one, to ensure that the noise is the same
    # (the DDP hook does not browse the layers in the same order as the naive implementation)
    model.net1.bias.requires_grad = False
    model.net2.bias.requires_grad = False
    model.net2.weight.requires_grad = False

    if dp:
        ddp_model = DPDDP(model)
        engine = PrivacyEngine(
            ddp_model,
            batch_size=batch_size,
            sample_size=10 * batch_size,
            alphas=PRIVACY_ALPHAS,
            noise_multiplier=noise_multiplier,
            max_grad_norm=[max_grad_norm],
        )
        engine.random_number_generator = engine._set_seed(0)
    else:
        ddp_model = DDP(model, device_ids=[device])

    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    if dp:
        engine.attach(optimizer)

    optimizer.zero_grad()
    labels = torch.randn(batch_size, 5).to(device)

    outputs = ddp_model(torch.randn(batch_size, 10).to(device))
    loss_fn(outputs, labels).backward()
    optimizer.step()

    weight.copy_(model.net1.weight.data.cpu())

    cleanup()
Example #8
def initialize_dp(model, optimizer, sample_rate, dp_sigma):
    privacy_engine = PrivacyEngine(
        model,
        sample_rate=sample_rate * N_ACCUMULATION_STEPS,
        # epochs = EPOCHS,
        # target_epsilon = EPSILON,
        target_delta=DELTA,
        noise_multiplier=dp_sigma,
        max_grad_norm=MAX_GRAD_NORM,
    )
    privacy_engine.attach(optimizer)
Example #9
    def test_privacy_engine_class_example(self):
        # IMPORTANT: When changing this code you also need to update
        # the docstring for opacus.privacy_engine.PrivacyEngine
        model = torch.nn.Linear(16, 32)  # An example model
        optimizer = torch.optim.SGD(model.parameters(), lr=0.05)
        privacy_engine = PrivacyEngine(
            model,
            sample_rate=0.01,
            noise_multiplier=1.3,
            max_grad_norm=1.0,
        )
        privacy_engine.attach(optimizer)  # That's it! Now it's business as usual.
Example #10
    def test_model_validator(self):
        """
        Test that the privacy engine throws on attach
        if there are unsupported modules
        """
        privacy_engine = PrivacyEngine(
            models.resnet18(),
            sample_rate=self.SAMPLE_RATE,
            alphas=self.ALPHAS,
            noise_multiplier=1.3,
            max_grad_norm=1,
        )
        with self.assertRaises(IncompatibleModuleException):
            privacy_engine.attach(self.private_optimizer)
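The resnet18 above fails validation because its BatchNorm layers mix information across samples, which per-sample gradient computation cannot support. The usual remedy, used in Example #23 below, is to convert those layers before attaching; a minimal sketch, assuming the utility's pre-1.0 location:

from torchvision import models
from opacus.utils.module_modification import convert_batchnorm_modules  # import path assumed (Opacus 0.x)

# replaces BatchNorm modules with DP-compatible normalization (GroupNorm by default)
model = convert_batchnorm_modules(models.resnet18(num_classes=10))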
Example #11
def client(cur_net, current_iter, current_server_rank_id, best_valid_loss, best_net_glob, server_flag):
	# local train
	cur_net.train()
	optimizer = get_optimizer(args, cur_net)
	loss_func = nn.CrossEntropyLoss()
	if args.dp:
		privacy_engine = PrivacyEngine(cur_net, batch_size=args.bs, sample_size=len(local_train_loader),
										alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
										noise_multiplier=0.3, max_grad_norm=1.2, secure_rng=args.secure_rng)
		privacy_engine.attach(optimizer)
	current_state_dict, current_loss = normal_train(args, cur_net, optimizer, loss_func, local_train_loader, valid_loader)

	if args.dp:
		privacy_engine.detach()

	# send the state_dict to current server
	if args.tphe:
		client_sockets[rank2idx[current_server_rank_id]].send(pickle.dumps([encrypt_torch_state_dict(pub_key, current_state_dict), current_loss]))
	else:
		client_sockets[rank2idx[current_server_rank_id]].send(pickle.dumps([current_state_dict, current_loss]))

	# recv the aggregated state dict from current server
	aggregated_state_dict = client_sockets[rank2idx[current_server_rank_id]].recv(int(args.buffer))
	aggregated_state_dict = pickle.loads(aggregated_state_dict)

	# parse aggregated state_dict
	parse_aggregated_state_dict(aggregated_state_dict, cur_net)

	# recv metadata
	metadata_list_pkl = client_sockets[rank2idx[current_server_rank_id]].recv(int(args.buffer))
	loss_avg, tmp_loss_valid, next_server_rank_id = pickle.loads(metadata_list_pkl)
	loss_train.append(loss_avg)
	loss_valid.append(tmp_loss_valid)
	print('Round{:3d}, Average loss {:.3f}'.format(current_iter, loss_avg))
	print('Round{:3d}, Validation loss {:.3f}'.format(current_iter, tmp_loss_valid))
	if tmp_loss_valid < best_valid_loss:
		best_valid_loss = tmp_loss_valid
		best_net_glob = copy.deepcopy(cur_net)
		print('SAVE BEST MODEL AT EPOCH {}'.format(current_iter))

	# update the metadata for server
	current_server_rank_id = next_server_rank_id
	if next_server_rank_id == args.rank:
		server_flag = True

	print("\33[31m\33[1m Current server rank id {} \33[0m".format(current_server_rank_id))

	return cur_net, current_server_rank_id, best_valid_loss, best_net_glob, server_flag
Example #12
    def setUpOptimizer(
        self, model: nn.Module, data_loader: DataLoader, privacy_engine: bool = False
    ):
        # sample parameter values
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
        optimizer.zero_grad()
        if privacy_engine:
            pe = PrivacyEngine(
                model,
                sample_rate=data_loader.batch_size / len(data_loader.dataset),
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=1.3,
                max_grad_norm=1,
            )
            pe.attach(optimizer)
        return optimizer
Example #13
def demo_ddp_hook(rank, world_size, weight, dp, noise_multiplier,
                  max_grad_norm):
    torch.manual_seed(rank)
    batch_size = 32
    withdp = "with" + ("out " if not dp else "")
    print(
        f"Running DDP hook {withdp} differential privacy example on rank {rank}."
    )

    device = setup_and_get_device(rank, world_size, nonce=1)

    # create model and move it to GPU with id rank
    model = ToyModel().to(device)

    model.net1.bias.requires_grad = False
    model.net2.bias.requires_grad = False
    model.net2.weight.requires_grad = False

    ddp_model = DDP(model, device_ids=[device])

    if dp:
        engine = PrivacyEngine(
            ddp_model,
            batch_size=batch_size,
            sample_size=10 * batch_size,
            alphas=PRIVACY_ALPHAS,
            noise_multiplier=noise_multiplier,
            max_grad_norm=[max_grad_norm],
        )
        engine.random_number_generator = engine._set_seed(0)

    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    if dp:
        engine.attach(optimizer)

    optimizer.zero_grad()
    labels = torch.randn(batch_size, 5).to(device)

    outputs = ddp_model(torch.randn(batch_size, 10).to(device))
    loss_fn(outputs, labels).backward()
    optimizer.step()

    weight.copy_(model.net1.weight.data.cpu())

    del ddp_model
    cleanup()
Example #14
def train_model(net, trainloader, trainset, device, dp):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(net.parameters(), lr=LR)
    # optimizer = torch.optim.SGD(net.parameters(),lr=.003, momentum=.9)

    if dp:
        print('adding privacy engine')
        # if we are training with differential privacy, create the engine
        privacy_engine = PrivacyEngine(net,
                                       batch_size=VIRTUAL_BATCH_SIZE,
                                       sample_size=len(trainset),
                                       alphas=range(2, 32),
                                       noise_multiplier=NOISE_MULTIPLIER,
                                       max_grad_norm=MAX_GRAD_NORM)
        privacy_engine.attach(optimizer)

    for epoch in range(3):  # train for 3 epochs
        print(f'epoch: {epoch}')
        train(net, trainloader, optimizer, epoch, device, dp)
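Note that the engine above is given VIRTUAL_BATCH_SIZE rather than the loader's mini-batch size: when gradients are accumulated over virtual steps (see Example #3), the engine should account for the effective batch, just as Example #23 passes batch_size * n_accumulation_steps. The module-level constants this snippet relies on are not shown; a hypothetical set of values, for illustration only:

# hypothetical placeholder values for the constants referenced in train_model above
LR = 0.001
VIRTUAL_BATCH_SIZE = 256      # effective batch size the privacy accountant should see
NOISE_MULTIPLIER = 1.1
MAX_GRAD_NORM = 1.0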
Example #15
def demo_basic(rank, weight, world_size, dp):
    torch.manual_seed(world_size)
    batch_size = 32
    withdp = "with" + ("out " if not dp else "")
    print(
        f"Running basic DDP {withdp} differential privacy example on rank {rank}."
    )
    setup(rank, world_size)

    # create model and move it to GPU with id rank
    model = ToyModel().to(rank)
    if dp:
        ddp_model = DPDDP(model)
        engine = PrivacyEngine(
            ddp_model,
            batch_size=batch_size,
            sample_size=10 * batch_size,
            alphas=PRIVACY_ALPHAS,
            noise_multiplier=0,
            max_grad_norm=1e8,
        )
    else:
        ddp_model = DDP(model, device_ids=[rank])

    loss_fn = nn.MSELoss()
    optimizer = optim.SGD(ddp_model.parameters(), lr=1)
    if dp:
        engine.attach(optimizer)

    # if rank == 0:
    #     print(model.net1.weight)
    optimizer.zero_grad()
    labels = torch.randn(batch_size, 5).to(rank)
    outputs = ddp_model(torch.randn(batch_size, 10).to(rank))
    loss_fn(outputs, labels).backward()
    optimizer.step()
    # if rank == 0:
    #     print(model.net1.weight)

    weight.copy_(model.net1.weight.data.cpu())

    cleanup()
Example #16
    def setUp_init_model(
        self, private=False, state_dict=None, model=None, **privacy_engine_kwargs
    ):
        model = model or SampleConvNet()
        optimizer = torch.optim.SGD(model.parameters(), lr=self.LR, momentum=0)
        if state_dict:
            model.load_state_dict(state_dict)

        if private:
            if len(privacy_engine_kwargs) == 0:
                privacy_engine_kwargs = self.privacy_default_params
            privacy_engine = PrivacyEngine(
                model,
                batch_size=self.BATCH_SIZE,
                sample_size=self.DATA_SIZE,
                alphas=self.ALPHAS,
                **privacy_engine_kwargs,
            )
            privacy_engine.attach(optimizer)

        return model, optimizer
Example #17
    def setUpOptimizer(
        self, model: nn.Module, data_loader: DataLoader, privacy_engine: bool = False
    ):
        # sample parameter values
        optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)
        optimizer.zero_grad()
        if privacy_engine:
            pe = PrivacyEngine(
                model,
                # pyre-fixme[6]: Expected `int` for 2nd param but got `Optional[int]`.
                batch_size=data_loader.batch_size,
                # pyre-fixme[6]: Expected `Sized` for 1st param but got
                #  `Dataset[typing.Any]`.
                sample_size=len(data_loader.dataset),
                # pyre-fixme[6]: `+` is not supported for operand types
                #  `List[float]` and `List[int]`.
                alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=1.3,
                max_grad_norm=1,
            )
            pe.attach(optimizer)
        return optimizer
Example #18
def add_remove_ddp_hooks(rank,
                         world_size,
                         remaining_hooks,
                         dp,
                         noise_multiplier=0,
                         max_grad_norm=1e8):
    device = setup_and_get_device(rank, world_size, nonce=2)

    model = ToyModel().to(device)
    ddp_model = nn.parallel.DistributedDataParallel(model, device_ids=[device])

    engine = PrivacyEngine(
        ddp_model,
        batch_size=1,
        sample_size=10,
        alphas=PRIVACY_ALPHAS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=[max_grad_norm],
    )

    optimizer = optim.SGD(ddp_model.parameters(), lr=1)

    engine.attach(optimizer)

    remaining_hooks["attached"] = {
        p: p._backward_hooks
        for p in engine.module.parameters() if p._backward_hooks
    }
    engine.detach()

    remaining_hooks["detached"] = {
        p: p._backward_hooks
        for p in engine.module.parameters() if p._backward_hooks
    }

    cleanup()
Example #19
def main():
    args = parser.parse_args()
    device = torch.device(args.device)
    root = Path(args.data_root)

    all_filenames = list(root.glob("**/*.txt"))
    print(
        f"At root {root.absolute()}, found the following files: {all_filenames}"
    )
    all_letters = string.ascii_letters + " .,;'#"
    n_letters = len(all_letters)

    category_lines, all_categories, n_categories = build_category_lines(
        all_filenames, all_letters)
    category_lines_train, category_lines_val = split_data_train_eval(
        category_lines, args.train_eval_split)
    rnn = CharNNClassifier(n_letters, args.n_hidden, n_categories, n_letters,
                           args.batch_size).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(rnn.parameters(), lr=args.learning_rate)

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            rnn,
            batch_size=args.batch_size,
            sample_size=get_dataset_size(category_lines_train),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            batch_first=False,
        )
        privacy_engine.attach(optimizer)

    # Measure time elapsed for profiling training
    def time_since(since):
        now = time.time()
        s = now - since
        m = math.floor(s / 60)
        s -= m * 60
        return "%dm %ds" % (m, s)

    # Keep track of losses for tracking
    current_loss = 0

    start_time = time.time()
    for iteration in tqdm(range(1, args.iterations + 1)):
        # Get a random training input and target batch
        _, _, category_tensors, line_tensors = get_random_batch(
            category_lines_train,
            args.batch_size,
            all_categories,
            all_letters,
            n_letters,
            args,
            device,
        )
        output, loss = train(rnn, criterion, optimizer, category_tensors,
                             line_tensors, device)
        current_loss += loss

        # Print iteration number, loss, name and guess
        if iteration % print_every == 0:
            acc = get_eval_metrics(
                rnn,
                category_lines_val,
                all_categories,
                all_letters,
                n_letters,
                args.batch_size,
                args.max_seq_length,
                device,
            )
            time_elapsed = time_since(start_time)

            if not args.disable_dp:
                epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(
                    args.delta)
                print(
                    f"Iteration={iteration} / Time elapsed: {time_elapsed} / Loss={loss:.4f} / "
                    f"Eval Accuracy:{acc*100:.2f} / "
                    f"(ε = {epsilon:.2f}, δ = {args.delta:.2f}) for α = {best_alpha:.2f}"
                )
            else:
                print(
                    f"Iteration={iteration} / Time elapsed: {time_elapsed} / Loss={loss:.4f} / "
                    f"Eval Accuracy:{acc*100:.2f}")
Example #20
def main(dataset,
         augment=False,
         use_scattering=False,
         size=None,
         batch_size=2048,
         mini_batch_size=256,
         sample_batches=False,
         lr=1,
         optim="SGD",
         momentum=0.9,
         nesterov=False,
         noise_multiplier=1,
         max_grad_norm=0.1,
         epochs=100,
         input_norm=None,
         num_groups=None,
         bn_noise_multiplier=None,
         max_epsilon=None,
         logdir=None,
         early_stop=True,
         seed=0):
    torch.manual_seed(seed)
    logger = Logger(logdir)
    device = get_device()

    train_data, test_data = get_data(dataset, augment=augment)

    if use_scattering:
        scattering, K, _ = get_scatter_transform(dataset)
        scattering.to(device)
    else:
        scattering = None
        K = 3 if len(train_data.data.shape) == 4 else 1

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size

    # Batch accumulation and data augmentation with Poisson sampling isn't implemented
    if sample_batches:
        assert n_acc_steps == 1
        assert not augment

    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=mini_batch_size,
                                               shuffle=True,
                                               num_workers=1,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=mini_batch_size,
                                              shuffle=False,
                                              num_workers=1,
                                              pin_memory=True)

    rdp_norm = 0
    if input_norm == "BN":
        # compute noisy data statistics or load from disk if pre-computed
        save_dir = f"bn_stats/{dataset}"
        os.makedirs(save_dir, exist_ok=True)
        bn_stats, rdp_norm = scatter_normalization(
            train_loader,
            scattering,
            K,
            device,
            len(train_data),
            len(train_data),
            noise_multiplier=bn_noise_multiplier,
            orders=ORDERS,
            save_dir=save_dir)
        model = CNNS[dataset](K, input_norm="BN", bn_stats=bn_stats, size=size)
    else:
        model = CNNS[dataset](K,
                              input_norm=input_norm,
                              num_groups=num_groups,
                              size=size)

    model.to(device)

    if use_scattering and augment:
        model = nn.Sequential(scattering, model)
        train_loader = torch.utils.data.DataLoader(train_data,
                                                   batch_size=mini_batch_size,
                                                   shuffle=True,
                                                   num_workers=1,
                                                   pin_memory=True,
                                                   drop_last=True)
    else:
        # pre-compute the scattering transform if necessary
        train_loader = get_scattered_loader(train_loader,
                                            scattering,
                                            device,
                                            drop_last=True,
                                            sample_batches=sample_batches)
        test_loader = get_scattered_loader(test_loader, scattering, device)

    print(f"model has {get_num_params(model)} parameters")

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum,
                                    nesterov=nesterov)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        batch_size=bs,
        sample_size=len(train_data),
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    best_acc = 0
    flat_count = 0

    results = dict(train_zeon=[],
                   train_xent=[],
                   test_zeon=[],
                   test_xent=[],
                   epoch=[])
    for epoch in range(0, epochs):
        print(f"\nEpoch: {epoch}")

        train_loss, train_acc = train(model,
                                      train_loader,
                                      optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)

        results['train_zeon'].append(train_acc)
        results['train_xent'].append(train_loss)
        results['test_zeon'].append(test_acc)
        results['test_xent'].append(test_loss)
        results['epoch'].append(epoch)

        if noise_multiplier > 0:
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_norm + rdp_sgd)
            epsilon2, _ = get_privacy_spent(rdp_sgd)
            print(f"ε = {epsilon:.3f} (sgd only: ε = {epsilon2:.3f})")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
        logger.log_scalar("epsilon/train", epsilon, epoch)

        # stop if we're not making progress
        if test_acc > best_acc:
            best_acc = test_acc
            flat_count = 0
        else:
            flat_count += 1
            if flat_count >= 20 and early_stop:
                print("plateau...")
                break

    # Write to file.
    record = {
        **results,
        **{
            'best_acc': best_acc,
            'seed': seed,
            'dataset': dataset
        }
    }
    record_path = os.path.join('.', 'record', f'{dataset}-{seed}.json')
    os.makedirs(os.path.dirname(record_path), exist_ok=True)
    with open(record_path, 'w') as f:
        json.dump(record, f, indent=4)
    import logging
    logging.warning(f'Wrote to file: {record_path}')
Example #21
def main():
    args = parser.parse_args()
    device = torch.device(args.device)
    ds = NamesDataset(args.data_root)
    train_len = int(args.train_split * len(ds))
    test_len = len(ds) - train_len

    print(f"{train_len} samples for training, {test_len} for testing")

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")

    else:
        generator = None

    train_ds, test_ds = torch.utils.data.random_split(ds,
                                                      [train_len, test_len],
                                                      generator=generator)

    model = CharNNClassifier(
        args.embedding_size,
        args.hidden_size,
        len(ds.labels),
        args.n_lstm_layers,
        args.bidirectional_lstm,
    )
    model = model.to(device)

    train_loader = DataLoader(
        train_ds,
        num_workers=8,
        pin_memory=True,
        generator=generator,
        batch_sampler=UniformWithReplacementSampler(
            num_samples=len(train_ds),
            sample_rate=args.sample_rate,
            generator=generator),
        collate_fn=padded_collate,
    )

    test_loader = DataLoader(
        test_ds,
        batch_size=args.batch_size_test,
        shuffle=False,
        num_workers=8,
        pin_memory=True,
        collate_fn=padded_collate,
    )

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            sample_rate=args.sample_rate,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            target_delta=args.delta,
            secure_rng=args.secure_rng,
        )
        privacy_engine.attach(optimizer)
    else:
        privacy_engine = None

    print("Train stats: \n")
    for epoch in tqdm(range(args.epochs)):
        train(model, criterion, optimizer, train_loader, epoch, device=device)
        if args.test_every:
            if epoch % args.test_every == 0:
                test(model, test_loader, privacy_engine, device=device)

    test(model, test_loader, privacy_engine, device=device)
Example #22
    def train(self,
              data,
              categorical_columns=None,
              ordinal_columns=None,
              update_epsilon=None):
        if update_epsilon:
            self.epsilon = update_epsilon

        if isinstance(data, pd.DataFrame):
            for col in data.columns:
                data[col] = pd.to_numeric(data[col], errors="ignore")
            self.pd_cols = data.columns
            self.pd_index = data.index
            data = data.to_numpy()
        elif not isinstance(data, np.ndarray):
            raise ValueError("Data must be a numpy array or pandas dataframe")

        dataset = TensorDataset(
            torch.from_numpy(data.astype("float32")).to(self.device))
        dataloader = DataLoader(dataset,
                                batch_size=self.batch_size,
                                shuffle=True,
                                drop_last=True)

        self.generator = Generator(self.latent_dim,
                                   data.shape[1],
                                   binary=self.binary).to(self.device)
        discriminator = Discriminator(data.shape[1]).to(self.device)
        optimizer_d = optim.Adam(discriminator.parameters(), lr=4e-4)

        privacy_engine = PrivacyEngine(
            discriminator,
            batch_size=self.batch_size,
            sample_size=len(data),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=3.5,
            max_grad_norm=1.0,
            clip_per_layer=True,
        )

        privacy_engine.attach(optimizer_d)
        optimizer_g = optim.Adam(self.generator.parameters(), lr=1e-4)

        criterion = nn.BCELoss()

        for epoch in range(self.epochs):
            for i, data in enumerate(dataloader):
                discriminator.zero_grad()

                real_data = data[0].to(self.device)

                # train with fake data
                noise = torch.randn(self.batch_size,
                                    self.latent_dim,
                                    1,
                                    1,
                                    device=self.device)
                noise = noise.view(-1, self.latent_dim)
                fake_data = self.generator(noise)
                label_fake = torch.full((self.batch_size, ),
                                        0,
                                        dtype=torch.float,
                                        device=self.device)
                output = discriminator(fake_data.detach())
                loss_d_fake = criterion(output, label_fake)
                loss_d_fake.backward()
                optimizer_d.step()

                # train with real data
                label_true = torch.full((self.batch_size, ),
                                        1,
                                        dtype=torch.float,
                                        device=self.device)
                output = discriminator(real_data.float())
                loss_d_real = criterion(output, label_true)
                loss_d_real.backward()
                optimizer_d.step()

                max_grad_norm = []
                for p in discriminator.parameters():
                    param_norm = p.grad.data.norm(2).item()
                    max_grad_norm.append(param_norm)

                # adapt the engine's per-layer clipping bounds (clip_per_layer=True above)
                # to the gradient norms observed on this batch
                privacy_engine.max_grad_norm = max_grad_norm

                # train generator
                self.generator.zero_grad()
                label_g = torch.full((self.batch_size, ),
                                     1,
                                     dtype=torch.float,
                                     device=self.device)
                output_g = discriminator(fake_data)
                loss_g = criterion(output_g, label_g)
                loss_g.backward()
                optimizer_g.step()

                # manually clear gradients
                for p in discriminator.parameters():
                    if hasattr(p, "grad_sample"):
                        del p.grad_sample
                # autograd_grad_sample.clear_backprops(discriminator)

                if self.delta is None:
                    # default delta to 1/N, where N is the number of training records
                    # (the `data` loop variable shadows the original array, so use the dataset)
                    self.delta = 1 / len(dataset)

                eps, best_alpha = optimizer_d.privacy_engine.get_privacy_spent(
                    self.delta)

            if self.epsilon < eps:
                break
Example #23
def main():
    parser = argparse.ArgumentParser(description="PyTorch CIFAR10 DP Training")
    parser.add_argument(
        "-j",
        "--workers",
        default=2,
        type=int,
        metavar="N",
        help="number of data loading workers (default: 2)",
    )
    parser.add_argument(
        "--epochs",
        default=90,
        type=int,
        metavar="N",
        help="number of total epochs to run",
    )
    parser.add_argument(
        "--start-epoch",
        default=1,
        type=int,
        metavar="N",
        help="manual epoch number (useful on restarts)",
    )
    parser.add_argument(
        "-b",
        "--batch-size",
        # This should be 256, but that OOMs using the prototype.
        default=64,
        type=int,
        metavar="N",
        help="mini-batch size (default: 64), this is the total "
        "batch size of all GPUs on the current node when "
        "using Data Parallel or Distributed Data Parallel",
    )
    parser.add_argument(
        "-na",
        "--n_accumulation_steps",
        default=1,
        type=int,
        metavar="N",
        help="number of mini-batches to accumulate into an effective batch",
    )
    parser.add_argument(
        "--lr",
        "--learning-rate",
        default=0.001,
        type=float,
        metavar="LR",
        help="initial learning rate",
        dest="lr",
    )
    parser.add_argument("--momentum",
                        default=0.9,
                        type=float,
                        metavar="M",
                        help="SGD momentum")
    parser.add_argument(
        "--wd",
        "--weight-decay",
        default=5e-4,
        type=float,
        metavar="W",
        help="SGD weight decay (default: 1e-4)",
        dest="weight_decay",
    )
    parser.add_argument(
        "-p",
        "--print-freq",
        default=10,
        type=int,
        metavar="N",
        help="print frequency (default: 10)",
    )
    parser.add_argument(
        "--resume",
        default="",
        type=str,
        metavar="PATH",
        help="path to latest checkpoint (default: none)",
    )
    parser.add_argument(
        "-e",
        "--evaluate",
        dest="evaluate",
        action="store_true",
        help="evaluate model on validation set",
    )
    parser.add_argument("--seed",
                        default=None,
                        type=int,
                        help="seed for initializing training. ")
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--secure-rng",
        action="store_true",
        default=False,
        help=
        "Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )

    parser.add_argument(
        "--checkpoint-file",
        type=str,
        default="checkpoint",
        help="path to save check points",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../cifar10",
        help="Where CIFAR10 is/will be stored",
    )
    parser.add_argument("--log-dir",
                        type=str,
                        default="",
                        help="Where Tensorboard log will be stored")
    parser.add_argument(
        "--optim",
        type=str,
        default="Adam",
        help="Optimizer to use (Adam, RMSprop, SGD)",
    )

    args = parser.parse_args()
    args.disable_dp = True  # hard-coded override: forces DP off regardless of the --disable-dp flag

    if args.disable_dp and args.n_accumulation_steps > 1:
        raise ValueError("Virtual steps only work when DP is enabled")

    # The following few lines, enable stats gathering about the run
    # 1. where the stats should be logged
    stats.set_global_summary_writer(
        tensorboard.SummaryWriter(os.path.join("/tmp/stat", args.log_dir)))
    # 2. enable stats
    stats.add(
        # stats about gradient norms aggregated for all layers
        stats.Stat(stats.StatType.GRAD, "AllLayers", frequency=0.1),
        # stats about gradient norms per layer
        stats.Stat(stats.StatType.GRAD, "PerLayer", frequency=0.1),
        # stats about clipping
        stats.Stat(stats.StatType.GRAD, "ClippingStats", frequency=0.1),
        # stats on training accuracy
        stats.Stat(stats.StatType.TRAIN, "accuracy", frequency=0.01),
        # stats on validation accuracy
        stats.Stat(stats.StatType.TEST, "accuracy"),
    )

    # The following lines enable stat gathering for the clipping process
    # and set a default of per layer clipping for the Privacy Engine
    clipping = {"clip_per_layer": False, "enable_stat": True}

    if args.secure_rng:
        assert False  # the secure RNG path is deliberately disabled in this snippet
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")

    else:
        generator = None

    augmentations = [
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
    ]
    normalize = [
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465),
                             (0.2023, 0.1994, 0.2010)),
    ]
    train_transform = transforms.Compose(
        augmentations + normalize if args.disable_dp else normalize)

    test_transform = transforms.Compose(normalize)

    train_dataset = CIFAR10(root=args.data_root,
                            train=True,
                            download=True,
                            transform=train_transform)

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        drop_last=True,
        generator=generator,
    )

    test_dataset = CIFAR10(root=args.data_root,
                           train=False,
                           download=True,
                           transform=test_transform)
    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.workers,
    )

    best_acc1 = 0
    device = torch.device(args.device)
    model = convert_batchnorm_modules(models.resnet18(num_classes=10))
    # model = CIFAR10Model()
    model = model.to(device)

    if args.optim == "SGD":
        optimizer = optim.SGD(
            model.parameters(),
            lr=args.lr,
            momentum=args.momentum,
            weight_decay=args.weight_decay,
        )
    elif args.optim == "RMSprop":
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    elif args.optim == "Adam":
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    else:
        raise NotImplementedError(
            "Optimizer not recognized. Please check spelling")

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            batch_size=args.batch_size * args.n_accumulation_steps,
            sample_size=len(train_dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            secure_rng=args.secure_rng,
            **clipping,
        )
        privacy_engine.attach(optimizer)

    for epoch in range(args.start_epoch, args.epochs + 1):
        train(args, model, train_loader, optimizer, epoch, device)
        top1_acc = test(args, model, test_loader, device)

        # remember best acc@1 and save checkpoint
        is_best = top1_acc > best_acc1
        best_acc1 = max(top1_acc, best_acc1)

        save_checkpoint(
            {
                "epoch": epoch + 1,
                "arch": "ResNet18",
                "state_dict": model.state_dict(),
                "best_acc1": best_acc1,
                "optimizer": optimizer.state_dict(),
            },
            is_best,
            filename=args.checkpoint_file + ".tar",
        )
Example #24
class CTGANSynthesizer(BaseSynthesizer):
    """Conditional Table GAN Synthesizer.

    This is the core class of the CTGAN project, where the different components
    are orchestrated together.
    For more details about the process, please check the [Modeling Tabular data using
    Conditional GAN](https://arxiv.org/abs/1907.00503) paper.
    Args:
        embedding_dim (int):
            Size of the random sample passed to the Generator. Defaults to 128.
        generator_dim (tuple or list of ints):
            Size of the output samples for each one of the Residuals. A Residual Layer
            will be created for each one of the values provided. Defaults to (256, 256).
        discriminator_dim (tuple or list of ints):
            Size of the output samples for each one of the Discriminator Layers. A Linear Layer
            will be created for each one of the values provided. Defaults to (256, 256).
        generator_lr (float):
            Learning rate for the generator. Defaults to 2e-4.
        generator_decay (float):
            Generator weight decay for the Adam Optimizer. Defaults to 1e-6.
        discriminator_lr (float):
            Learning rate for the discriminator. Defaults to 2e-4.
        discriminator_decay (float):
            Discriminator weight decay for the Adam Optimizer. Defaults to 1e-6.
        batch_size (int):
            Number of data samples to process in each step.
        discriminator_steps (int):
            Number of discriminator updates to do for each generator update.
            From the WGAN paper: https://arxiv.org/abs/1701.07875. WGAN paper
            default is 5. Default used is 1 to match original CTGAN implementation.
        log_frequency (boolean):
            Whether to use log frequency of categorical levels in conditional
            sampling. Defaults to ``True``.
        verbose (boolean):
            Whether to have print statements for progress results. Defaults to ``False``.
        epochs (int):
            Number of training epochs. Defaults to 300.
        pack (int):
            Number of samples packed together in each discriminator input. Defaults to 1.
        epsilon (float):
            Target differential privacy budget; training stops once it is exhausted. Defaults to 10.
        delta (float):
            Target delta for the privacy accountant. Defaults to 1e-5.
        noise_multiplier (float):
            Noise multiplier used for DP-SGD on the discriminator. Defaults to 2.
        max_grad_norm (float):
            Per-sample gradient clipping bound. Defaults to 1.
        dp (boolean):
            Whether to train the discriminator with differential privacy. Defaults to ``True``.
    """
    def __init__(self,
                 embedding_dim=128,
                 generator_dim=(256, 256),
                 discriminator_dim=(256, 256),
                 generator_lr=2e-4,
                 generator_decay=1e-6,
                 discriminator_lr=2e-4,
                 discriminator_decay=0,
                 pack=1,
                 batch_size=500,
                 discriminator_steps=1,
                 log_frequency=True,
                 verbose=False,
                 epochs=300,
                 epsilon=10,
                 delta=1e-5,
                 noise_multiplier=2,
                 max_grad_norm=1,
                 dp=True):

        assert batch_size % 2 == 0

        self._embedding_dim = embedding_dim
        self._generator_dim = generator_dim
        self._discriminator_dim = discriminator_dim

        self._generator_lr = generator_lr
        self._generator_decay = generator_decay
        self._discriminator_lr = discriminator_lr
        self._discriminator_decay = discriminator_decay

        self._pack = pack  # packing option added on top of the original CTGAN
        self._batch_size = batch_size
        self._discriminator_steps = discriminator_steps
        self._log_frequency = log_frequency
        self._verbose = verbose
        self._epochs = epochs
        self._epsilon = epsilon
        self._device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        self.trained_epochs = 0
        self.trained_epsilon = 0
        self._delta = delta
        self._noise_multiplier = noise_multiplier
        self.max_grad_norm = max_grad_norm
        self._dp = dp
        # monkey-patch Opacus's per-sample gradient accumulation with a custom implementation
        opacus.supported_layers_grad_samplers._create_or_extend_grad_sample = _custom_create_or_extend_grad_sample

    @staticmethod
    def _gumbel_softmax(logits, tau=1, hard=False, eps=1e-10, dim=-1):
        """Deals with the instability of the gumbel_softmax for older versions of torch.

        For more details about the issue:
        https://drive.google.com/file/d/1AA5wPfZ1kquaRtVruCd6BiYZGcDeNxyP/view?usp=sharing
        Args:
            logits:
                […, num_features] unnormalized log probabilities
            tau:
                non-negative scalar temperature
            hard:
                if True, the returned samples will be discretized as one-hot vectors,
                but will be differentiated as if it is the soft sample in autograd
            dim (int):
                a dimension along which softmax will be computed. Default: -1.
        Returns:
            Sampled tensor of same shape as logits from the Gumbel-Softmax distribution.
        """
        if version.parse(torch.__version__) < version.parse("1.2.0"):
            for i in range(10):
                transformed = functional.gumbel_softmax(logits,
                                                        tau=tau,
                                                        hard=hard,
                                                        eps=eps,
                                                        dim=dim)
                if not torch.isnan(transformed).any():
                    return transformed
            raise ValueError("gumbel_softmax returning NaN.")

        return functional.gumbel_softmax(logits,
                                         tau=tau,
                                         hard=hard,
                                         eps=eps,
                                         dim=dim)

    def _apply_activate(self, data):
        """Apply proper activation function to the output of the generator."""
        data_t = []
        st = 0
        for column_info in self._transformer.output_info_list:
            for span_info in column_info:
                if span_info.activation_fn == 'tanh':
                    ed = st + span_info.dim
                    data_t.append(torch.tanh(data[:, st:ed]))
                    st = ed
                elif span_info.activation_fn == 'softmax':
                    ed = st + span_info.dim
                    transformed = self._gumbel_softmax(data[:, st:ed], tau=0.2)
                    data_t.append(transformed)
                    st = ed
                else:
                    assert 0

        return torch.cat(data_t, dim=1)

    def _cond_loss(self, data, c, m):
        """Compute the cross entropy loss on the fixed discrete column."""
        loss = []
        st = 0
        st_c = 0
        for column_info in self._transformer.output_info_list:
            for span_info in column_info:
                if len(column_info
                       ) != 1 or span_info.activation_fn != "softmax":
                    # not discrete column
                    st += span_info.dim
                else:
                    ed = st + span_info.dim
                    ed_c = st_c + span_info.dim
                    tmp = functional.cross_entropy(data[:, st:ed],
                                                   torch.argmax(c[:,
                                                                  st_c:ed_c],
                                                                dim=1),
                                                   reduction='none')
                    loss.append(tmp)
                    st = ed
                    st_c = ed_c

        loss = torch.stack(loss, dim=1)

        return (loss * m).sum() / data.size()[0]

    def _validate_discrete_columns(self, train_data, discrete_columns):
        """Check whether ``discrete_columns`` exists in ``train_data``.

        Args:
            train_data (numpy.ndarray or pandas.DataFrame):
                Training Data. It must be a 2-dimensional numpy array or a pandas.DataFrame.
            discrete_columns (list-like):
                List of discrete columns to be used to generate the Conditional
                Vector. If ``train_data`` is a Numpy array, this list should
                contain the integer indices of the columns. Otherwise, if it is
                a ``pandas.DataFrame``, this list should contain the column names.
        """
        if isinstance(train_data, pd.DataFrame):
            invalid_columns = set(discrete_columns) - set(train_data.columns)
        elif isinstance(train_data, np.ndarray):
            invalid_columns = []
            for column in discrete_columns:
                if column < 0 or column >= train_data.shape[1]:
                    invalid_columns.append(column)
        else:
            raise TypeError(
                '``train_data`` should be either pd.DataFrame or np.array.')

        if invalid_columns:
            raise ValueError(
                'Invalid columns found: {}'.format(invalid_columns))

    def fit(self,
            train_data,
            discrete_columns=tuple(),
            epochs=None,
            epsilon=None):
        """Fit the CTGAN Synthesizer models to the training data.

        Args:
            train_data (numpy.ndarray or pandas.DataFrame):
                Training Data. It must be a 2-dimensional numpy array or a pandas.DataFrame.
            discrete_columns (list-like):
                List of discrete columns to be used to generate the Conditional
                Vector. If ``train_data`` is a Numpy array, this list should
                contain the integer indices of the columns. Otherwise, if it is
                a ``pandas.DataFrame``, this list should contain the column names.
        """
        self._validate_discrete_columns(train_data, discrete_columns)

        if epochs is None:
            epochs = self._epochs
        if epsilon is None:
            epsilon = self._epsilon
        if not self._dp:
            self.trained_epsilon = float("inf")

        self._transformer = DataTransformer()
        self._transformer.fit(train_data, discrete_columns)

        train_data = self._transformer.transform(train_data)

        self._data_sampler = DataSampler(train_data,
                                         self._transformer.output_info_list,
                                         self._log_frequency)

        data_dim = self._transformer.output_dimensions

        self._generator = Generator(
            self._embedding_dim + self._data_sampler.dim_cond_vec(),
            self._generator_dim, data_dim).to(self._device)

        self._discriminator = Discriminator(
            data_dim + self._data_sampler.dim_cond_vec(),
            self._discriminator_dim, self._pack).to(self._device)

        self._optimizerG = optim.Adam(self._generator.parameters(),
                                      lr=self._generator_lr,
                                      betas=(0.5, 0.9),
                                      weight_decay=self._generator_decay)

        self._optimizerD = optim.Adam(self._discriminator.parameters(),
                                      lr=self._discriminator_lr,
                                      betas=(0.5, 0.9),
                                      weight_decay=self._discriminator_decay)

        if self._dp:
            self._privacy_engine = PrivacyEngine(
                self._discriminator,
                self._batch_size / self._pack,
                len(train_data),
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=self._noise_multiplier,
                max_grad_norm=self.max_grad_norm,
                clip_per_layer=True,
                loss_reduction="sum",
            )
            self._privacy_engine.attach(self._optimizerD)

        mean = torch.zeros(self._batch_size,
                           self._embedding_dim,
                           device=self._device)
        std = mean + 1
        one = torch.tensor(1, dtype=torch.float).to(self._device)
        mone = one * -1

        steps_per_epoch = max(len(train_data) // self._batch_size, 1)
        for i in range(epochs):
            self.trained_epochs += 1

            if self._dp:
                if self.trained_epsilon >= epsilon:
                    print(
                        "Privacy budget of {:.2f} exhausted. Please specify a higher one in fit() to continue training, or disable differential privacy."
                        .format(epsilon))
                    return

            for id_ in range(steps_per_epoch):

                for n in range(self._discriminator_steps):
                    fakez = torch.normal(mean=mean, std=std)

                    condvec = self._data_sampler.sample_condvec(
                        self._batch_size)
                    if condvec is None:
                        c1, m1, col, opt = None, None, None, None
                        real = self._data_sampler.sample_data(
                            self._batch_size, col, opt)
                    else:
                        c1, m1, col, opt = condvec
                        c1 = torch.from_numpy(c1).to(self._device)
                        m1 = torch.from_numpy(m1).to(self._device)
                        fakez = torch.cat([fakez, c1], dim=1)

                        perm = np.arange(self._batch_size)
                        np.random.shuffle(perm)
                        real = self._data_sampler.sample_data(
                            self._batch_size, col[perm], opt[perm])
                        c2 = c1[perm]

                    fake = self._generator(fakez)
                    fakeact = self._apply_activate(fake)

                    real = torch.from_numpy(real.astype('float32')).to(
                        self._device)

                    if c1 is not None:
                        fake_cat = torch.cat([fakeact, c1], dim=1)
                        real_cat = torch.cat([real, c2], dim=1)
                    else:
                        real_cat = real
                        fake_cat = fakeact  # use the activated fake output, as in the branch above

                    self._optimizerD.zero_grad()

                    y_fake = self._discriminator(fake_cat)
                    y_real = self._discriminator(real_cat)

                    if not self._dp:
                        pen = self._discriminator.calc_gradient_penalty(
                            real_cat, fake_cat, self._device)
                        pen.backward(retain_graph=True)
                    loss_d = -torch.mean(y_real) + torch.mean(y_fake)

                    loss_d.backward()
                    self._optimizerD.step()

                fakez = torch.normal(mean=mean, std=std)
                condvec = self._data_sampler.sample_condvec(self._batch_size)

                if condvec is None:
                    c1, m1, col, opt = None, None, None, None
                else:
                    c1, m1, col, opt = condvec
                    c1 = torch.from_numpy(c1).to(self._device)
                    m1 = torch.from_numpy(m1).to(self._device)
                    fakez = torch.cat([fakez, c1], dim=1)

                fake = self._generator(fakez)
                fakeact = self._apply_activate(fake)

                if c1 is not None:
                    y_fake = self._discriminator(
                        torch.cat([fakeact, c1], dim=1))
                else:
                    y_fake = self._discriminator(fakeact)

                if condvec is None:
                    cross_entropy = 0
                else:
                    cross_entropy = self._cond_loss(fake, c1, m1)

                loss_g = -torch.mean(y_fake) + cross_entropy

                self._optimizerG.zero_grad()
                loss_g.backward()
                self._optimizerG.step()

                if self._dp:
                    for p in self._discriminator.parameters():
                        if hasattr(p, "grad_sample"):
                            del p.grad_sample

                    self.trained_epsilon, best_alpha = self._optimizerD.privacy_engine.get_privacy_spent(
                        self._delta)
                    if self.trained_epsilon >= epsilon:
                        print(
                            "Privacy budget of {:.2f} exausthed, training halted. Best alpha: {:.2f}"
                            .format(epsilon, best_alpha))
                        return

            if self._verbose:
                print(
                    f"Epoch {i+1}, epslion {self.trained_epsilon: .2f}, Loss G: {loss_g.detach().cpu(): .4f}, "
                    f"Loss D: {loss_d.detach().cpu(): .4f}",
                    flush=True)

        if self._dp:
            self._privacy_engine.detach()

    def sample(self, n, condition_column=None, condition_value=None):
        """Sample data similar to the training data.

        Choosing a condition_column and condition_value will increase the probability of the
        discrete condition_value happening in the condition_column.
        Args:
            n (int):
                Number of rows to sample.
            condition_column (string):
                Name of a discrete column.
            condition_value (string):
                Name of the category in the condition_column which we wish to increase the
                probability of happening.
        Returns:
            numpy.ndarray or pandas.DataFrame
        """
        if condition_column is not None and condition_value is not None:
            condition_info = self._transformer.convert_column_name_value_to_id(
                condition_column, condition_value)
            global_condition_vec = self._data_sampler.generate_cond_from_condition_column_info(
                condition_info, self._batch_size)
        else:
            global_condition_vec = None

        steps = n // self._batch_size + 1
        data = []
        for i in range(steps):
            mean = torch.zeros(self._batch_size, self._embedding_dim)
            std = mean + 1
            fakez = torch.normal(mean=mean, std=std).to(self._device)

            if global_condition_vec is not None:
                condvec = global_condition_vec.copy()
            else:
                condvec = self._data_sampler.sample_original_condvec(
                    self._batch_size)

            if condvec is not None:
                c1 = torch.from_numpy(condvec).to(self._device)
                fakez = torch.cat([fakez, c1], dim=1)

            fake = self._generator(fakez)
            fakeact = self._apply_activate(fake)
            data.append(fakeact.detach().cpu().numpy())

        data = np.concatenate(data, axis=0)
        data = data[:n]

        return self._transformer.inverse_transform(data)

    def set_device(self, device):
        self._device = device
        if hasattr(self, '_generator'):
            self._generator.to(self._device)
        if hasattr(self, '_discriminator'):
            self._discriminator.to(self._device)
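

# --- Usage sketch (added) ----------------------------------------------------
# A minimal illustration of how the fit()/sample() API above is typically
# driven. `DPCTGANSynthesizer` is only a placeholder for the enclosing
# synthesizer class defined earlier in this example, and the constructor
# arguments are illustrative assumptions, not the actual signature.
#
#   import pandas as pd
#
#   data = pd.DataFrame({
#       "age": [23, 45, 31, 52],
#       "job": ["teacher", "nurse", "engineer", "teacher"],
#   })
#
#   synth = DPCTGANSynthesizer()                  # placeholder class name
#   synth.fit(data, discrete_columns=["job"])     # halts early if the DP budget is spent
#   synthetic = synth.sample(100)                 # 100 synthetic rows
#   nurses = synth.sample(                        # bias sampling towards "job" == "nurse"
#       100, condition_column="job", condition_value="nurse")
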
class GradientAccumulation_test(unittest.TestCase):
    def setUp(self):
        self.DATA_SIZE = 64
        self.BATCH_SIZE = 16
        self.SAMPLE_RATE = self.BATCH_SIZE / self.DATA_SIZE
        self.LR = 0  # we want to call optimizer.step() without modifying the model
        self.ALPHAS = [1 + x / 10.0 for x in range(1, 100, 10)]
        self.criterion = nn.CrossEntropyLoss()

        self.setUp_data()
        self.setUp_model_and_optimizer()

    def setUp_data(self):
        self.ds = FakeData(
            size=self.DATA_SIZE,
            image_size=(1, 35, 35),
            num_classes=10,
            transform=transforms.Compose(
                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
            ),
        )
        self.dl = DataLoader(self.ds, batch_size=self.BATCH_SIZE)

    def setUp_model_and_optimizer(self):
        self.model = SampleConvNet()
        self.optimizer = torch.optim.SGD(
            self.model.parameters(), lr=self.LR, momentum=0
        )

        self.optimizer.zero_grad()

        # accumulate .grad over the entire dataset
        for x, y in self.dl:
            logits = self.model(x)
            loss = self.criterion(logits, y)
            loss.backward()

        self.effective_batch_grad = torch.cat(
            [p.grad.reshape(-1) for p in self.model.parameters() if p.requires_grad]
        ) * (self.BATCH_SIZE / self.DATA_SIZE)

        self.optimizer.zero_grad()

    def setUp_privacy_engine(self, batch_size):
        self.privacy_engine = PrivacyEngine(
            self.model,
            sample_rate=batch_size / self.DATA_SIZE,
            alphas=self.ALPHAS,
            noise_multiplier=0,
            max_grad_norm=999,
        )
        self.privacy_engine.attach(self.optimizer)

    def calc_per_sample_grads(self, data_iter, num_steps=1):
        for x, y in data_iter:
            num_steps -= 1
            logits = self.model(x)
            loss = self.criterion(logits, y)
            loss.backward()
            if num_steps == 0:
                break

    def test_grad_sample_accumulation(self):
        """
        Calling loss.backward() multiple times should sum up the gradients in .grad
        and accumulate all the individual gradients in .grad_sample
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data_iter = iter(self.dl)  # 4 batches of size 16 each
        self.calc_per_sample_grads(data_iter, num_steps=4)
        # should accumulate grads in .grad and .grad_sample

        # the accumulated per-sample gradients
        per_sample_grads = torch.cat(
            [
                p.grad_sample.reshape(self.DATA_SIZE, -1)
                for p in self.model.parameters()
                if p.requires_grad
            ],
            dim=-1,
        )
        # average up all the per-sample gradients
        accumulated_grad = torch.mean(per_sample_grads, dim=0)

        # the full data gradient accumulated in .grad
        grad = torch.cat(
            [p.grad.reshape(-1) for p in self.model.parameters() if p.requires_grad]
        ) * (self.BATCH_SIZE / self.DATA_SIZE)

        self.optimizer.step()

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5, rtol=10e-3)
        )
        self.assertTrue(torch.allclose(grad, orig_grad, atol=10e-5, rtol=10e-3))

    def test_clipper_accumulation(self):
        """
        Calling optimizer.virtual_step() should accumulate clipped gradients to form
        one large batch.
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for _ in range(3):  # take 3 virtual steps
            self.calc_per_sample_grads(data, num_steps=1)
            self.optimizer.virtual_step()

        # accumulate on the last step
        self.calc_per_sample_grads(data, num_steps=1)
        self.optimizer.step()

        # .grad should contain the average gradient over the entire dataset
        accumulated_grad = torch.cat(
            [p.grad.reshape(-1) for p in self.model.parameters() if p.requires_grad]
        )

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5, rtol=10e-3),
            f"Values are {accumulated_grad} vs {orig_grad}."
            f"MAD is {(orig_grad - accumulated_grad).abs().mean()}",
        )

    def test_mixed_accumulation(self):
        """
        Calling loss.backward() multiple times aggregates all per-sample gradients in
        .grad_sample. Then, calling optimizer.virtual_step() should clip all gradients
        and aggregate them into one large batch.
        """
        self.setUp_privacy_engine(self.DATA_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        # accumulate per-sample grads for two mini batches
        self.calc_per_sample_grads(data, num_steps=2)
        # take a virtual step
        self.optimizer.virtual_step()
        # accumulate another two mini batches
        self.calc_per_sample_grads(data, num_steps=2)
        # take a step
        self.optimizer.step()

        # .grad should contain the average gradient over the entire dataset
        accumulated_grad = torch.cat(
            [p.grad.reshape(-1) for p in self.model.parameters() if p.requires_grad]
        )

        # the accumulated gradients in .grad without any hooks
        orig_grad = self.effective_batch_grad

        self.assertTrue(
            torch.allclose(accumulated_grad, orig_grad, atol=10e-5, rtol=10e-3)
        )

    def test_grad_sample_erased(self):
        """
        Calling optimizer.step() should erase any accumulated per-sample gradients.
        """
        self.setUp_privacy_engine(2 * self.BATCH_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for _ in range(2):
            # accumulate per-sample gradients for two mini-batches to form an
            # effective batch of size `2*BATCH_SIZE`. Once an effective batch
            # has been accumulated, we call `optimizer.step()` to clip and
            # average the per-sample gradients. This should erase the
            # `grad_sample` fields for each parameter
            self.calc_per_sample_grads(data, num_steps=2)
            self.optimizer.step()

            for param_name, param in self.model.named_parameters():
                if param.requires_grad:
                    self.assertFalse(
                        hasattr(param, "grad_sample"),
                        f"Per-sample gradients haven't been erased "
                        f"for {param_name}",
                    )

    def test_summed_grad_erased(self):
        """
        Calling optimizer.step() should erase any accumulated clipped gradients.
        """

        self.setUp_privacy_engine(2 * self.BATCH_SIZE)
        data = iter(self.dl)  # 4 batches of size 16 each

        for idx in range(4):
            self.calc_per_sample_grads(data, num_steps=1)

            if idx % 2 == 0:
                # perform a virtual step for each mini-batch; this accumulates
                # clipped gradients in each parameter's `summed_grad` field.
                self.optimizer.virtual_step()
                for param_name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self.assertTrue(
                            hasattr(param, "summed_grad"),
                            f"Clipped gradients aren't accumulated "
                            f"for {param_name}",
                        )
            else:
                # the second mini-batch completes an effective batch of size
                # `2*BATCH_SIZE`, so take a real step. The clipper computes the
                # mean gradient for the entire effective batch, populates each
                # parameter's `.grad` field, and erases its `summed_grad`.
                self.optimizer.step()

                for param_name, param in self.model.named_parameters():
                    if param.requires_grad:
                        self.assertFalse(
                            hasattr(param, "summed_grad"),
                            f"Accumulated clipped gradients haven't been erased "
                            f"¨for {param_name}",
                        )
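
# Illustration (added): the scaling used in setUp_model_and_optimizer above.
# With a mean-reduced loss, calling loss.backward() once per mini-batch sums
# the per-batch mean gradients into .grad; multiplying that sum by
# BATCH_SIZE / DATA_SIZE recovers the gradient of the mean loss over the whole
# dataset. A self-contained check with a tiny linear model (plain PyTorch, no
# Opacus involved):
import torch
import torch.nn as nn

torch.manual_seed(0)
X, y = torch.randn(64, 3), torch.randn(64, 1)
model = nn.Linear(3, 1)
loss_fn = nn.MSELoss()  # mean reduction, like the CrossEntropyLoss above

# accumulate per-batch mean gradients over 4 batches of 16
model.zero_grad()
for i in range(0, 64, 16):
    loss_fn(model(X[i:i + 16]), y[i:i + 16]).backward()
accumulated = torch.cat([p.grad.flatten() for p in model.parameters()]) * (16 / 64)

# gradient of the mean loss over the full dataset, computed in one pass
model.zero_grad()
loss_fn(model(X), y).backward()
full = torch.cat([p.grad.flatten() for p in model.parameters()])

assert torch.allclose(accumulated, full, atol=1e-5)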
Ejemplo n.º 26
0
def main():
    parser = argparse.ArgumentParser(description="PyTorch IMDB Example")
    parser.add_argument(
        "-b",
        "--batch-size-test",
        type=int,
        default=64,
        metavar="B",
        help="input batch size for test (default: 64)",
    )
    parser.add_argument(
        "-sr",
        "--sample-rate",
        type=float,
        default=0.00256,
        metavar="SR",
        help="sample rate used for batch construction (default: 0.00256)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 10)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.02,
        metavar="LR",
        help="learning rate (default: .02)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=0.56,
        metavar="S",
        help="Noise multiplier (default 0.56)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--max-sequence-length",
        type=int,
        default=256,
        metavar="SL",
        help="Longer sequences will be cut to this length (default: 256)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla optimizer",
    )
    parser.add_argument(
        "--secure-rng",
        action="store_true",
        default=False,
        help="Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost",
    )
    parser.add_argument(
        "--data-root", type=str, default="../imdb", help="Where IMDB is/will be stored"
    )
    parser.add_argument(
        "-j",
        "--workers",
        default=2,
        type=int,
        metavar="N",
        help="number of data loading workers (default: 2)",
    )

    args = parser.parse_args()
    device = torch.device(args.device)

    raw_dataset = load_dataset("imdb", cache_dir=args.data_root)
    tokenizer = BertTokenizerFast.from_pretrained("bert-base-cased")
    dataset = raw_dataset.map(
        lambda x: tokenizer(
            x["text"], truncation=True, max_length=args.max_sequence_length
        ),
        batched=True,
    )
    dataset.set_format(type="torch", columns=["input_ids", "label"])

    train_dataset = dataset["train"]
    test_dataset = dataset["test"]

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")

    else:
        generator = None

    train_loader = DataLoader(
        train_dataset,
        num_workers=args.workers,
        generator=generator,
        batch_sampler=UniformWithReplacementSampler(
            num_samples=len(train_dataset),
            sample_rate=args.sample_rate,
            generator=generator,
        ),
        collate_fn=padded_collate,
        pin_memory=True,
    )

    test_loader = torch.utils.data.DataLoader(
        test_dataset,
        batch_size=args.batch_size_test,
        shuffle=False,
        num_workers=args.workers,
        collate_fn=padded_collate,
        pin_memory=True,
    )

    model = SampleNet(vocab_size=len(tokenizer)).to(device)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    if not args.disable_dp:
        privacy_engine = PrivacyEngine(
            model,
            sample_rate=args.sample_rate,
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.sigma,
            max_grad_norm=args.max_per_sample_grad_norm,
            secure_rng=args.secure_rng,
        )
        privacy_engine.attach(optimizer)

    mean_accuracy = 0
    for epoch in range(1, args.epochs + 1):
        train(args, model, train_loader, optimizer, epoch)
        mean_accuracy = evaluate(args, model, test_loader)

    torch.save(mean_accuracy, "run_results_imdb_classification.pt")
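
# Sketch (added): `padded_collate` is referenced by the DataLoaders above but
# not shown in this example. A minimal version, under the assumption that each
# dataset element is a dict with "input_ids" and "label" tensors (as produced
# by the tokenizer mapping above) and that the pad token id is 0:
import torch
from torch.nn.utils.rnn import pad_sequence


def padded_collate(batch, padding_idx=0):
    x = pad_sequence(
        [elem["input_ids"] for elem in batch],
        batch_first=True,
        padding_value=padding_idx,
    )
    y = torch.stack([elem["label"] for elem in batch])
    return x, y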
Ejemplo n.º 27
0
def main(tiny_images=None,
         model="cnn",
         augment=False,
         use_scattering=False,
         batch_size=2048,
         mini_batch_size=256,
         lr=1,
         lr_start=None,
         optim="SGD",
         momentum=0.9,
         noise_multiplier=1,
         max_grad_norm=0.1,
         epochs=100,
         bn_noise_multiplier=None,
         max_epsilon=None,
         data_size=550000,
         delta=1e-6,
         logdir=None):
    logger = Logger(logdir)

    device = get_device()

    bs = batch_size
    assert bs % mini_batch_size == 0
    n_acc_steps = bs // mini_batch_size

    train_data, test_data = get_data("cifar10", augment=augment)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=100,
                                               shuffle=False,
                                               num_workers=4,
                                               pin_memory=True)

    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=100,
                                              shuffle=False,
                                              num_workers=4,
                                              pin_memory=True)

    if isinstance(tiny_images, torch.utils.data.Dataset):
        train_data_aug = tiny_images
    else:
        print("loading tiny images...")
        train_data_aug, _ = get_data("cifar10_500K",
                                     augment=augment,
                                     aux_data_filename=tiny_images)

    scattering, K, (h, w) = None, None, (None, None)
    pre_scattered = False
    if use_scattering:
        scattering, K, (h, w) = get_scatter_transform("cifar10_500K")
        scattering.to(device)

    # if the whole data fits in memory, pre-compute the scattering
    if use_scattering and data_size <= 50000:
        loader = torch.utils.data.DataLoader(train_data_aug,
                                             batch_size=100,
                                             shuffle=False,
                                             num_workers=4)
        train_data_aug = get_scattered_dataset(loader, scattering, device,
                                               data_size)
        pre_scattered = True

    assert data_size <= len(train_data_aug)
    num_sup = min(data_size, 50000)
    num_batches = int(np.ceil(50000 / mini_batch_size))  # cifar-10 equivalent

    train_batch_sampler = SemiSupervisedSampler(data_size, num_batches,
                                                mini_batch_size)
    train_loader_aug = torch.utils.data.DataLoader(
        train_data_aug,
        batch_sampler=train_batch_sampler,
        num_workers=0 if pre_scattered else 4,
        pin_memory=not pre_scattered)

    rdp_norm = 0
    if model == "cnn":
        if use_scattering:
            save_dir = f"bn_stats/cifar10_500K"
            os.makedirs(save_dir, exist_ok=True)
            bn_stats, rdp_norm = scatter_normalization(
                train_loader,
                scattering,
                K,
                device,
                data_size,
                num_sup,
                noise_multiplier=bn_noise_multiplier,
                orders=ORDERS,
                save_dir=save_dir)
            model = CNNS["cifar10"](K, input_norm="BN", bn_stats=bn_stats)
            model = model.to(device)

            if not pre_scattered:
                model = nn.Sequential(scattering, model)
        else:
            model = CNNS["cifar10"](in_channels=3, internal_norm=False)

    elif model == "linear":
        save_dir = f"bn_stats/cifar10_500K"
        os.makedirs(save_dir, exist_ok=True)
        bn_stats, rdp_norm = scatter_normalization(
            train_loader,
            scattering,
            K,
            device,
            data_size,
            num_sup,
            noise_multiplier=bn_noise_multiplier,
            orders=ORDERS,
            save_dir=save_dir)
        model = ScatterLinear(K, (h, w), input_norm="BN", bn_stats=bn_stats)
        model = model.to(device)

        if not pre_scattered:
            model = nn.Sequential(scattering, model)
    else:
        raise ValueError(f"Unknown model {model}")
    model.to(device)

    if pre_scattered:
        test_loader = get_scattered_loader(test_loader, scattering, device)

    print(f"model has {get_num_params(model)} parameters")

    if optim == "SGD":
        optimizer = torch.optim.SGD(model.parameters(),
                                    lr=lr,
                                    momentum=momentum)
    else:
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    privacy_engine = PrivacyEngine(
        model,
        bs,
        data_size,
        alphas=ORDERS,
        noise_multiplier=noise_multiplier,
        max_grad_norm=max_grad_norm,
    )
    privacy_engine.attach(optimizer)

    best_acc = 0
    flat_count = 0

    for epoch in range(0, epochs):

        print(f"\nEpoch: {epoch} ({privacy_engine.steps} steps)")
        train_loss, train_acc = train(model,
                                      train_loader_aug,
                                      optimizer,
                                      n_acc_steps=n_acc_steps)
        test_loss, test_acc = test(model, test_loader)

        if noise_multiplier > 0:
            print(f"sample_rate={privacy_engine.sample_rate}, "
                  f"mul={privacy_engine.noise_multiplier}, "
                  f"steps={privacy_engine.steps}")
            rdp_sgd = get_renyi_divergence(
                privacy_engine.sample_rate,
                privacy_engine.noise_multiplier) * privacy_engine.steps
            epsilon, _ = get_privacy_spent(rdp_norm + rdp_sgd,
                                           target_delta=delta)
            epsilon2, _ = get_privacy_spent(rdp_sgd, target_delta=delta)
            print(f"ε = {epsilon:.3f} (sgd only: ε = {epsilon2:.3f})")

            if max_epsilon is not None and epsilon >= max_epsilon:
                return
        else:
            epsilon = None

        logger.log_epoch(epoch, train_loss, train_acc, test_loss, test_acc,
                         epsilon)
        logger.log_scalar("epsilon/train", epsilon, epoch)
        logger.log_scalar("cifar10k_loss/train", train_loss, epoch)
        logger.log_scalar("cifar10k_acc/train", train_acc, epoch)

        if test_acc > best_acc:
            best_acc = test_acc
            flat_count = 0
        else:
            flat_count += 1
            if flat_count >= 20:
                print("plateau...")
                return
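
# Sketch (added): train() is not shown in this example. Since it receives
# n_acc_steps = batch_size // mini_batch_size, a plausible implementation
# accumulates per-sample gradients over n_acc_steps mini-batches via the
# privacy engine's virtual_step() before taking a real optimizer step. This is
# an assumption about the missing helper, not the repository's actual code:
import torch.nn as nn


def train(model, train_loader, optimizer, n_acc_steps=1):
    device = next(model.parameters()).device
    criterion = nn.CrossEntropyLoss()
    model.train()
    total_loss, correct, seen = 0.0, 0, 0
    for i, (x, y) in enumerate(train_loader):
        x, y = x.to(device), y.to(device)
        logits = model(x)
        loss = criterion(logits, y)
        loss.backward()
        if (i + 1) % n_acc_steps == 0:
            optimizer.step()          # real (clipped + noised) step
            optimizer.zero_grad()
        else:
            optimizer.virtual_step()  # accumulate clipped grads, no noise yet
        total_loss += loss.item() * y.size(0)
        correct += (logits.argmax(dim=1) == y).sum().item()
        seen += y.size(0)
    return total_loss / seen, correct / seen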
Ejemplo n.º 28
0
def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "-sr",
        "--sample-rate",
        type=float,
        default=0.001,
        metavar="SR",
        help="sample rate used for batch construction (default: 0.001)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1024,
        metavar="TB",
        help="input batch size for testing (default: 1024)",
    )
    parser.add_argument(
        "-n",
        "--epochs",
        type=int,
        default=10,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "-r",
        "--n-runs",
        type=int,
        default=1,
        metavar="R",
        help="number of runs to average on (default: 1)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=0.1,
        metavar="LR",
        help="learning rate (default: .1)",
    )
    parser.add_argument(
        "--sigma",
        type=float,
        default=1.0,
        metavar="S",
        help="Noise multiplier (default 1.0)",
    )
    parser.add_argument(
        "-c",
        "--max-per-sample-grad_norm",
        type=float,
        default=1.0,
        metavar="C",
        help="Clip per-sample gradients to this norm (default 1.0)",
    )
    parser.add_argument(
        "--delta",
        type=float,
        default=1e-5,
        metavar="D",
        help="Target delta (default: 1e-5)",
    )
    parser.add_argument(
        "--device",
        type=str,
        default="cuda",
        help="GPU ID for this process (default: 'cuda')",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="Save the trained model (default: false)",
    )
    parser.add_argument(
        "--disable-dp",
        action="store_true",
        default=False,
        help="Disable privacy training and just train with vanilla SGD",
    )
    parser.add_argument(
        "--secure-rng",
        action="store_true",
        default=False,
        help=
        "Enable Secure RNG to have trustworthy privacy guarantees. Comes at a performance cost",
    )
    parser.add_argument(
        "--data-root",
        type=str,
        default="../mnist",
        help="Where MNIST is/will be stored",
    )
    args = parser.parse_args()
    device = torch.device(args.device)

    kwargs = {"num_workers": 1, "pin_memory": True}

    if args.secure_rng:
        try:
            import torchcsprng as prng
        except ImportError as e:
            msg = (
                "To use secure RNG, you must install the torchcsprng package! "
                "Check out the instructions here: https://github.com/pytorch/csprng#installation"
            )
            raise ImportError(msg) from e

        generator = prng.create_random_device_generator("/dev/urandom")

    else:
        generator = None

    train_dataset = datasets.MNIST(
        args.data_root,
        train=True,
        download=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, )),
        ]),
    )

    train_loader = torch.utils.data.DataLoader(
        train_dataset,
        generator=generator,
        batch_sampler=UniformWithReplacementSampler(
            num_samples=len(train_dataset),
            sample_rate=args.sample_rate,
            generator=generator,
        ),
        **kwargs,
    )
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST(
            args.data_root,
            train=False,
            transform=transforms.Compose([
                transforms.ToTensor(),
                transforms.Normalize((MNIST_MEAN, ), (MNIST_STD, )),
            ]),
        ),
        batch_size=args.test_batch_size,
        shuffle=True,
        **kwargs,
    )
    run_results = []
    for _ in range(args.n_runs):
        model = SampleConvNet().to(device)

        optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0)
        if not args.disable_dp:
            privacy_engine = PrivacyEngine(
                model,
                sample_rate=args.sample_rate,
                alphas=[1 + x / 10.0
                        for x in range(1, 100)] + list(range(12, 64)),
                noise_multiplier=args.sigma,
                max_grad_norm=args.max_per_sample_grad_norm,
                secure_rng=args.secure_rng,
            )
            privacy_engine.attach(optimizer)
        for epoch in range(1, args.epochs + 1):
            train(args, model, device, train_loader, optimizer, epoch)
        run_results.append(test(args, model, device, test_loader))

    if len(run_results) > 1:
        print("Accuracy averaged over {} runs: {:.2f}% ± {:.2f}%".format(
            len(run_results),
            np.mean(run_results) * 100,
            np.std(run_results) * 100))

    repro_str = (
        f"{model.name()}_{args.lr}_{args.sigma}_"
        f"{args.max_per_sample_grad_norm}_{args.sample_rate}_{args.epochs}")
    torch.save(run_results, f"run_results_{repro_str}.pt")

    if args.save_model:
        torch.save(model.state_dict(), f"mnist_cnn_{repro_str}.pt")
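
# Note (added): with UniformWithReplacementSampler each training example is
# included in a batch independently with probability `sample_rate`, so batch
# sizes vary around an expected value of sample_rate * num_samples. For the
# defaults above and the 60,000-image MNIST training set:
expected_batch_size = 0.001 * 60000
print(f"expected batch size ≈ {expected_batch_size:.0f} examples")  # ~60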
Ejemplo n.º 29
0
    model = Net_embedder(embedders, hidden_dims, num_classes)
    print(model)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.RMSprop(model.parameters(), args.lr)
    if args.epsilon is not None:
        max_epsilon, delta, sensitivity = get_priv_params(args.epsilon)
        privacy_engine = PrivacyEngine(model,
                                       batch_size=args.batch_size,
                                       sample_size=len(sampler),
                                       alphas=list(range(2, 32)),
                                       noise_multiplier=args.noise_multiplier,
                                       max_grad_norm=args.max_grad_norm,
                                       target_delta=delta)
        privacy_engine.attach(optimizer)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, args.num_epochs * len(dataloader_train))
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=10, factor=0.5)

    # Training loop
    best_loss = np.infty
    best_model = None
    for i in range(args.num_epochs):
        loss = train(model, dataloader_train, optimizer, criterion, device, scheduler=scheduler)
        # loss = train(model, dataloader_train, optimizer, criterion, device, scheduler=None)
        # scheduler.step(loss)

        log = f"Train Epoch: {i}\tLoss: {loss:.6f}"
        if args.epsilon is not None:
            epsilon, best_alpha = optimizer.privacy_engine.get_privacy_spent(delta)
            log += f" (ε = {epsilon:.2f}, δ = {delta}) for α = {best_alpha}"
Ejemplo n.º 30
0
def main():
    parser = ArgParser()
    args = parser.parse_args()

    gen = Generator(args.latent_dim).to(args.device)
    disc = Discriminator().to(args.device)
    if args.device != 'cpu':
        gen = nn.DataParallel(gen, args.gpu_ids)
        disc = nn.DataParallel(disc, args.gpu_ids)
    # gen = gen.apply(weights_init)
    # disc = disc.apply(weights_init)

    gen_opt = torch.optim.RMSprop(gen.parameters(), lr=args.lr)
    disc_opt = torch.optim.RMSprop(disc.parameters(), lr=args.lr)
    gen_scheduler = torch.optim.lr_scheduler.LambdaLR(gen_opt, lr_lambda=lr_lambda(args.num_epochs))
    disc_scheduler = torch.optim.lr_scheduler.LambdaLR(disc_opt, lr_lambda=lr_lambda(args.num_epochs))
    disc_loss_fn = DiscriminatorLoss().to(args.device)
    gen_loss_fn = GeneratorLoss().to(args.device)

    # dataset = Dataset()
    dataset = MNISTDataset()
    loader = DataLoader(dataset, batch_size=args.batch_size, num_workers=args.num_workers)

    logger = TrainLogger(args, len(loader), phase=None)
    logger.log_hparams(args)

    if args.privacy_noise_multiplier != 0:
        privacy_engine = PrivacyEngine(
            disc,
            batch_size=args.batch_size,
            sample_size=len(dataset),
            alphas=[1 + x / 10.0 for x in range(1, 100)] + list(range(12, 64)),
            noise_multiplier=args.privacy_noise_multiplier,
            max_grad_norm=0.02,
            batch_first=True,
        )
        privacy_engine.attach(disc_opt)
        privacy_engine.to(args.device)

    for epoch in range(args.num_epochs):
        logger.start_epoch()
        for cur_step, img in enumerate(tqdm(loader, dynamic_ncols=True)):
            logger.start_iter()
            img = img.to(args.device)
            fake, disc_loss = None, None
            for _ in range(args.step_train_discriminator):
                disc_opt.zero_grad()
                fake_noise = get_noise(args.batch_size, args.latent_dim, device=args.device)
                fake = gen(fake_noise)
                disc_loss = disc_loss_fn(img, fake, disc)
                disc_loss.backward()
                disc_opt.step()

            gen_opt.zero_grad()
            fake_noise_2 = get_noise(args.batch_size, args.latent_dim, device=args.device)
            fake_2 = gen(fake_noise_2)
            gen_loss = gen_loss_fn(img, fake_2, disc)
            gen_loss.backward()
            gen_opt.step()
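            # Added sketch: because the PrivacyEngine is attached to disc_opt,
            # the generator's backward pass above also goes through `disc` and
            # leaves per-sample gradients (`grad_sample`) on its parameters.
            # Mirroring the CTGAN fit() earlier in this document, one can drop
            # them so they do not contaminate the next discriminator step:
            if args.privacy_noise_multiplier != 0:
                for p in disc.parameters():
                    if hasattr(p, "grad_sample"):
                        del p.grad_sample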
            if args.privacy_noise_multiplier != 0:
                epsilon, best_alpha = privacy_engine.get_privacy_spent(args.privacy_delta)

            logger.log_iter_gan_from_latent_vector(img, fake, gen_loss, disc_loss, epsilon if args.privacy_noise_multiplier != 0 else 0)
            logger.end_iter()

        logger.end_epoch()
        gen_scheduler.step()
        disc_scheduler.step()