num_workers=0, drop_last=True)

# Generate random samples for test
random_samples = next(iter(dataloader_test))
feature_size = random_samples.size()[1]

###########################
## Privacy Calculation ####
###########################

if opt.dp_privacy:
    totalsamples = len(dataset_train_object)
    num_batches = len(dataloader_train)
    iterations = opt.n_epochs_pretrain * num_batches
    print('Achieves ({}, {})-DP'.format(
        analysis.epsilon(totalsamples, opt.batch_size, opt.noise_multiplier,
                         iterations, opt.delta),
        opt.delta,
    ))

####################
### Architecture ###
####################

class Autoencoder(nn.Module):
    def __init__(self):
        super(Autoencoder, self).__init__()
        n_channels_base = 4

        self.encoder = nn.Sequential(
            nn.Conv1d(in_channels=1, out_channels=n_channels_base,
def train(params):
    dataset = {
        'mimic': mimic_dataset,
        'credit': credit_dataset,
        'census': census_dataset,
    }[params['dataset']]

    _, train_dataset, validation_dataset, _ = dataset.get_datasets()

    # Load the whole validation set as a single batch for periodic evaluation.
    x_validation = next(iter(DataLoader(
        validation_dataset, batch_size=len(validation_dataset)))).to(params['device'])

    autoencoder = Autoencoder(
        example_dim=np.prod(train_dataset[0].shape),
        compression_dim=params['compress_dim'],
        binary=params['binary'],
        device=params['device'],
    )

    # Only the decoder is trained privately, with per-microbatch gradient clipping
    # and noise; the encoder is updated with a non-private Adam optimizer.
    decoder_optimizer = dp_optimizer.DPAdam(
        l2_norm_clip=params['l2_norm_clip'],
        noise_multiplier=params['noise_multiplier'],
        minibatch_size=params['minibatch_size'],
        microbatch_size=params['microbatch_size'],
        params=autoencoder.get_decoder().parameters(),
        lr=params['lr'],
        betas=(params['b1'], params['b2']),
        weight_decay=params['l2_penalty'],
    )

    encoder_optimizer = torch.optim.Adam(
        params=autoencoder.get_encoder().parameters(),
        lr=params['lr'] * params['microbatch_size'] / params['minibatch_size'],
        betas=(params['b1'], params['b2']),
        weight_decay=params['l2_penalty'],
    )

    # Per-example BCE summed over features for binary data, plain MSE otherwise.
    autoencoder_loss = (
        (lambda inp, target:
            nn.BCELoss(reduction='none')(inp, target).sum(dim=1).mean(dim=0))
        if params['binary'] else nn.MSELoss()
    )

    print('Achieves ({}, {})-DP'.format(
        analysis.epsilon(
            len(train_dataset),
            params['minibatch_size'],
            params['noise_multiplier'],
            params['iterations'],
            params['delta'],
        ),
        params['delta'],
    ))

    minibatch_loader, microbatch_loader = sampling.get_data_loaders(
        minibatch_size=params['minibatch_size'],
        microbatch_size=params['microbatch_size'],
        iterations=params['iterations'],
    )

    iteration = 0
    train_losses, validation_losses = [], []
    for X_minibatch in minibatch_loader(train_dataset):
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()
        for X_microbatch in microbatch_loader(X_minibatch):
            X_microbatch = X_microbatch.to(params['device'])
            decoder_optimizer.zero_microbatch_grad()
            output = autoencoder(X_microbatch)
            loss = autoencoder_loss(output, X_microbatch)
            loss.backward()
            # Clip this microbatch's gradient and accumulate it into the minibatch gradient.
            decoder_optimizer.microbatch_step()
        encoder_optimizer.step()
        # Add calibrated noise to the accumulated decoder gradient and apply the update.
        decoder_optimizer.step()

        validation_loss = autoencoder_loss(autoencoder(x_validation).detach(), x_validation)
        train_losses.append(loss.item())
        validation_losses.append(validation_loss.item())

        if iteration % 100 == 0:
            print('[Iteration %d/%d] [Loss: %f] [Validation Loss: %f]' % (
                iteration, params['iterations'], loss.item(), validation_loss.item()))

        iteration += 1

    return autoencoder, pd.DataFrame(data={'train': train_losses, 'validation': validation_losses})
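# Illustrative usage only, not part of the original script: a hypothetical params
# dict for train() above. The keys mirror the ones the function reads; the
# concrete values are placeholders rather than the original experiment settings.
# Saving to 'dp_autoencoder.dat' matches the filename the WGAN training script
# loads later.
example_ae_params = {
    'dataset': 'mimic',
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'compress_dim': 15,
    'binary': True,
    'lr': 0.005,
    'b1': 0.9,
    'b2': 0.999,
    'l2_penalty': 0.,
    'l2_norm_clip': 0.01,
    'noise_multiplier': 2.5,
    'minibatch_size': 64,
    'microbatch_size': 1,
    'delta': 1e-5,
    'iterations': 20000,
}
autoencoder, losses = train(example_ae_params)
with open('dp_autoencoder.dat', 'wb') as f:
    torch.save(autoencoder, f)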
    pass

iterations = int(model_file[:-4])

if private:
    minibatch_size = 128
    noise_multiplier = 1.1
    delta = 1.2871523321606923e-5

    from dp_autoencoder import Autoencoder
    from dp_wgan import Generator

    latent_dim = 64
    generator = torch.load(path + model_file)
    decoder = torch.load('dp_autoencoder.dat').get_decoder()

    epsilon = analysis.epsilon(len(train_dataset), minibatch_size, noise_multiplier,
                               iterations, delta)
    body = 'N: {}\nb: {}\nSigma: {}\nT: {}\nEps: {}\nDelta: {}'.format(
        len(train_dataset), minibatch_size, noise_multiplier, iterations, epsilon, delta)
    with open(model_dir + 'eps.txt', 'w') as f:
        f.write(body)
else:
    from autoencoder import Autoencoder
    from wgan import Generator

    latent_dim = 128
    generator = torch.load(path + model_file)
    decoder = torch.load('autoencoder.dat').get_decoder()
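# Illustrative only, not part of the original script: with `generator` and
# `decoder` loaded as above, synthetic records can be drawn by sampling latent
# noise and decoding, mirroring `fake = decoder(generator(z))` in the WGAN
# training loop. The sample count (1000) is a placeholder.
with torch.no_grad():
    z = torch.randn(1000, latent_dim, device=next(generator.parameters()).device)
    synthetic_records = decoder(generator(z))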
def train(params):
    dataset = {
        'mimic': mimic_dataset,
    }[params['dataset']]

    _, train_dataset, _, _ = dataset.get_datasets()

    # Load the (already trained) DP autoencoder and keep only its decoder, which
    # maps the generator's latent-space output back to the data space.
    with open('dp_autoencoder.dat', 'rb') as f:
        autoencoder = torch.load(f)
    decoder = autoencoder.get_decoder()

    generator = Generator(
        input_dim=params['latent_dim'],
        output_dim=autoencoder.get_compression_dim(),
        binary=params['binary'],
        device=params['device'],
    )

    g_optimizer = torch.optim.RMSprop(
        params=generator.parameters(),
        lr=params['lr'],
        alpha=params['alpha'],
        weight_decay=params['l2_penalty'],
    )

    discriminator = Discriminator(
        input_dim=np.prod(train_dataset[0].shape),
        device=params['device'],
    )

    # Only the discriminator sees real data, so only its optimizer is differentially private.
    d_optimizer = dp_optimizer.DPRMSprop(
        l2_norm_clip=params['l2_norm_clip'],
        noise_multiplier=params['noise_multiplier'],
        minibatch_size=params['minibatch_size'],
        microbatch_size=params['microbatch_size'],
        params=discriminator.parameters(),
        lr=params['lr'],
        alpha=params['alpha'],
        weight_decay=params['l2_penalty'],
    )

    print('Achieves ({}, {})-DP'.format(
        analysis.epsilon(
            len(train_dataset),
            params['minibatch_size'],
            params['noise_multiplier'],
            params['iterations'],
            params['delta'],
        ),
        params['delta'],
    ))

    minibatch_loader, microbatch_loader = sampling.get_data_loaders(
        params['minibatch_size'],
        params['microbatch_size'],
        params['iterations'],
    )

    iteration = 0
    for X_minibatch in minibatch_loader(train_dataset):
        d_optimizer.zero_grad()
        for real in microbatch_loader(X_minibatch):
            real = real.to(params['device'])
            z = torch.randn(real.size(0), params['latent_dim'], device=params['device'], requires_grad=False)
            fake = decoder(generator(z)).detach()

            d_optimizer.zero_microbatch_grad()
            # WGAN critic loss: maximize D(real) - D(fake), i.e. minimize its negation.
            d_loss = -torch.mean(discriminator(real)) + torch.mean(discriminator(fake))
            d_loss.backward()
            d_optimizer.microbatch_step()
        d_optimizer.step()

        # Weight clipping enforces the Lipschitz constraint of the original WGAN.
        for parameter in discriminator.parameters():
            parameter.data.clamp_(-params['clip_value'], params['clip_value'])

        # Update the generator only once every `d_updates` critic updates.
        if iteration % params['d_updates'] == 0:
            z = torch.randn(X_minibatch.size(0), params['latent_dim'], device=params['device'], requires_grad=False)
            fake = decoder(generator(z))

            g_optimizer.zero_grad()
            g_loss = -torch.mean(discriminator(fake))
            g_loss.backward()
            g_optimizer.step()

        if iteration % 100 == 0:
            print('[Iteration %d/%d] [D loss: %f] [G loss: %f]' % (
                iteration, params['iterations'], d_loss.item(), g_loss.item()))

        iteration += 1

        if iteration % 1000 == 0:
            with open('dpwgans1/{}.dat'.format(iteration), 'wb') as f:
                torch.save(generator, f)

    return generator
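# Illustrative usage only, not part of the original script: a hypothetical params
# dict for the WGAN train() above. latent_dim, minibatch_size, noise_multiplier,
# and delta follow the constants used in the epsilon-reporting snippet for the
# private models; the remaining values are placeholders.
example_gan_params = {
    'dataset': 'mimic',
    'device': 'cuda' if torch.cuda.is_available() else 'cpu',
    'latent_dim': 64,
    'binary': True,
    'lr': 0.005,
    'alpha': 0.99,
    'l2_penalty': 0.,
    'l2_norm_clip': 0.01,
    'noise_multiplier': 1.1,
    'minibatch_size': 128,
    'microbatch_size': 1,
    'clip_value': 0.01,
    'd_updates': 5,
    'delta': 1.2871523321606923e-5,
    'iterations': 20000,
}
generator = train(example_gan_params)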
else:
    weights.append(1.)
    ds.append((datatype, 1))

weights = torch.tensor(weights).to(ae_params['device'])

# Alternative reconstruction losses, kept commented out (a toy check of the
# weighted BCE variant appears below):
# autoencoder_loss = (lambda input, target: torch.mul(weights, torch.pow(input - target, 2)).sum(dim=1).mean(dim=0))
# autoencoder_loss = lambda input, target: torch.mul(weights, F.binary_cross_entropy(input, target, reduction='none')).sum(dim=1).mean(dim=0)
autoencoder_loss = nn.BCELoss()
# autoencoder_loss = nn.MSELoss()

print(autoencoder)

print('Achieves ({}, {})-DP'.format(
    analysis.epsilon(len(X_train_encoded), ae_params['minibatch_size'], ae_params['noise_multiplier'],
                     ae_params['iterations'], ae_params['delta']),
    ae_params['delta'],
))

minibatch_loader, microbatch_loader = sampling.get_data_loaders(
    minibatch_size=ae_params['minibatch_size'],
    microbatch_size=ae_params['microbatch_size'],
    iterations=ae_params['iterations'],
    nonprivate=ae_params['nonprivate'],
)

train_losses, validation_losses = [], []

X_train_encoded = X_train_encoded.to(ae_params['device'])
X_test_encoded = X_test_encoded.to(ae_params['device'])
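# Illustrative only, not part of the original script: a toy check of what the
# commented-out weighted BCE above computes. `toy_weights`, `inp`, and `target`
# are made-up values; the real `weights` vector is the one built from `ds` above.
import torch
import torch.nn.functional as F

toy_weights = torch.tensor([1., 2., 1.])
inp = torch.tensor([[0.9, 0.2, 0.6], [0.1, 0.8, 0.4]])
target = torch.tensor([[1., 0., 1.], [0., 1., 0.]])
# Weight each feature's BCE term, sum over features, then average over the batch.
toy_loss = torch.mul(toy_weights, F.binary_cross_entropy(inp, target, reduction='none')).sum(dim=1).mean(dim=0)
print(toy_loss.item())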