def infer(T, C, sigma, batch_size, Optimizer, lr, X_apps, k):
    from linear import ReparamXpand
    N = len(X_apps)
    ## Initialize and expand model
    ### Define model for reparametrization
    param_dims = {
        'theta_unconstrained': [k, X_apps.shape[-1]],
        'pi_unconstrained': [k - 1]
    }
    param_dims = OrderedDict(param_dims)
    ### Compute the total number of parameters in model
    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])
    if sigma > 0:
        model = ReparamXpand(batch_size, input_dim, param_dims, flat_param_dims)
        optimizer = Optimizer(model.parameters(), lr=lr)
    else:
        model = ReparamXpand(1, input_dim, param_dims, flat_param_dims)
        optimizer = Optimizer(model.parameters(), lr=lr)
    if use_cuda:
        X_apps = X_apps.cuda()
        model.cuda()
    model.reparam.weight.data[:, -(k - 1):].mul_(0)
    model.reparam.bias.data[:, -(k - 1):].mul_(0)
    ## Train the model
    model = DPVI(model, T, N, batch_size, X_apps, sigma, C, optimizer)
    ## Create a generative model based on model parameters and return it
    generative_model = ReparamXpand(1, input_dim, param_dims, flat_param_dims)
    generative_model.reparam.bias.detach_()
    generative_model.reparam.weight.detach_()
    generative_model.reparam.bias.data = torch.tensor(
        model.reparam.bias.data.cpu()[0].data.numpy(), device='cpu')
    generative_model.reparam.weight.data = torch.tensor(
        model.reparam.weight.data.cpu()[0].data.numpy(), device='cpu')
    return generative_model
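# --- Illustrative usage sketch (not part of the original source). ---
# Shows how this `infer` variant might be called; the toy data, the Adam
# optimizer, and all hyperparameter values below are assumptions.
def _example_infer_mixture():
    import torch
    from torch.optim import Adam

    # Toy binary app-usage matrix: 10 000 users, 20 apps
    X_apps = torch.bernoulli(0.3 * torch.ones(10000, 20))
    gen_model = infer(T=20000, C=1.0, sigma=2.0, batch_size=100,
                      Optimizer=Adam, lr=1e-3, X_apps=X_apps, k=5)
    # gen_model.reparam.bias holds the learned variational means,
    # gen_model.reparam.weight the corresponding log-stds.
    return gen_model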
def infer(T, C, sigma, batch_size, n_mc, Optimizer, learning_rate, train_data):
    ## Initialize and expand model
    input_dim = train_data.shape[1] - 1
    model = ReparamXpand(batch_size, input_dim)
    if use_cuda:
        model.cuda()
    optimizer = Optimizer(model.parameters(), lr=learning_rate)
    model = DPVI(model, T, n_mc, train_data.shape[0],
                 batch_size, train_data, sigma, C, optimizer, use_cuda)
    ## Create a generative model based on model parameters and return it
    generative_model = ReparamXpand(1, input_dim)
    generative_model.reparam.bias.data = torch.tensor(
        model.reparam.bias.data.cpu()[0].data.numpy(), device='cpu')
    generative_model.reparam.weight.data = torch.tensor(
        model.reparam.weight.data.cpu()[0].data.numpy(), device='cpu')
    generative_model.reparam.bias.detach_()
    generative_model.reparam.weight.detach_()
    return generative_model
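# --- Illustrative sketch (not part of the original source). ---
# Based on how DPVI calls `model.forward(draw_)`, the returned generative model
# appears to map a standard-normal draw eta to a parameter sample
# z = mu + exp(s) * eta. Assuming that interface, one posterior draw of the
# (unconstrained) model parameters would look like:
def _example_posterior_draw(generative_model, input_dim):
    import torch

    eta = torch.randn(input_dim)            # standard-normal input
    param_draw = generative_model.forward(eta)  # one sample of the parameters
    return param_draw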
def infer(T, batch_size, Optimizer, learning_rate, train_data, variable_types, k):
    ## Initialize and expand model
    param_dims = OrderedDict()
    for key, value in variable_types.items():
        if key == 'pi_unconstrained':
            param_dims[key] = [k - 1]
        else:
            if value == 'Bernoulli':
                param_dims[key] = [k]
            elif key == 'lex.dur' and variable_types[key] is None:
                param_dims[key] = [2, k]
            elif key == 'ep' and variable_types[key] is None:
                param_dims[key] = [k]
            elif key == 'dead' and variable_types[key] is None:
                param_dims[key] = [k]
            elif value == 'Beta':
                param_dims[key] = [2, k]
            elif value == 'Categorical':
                param_dims[key] = [k, len(np.unique(train_data[key]))]
    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])
    model = ReparamXpand(1, input_dim, param_dims, flat_param_dims)
    model.reparam.bias.data = model.reparam.bias.data.flatten()
    model.reparam.weight.data = model.reparam.weight.data.flatten()

    ### Helpers for initializing the model close to the feature means
    def logit(y):
        return torch.log(y) - torch.log(1. - y)

    def inverse_softmax(y):
        last = 1e-23 * torch.ones(1)  # just something small
        sum_term = -50. - torch.log(last)
        x = torch.log(y) - sum_term
        return x

    ### Init model close to feature means
    ## Laplace mech with small epsilon to guarantee DP of the initialization
    for key in train_data.columns:
        if variable_types[key] == 'Bernoulli' or key in ['dead']:
            param_mean = torch.as_tensor(train_data[key].mean(0))
            param_location = list(model.param_dims.keys()).index(key)
            init_param = logit(
                torch.rand(k) * (param_mean * 2. - param_mean * 0.5)
                + param_mean * 0.5)
            start_index = np.sum(model.flat_param_dims[:param_location])
            model.reparam.bias.data[start_index:(start_index + np.sum(model.param_dims[key]))] = \
                init_param
        elif variable_types[key] == 'Categorical':
            freqs = np.unique(train_data[key], return_counts=True)[1]
            num_cats = len(freqs)
            param_mean = torch.as_tensor(freqs / np.sum(freqs))
            init_param = inverse_softmax(param_mean)
            init_param = 0.5 * torch.randn(k, num_cats) + init_param
            init_param = init_param.flatten()
            param_location = list(model.param_dims.keys()).index(key)
            start_index = np.sum(model.flat_param_dims[:param_location])
            model.reparam.bias.data[start_index:(start_index + np.prod(model.param_dims[key]))] = \
                init_param
    if use_cuda:
        model.cuda()
    optimizer = Optimizer(model.parameters(), lr=learning_rate)
    N = len(train_data)
    model = VI(model, T, N, batch_size, train_data, optimizer, variable_types)
    ## Create a generative model based on model parameters and return it
    generative_model = ReparamXpand(1, input_dim, param_dims, flat_param_dims)
    generative_model.reparam.bias.detach_()
    generative_model.reparam.weight.detach_()
    generative_model.reparam.bias.data = torch.tensor(
        model.reparam.bias.data.cpu().data.numpy(), device='cpu')
    generative_model.reparam.weight.data = torch.tensor(
        model.reparam.weight.data.cpu().data.numpy(), device='cpu')
    return generative_model
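# --- Small numerical check (illustrative, not part of the original source). ---
# The data-dependent initialization above places the unconstrained Bernoulli
# parameter near logit(feature mean), so that under a sigmoid link the model
# starts close to the observed mean. A toy check, assuming the sigmoid link:
def _check_logit_init():
    import torch

    p = torch.tensor(0.27)                     # observed feature mean
    theta0 = torch.log(p) - torch.log(1. - p)  # logit(p), as in the init above
    return torch.sigmoid(theta0)               # ~0.27: starts near the data mean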
def DPVI(model, T, data, batch_size, optimizer, C, sigma, variable_types,
         verbose=False):
    input_dim = model.input_dim
    N = data.shape[0]
    for i in range(T):
        ## Take minibatch
        minibatch = data.sample(batch_size, replace=False)
        ## Reset optimizer and ELBO
        optimizer.zero_grad()
        elbo = 0
        ## Draws for MC integration
        draw_ = torch.randn(1, input_dim)
        ## MC integration for likelihood part of ELBO
        draw = model.forward(draw_[0])
        ## Compute the log-likelihood contribution
        log_likelihood_loss = -1 * log_likelihood(minibatch, draw,
                                                  variable_types, use_cuda=False)
        elbo += log_likelihood_loss
        # Backward call for the data-dependent part
        log_likelihood_loss.backward(retain_graph=True)
        ## sigma is the std of the DP noise; if we are running with DP, clip and perturb
        if sigma > 0:
            r"""
            Using the reparametrization trick, we can write
            $z = \mu_q + \exp(s_q)\,\eta$, where $\eta \sim N(0, 1)$ and
            $s_q = \log \sigma_q$. Since the loss can be written as
            $L = f(z; X)$, the derivative w.r.t. $\mu_q$ is $f'(z; X)$ and the
            derivative w.r.t. $s_q$ is
            $\exp(s_q)\,\eta\, f'(z; X) = \exp(s_q)\,\eta\, \nabla_{\mu_q} L$.
            Thus it suffices to privately compute $\nabla_{\mu_q} L$ and derive
            $\nabla_{s_q} L$ from it, since the $\exp(s_q)\,\eta$ factor is data
            independent and can be treated as post-processing.
            """
            ## Draw the DP noise from N(0, C^2 sigma^2 I), where I = eye(d)
            noise_b = sigma * C * torch.randn(input_dim)
            ## Compute the clipping scale
            clip_bound = torch.clamp(
                model.reparam.bias.grad.data.norm(dim=1) / C, min=1.0)
            ## Clip per-example gradients
            model.reparam.bias.grad.data = \
                model.reparam.bias.grad.data.div(clip_bound.unsqueeze(1))
            ## Sum clipped gradients, add noise, broadcast back to the expanded shape
            model.reparam.bias.grad.data = (model.reparam.bias.grad.data.sum(0) + noise_b)\
                .repeat(batch_size).view_as(model.reparam.bias.grad.data)
            ## Using the reparametrization trick for the mean-field Gaussian,
            ## compute the gradient of $s_q$ from the noisy gradient of $\mu_q$
            model.reparam.weight.grad.data = model.reparam.bias.grad.data * \
                model.reparam.weight.data.exp() * draw_[0]
            ll_bias_grad = model.reparam.bias.grad.data.clone()      # save likelihood grads
            ll_weight_grad = model.reparam.weight.grad.data.clone()  # save likelihood grads
            # Zero the gradients and proceed to the prior and entropy contributions
            optimizer.zero_grad()
        draw = model.forward(draw_[0])
        log_prior_loss = -(batch_size / N) * log_prior(draw, variable_types)
        elbo += log_prior_loss
        log_prior_loss.backward(retain_graph=True)
        if sigma > 0:
            ## Replicate the prior gradient contribution to all expanded grads
            model.reparam.weight.grad.data = model.reparam.weight.grad.data[0]\
                .repeat(batch_size).view_as(model.reparam.weight.grad.data)
            model.reparam.bias.grad.data = model.reparam.bias.grad.data[0]\
                .repeat(batch_size).view_as(model.reparam.bias.grad.data)
        ## Add entropy to ELBO
        entropy = -(batch_size / N) * mvn_entropy(model.reparam)
        elbo += entropy
        entropy.backward(retain_graph=True)
        if sigma > 0:
            ## Add log-likelihood grad contributions to grads
            model.reparam.weight.grad.data.add_(ll_weight_grad)
            model.reparam.bias.grad.data.add_(ll_bias_grad)
        # Rescale minibatch gradients to the full-data ELBO
        model.reparam.bias.grad.data.mul_(N / batch_size)
        model.reparam.weight.grad.data.mul_(N / batch_size)
        ## Take step
        optimizer.step()
        if verbose:
            if i % 10 == 0:
                sys.stdout.write('\r{}% : ELBO = {}'.format(
                    int(i * 100 / T), -1. * elbo.data.tolist()))
            if i == T - 1:
                sys.stdout.write('\rDone : ELBO = {}\n'.format(
                    -1. * elbo.data.tolist()))
            sys.stdout.flush()
    model_ = ReparamXpand(1, model.input_dim, model.param_dims, model.flat_param_dims)
    model_.reparam.bias.data = model.reparam.bias.data[0]
    model_.reparam.weight.data = model.reparam.weight.data[0]
    model_.reparam.bias.detach_()
    model_.reparam.weight.detach_()
    return model_
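# --- Autograd check of the gradient identity used above (illustrative, not part
# of the original source). For z = mu + exp(s) * eta and any smooth loss
# L = f(z), the docstring claims dL/ds = exp(s) * eta * dL/dmu. The toy loss
# below is an assumption; it only serves to verify the identity numerically.
def _check_reparam_gradient_identity():
    import torch

    mu = torch.tensor(0.3, requires_grad=True)
    s = torch.tensor(-1.2, requires_grad=True)
    eta = torch.randn(())
    z = mu + torch.exp(s) * eta
    loss = torch.sin(z) ** 2                 # arbitrary smooth f(z)
    loss.backward()
    # dL/ds should equal exp(s) * eta * dL/dmu
    return torch.allclose(s.grad, torch.exp(s.detach()) * eta * mu.grad)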
def main():
    # Set DPVI params
    T = 80000
    C = 2.0
    lr = .0005
    q = 0.005
    batch_size = int(q * N)
    sigma = float(sys.argv[1])
    income = sys.argv[2]
    seed = int(sys.argv[3])
    torch.manual_seed(seed)
    npr.seed(seed)
    # Set number of mixture components
    k = 10
    param_dims = OrderedDict()
    for key, value in variable_types.items():
        if key == 'pi_unconstrained':
            param_dims[key] = [k - 1]
        else:
            if value == 'Bernoulli':
                param_dims[key] = [k]
            elif value == 'Categorical':
                param_dims[key] = [k, len(np.unique(data[key]))]
            elif value == 'Beta':
                param_dims[key] = [2, k]
    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])
    rich_data = data[data['Target'] == 1]
    batch_size_rich = int(q * len(rich_data))
    poor_data = data[data['Target'] == 0]
    batch_size_poor = int(q * len(poor_data))
    ### Save log
    date = datetime.date.today().isoformat()
    wall_start = time.time()
    cpu_start = time.clock()
    out_file = open("out_file_{}_{}_{}.txt".format(income, date, sigma), "a")
    sys.stdout = out_file
    print("Sigma : {}".format(sigma))
    ## Containers for models
    models = []
    from torch.optim import Adam as Optimizer
    from dpvi import DPVI
    ## Run inference on the selected income split
    if income == "rich":
        rich_model = ReparamXpand(batch_size_rich, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_rich = Optimizer(rich_model.parameters(), lr=lr)
        # Initialize mixture-fraction params: means 0.0, log-stds -2.0
        rich_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            rich_model.reparam.bias.data[:, -(k - 1):])
        rich_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            rich_model.reparam.weight.data[:, -(k - 1):])
        rich_model_ = DPVI(rich_model, T, rich_data, batch_size_rich,
                           optimizer_rich, C, sigma, variable_types)
        models.append(rich_model_)
    else:
        poor_model = ReparamXpand(batch_size_poor, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_poor = Optimizer(poor_model.parameters(), lr=lr)
        # Initialize mixture-fraction params: means 0.0, log-stds -2.0
        poor_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            poor_model.reparam.bias.data[:, -(k - 1):])
        poor_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            poor_model.reparam.weight.data[:, -(k - 1):])
        poor_model_ = DPVI(poor_model, T, poor_data, batch_size_poor,
                           optimizer_poor, C, sigma, variable_types)
        models.append(poor_model_)
    wall_end = time.time()
    cpu_end = time.clock()
    print('Wall time {}'.format(wall_end - wall_start))
    print('CPU time {}'.format(cpu_end - cpu_start))
    ## Compute privacy budget
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    delta = 1e-5
    rdp_orders = range(2, 500)
    rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
    epsilon = 2 * get_privacy_spent(
        rdp_orders, rdp_eps, target_delta=delta / 2)[0]
    pickle.dump(
        models,
        open('./res/models_{}_{}_{}_{}.p'.format(income, date, sigma, seed), 'wb'))
    params = {
        'T': T,
        'C': C,
        'lr': lr,
        'k': k,
        'q': q,
        'sigma': sigma,
        'epsilon': epsilon,
        'seed': seed
    }
    pickle.dump(
        params,
        open('./res/params_{}_{}_{}_{}.p'.format(income, date, sigma, seed), 'wb'))
    out_file.close()
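# --- Assumed entry point (not shown in the original source). ---
# The script reads the noise multiplier sigma, the income split, and the RNG
# seed from sys.argv; with a conventional guard it would be invoked as, e.g.,
#   python main.py 1.0 rich 42
# where the file name `main.py` is an assumption.
if __name__ == '__main__':
    main()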