Example #1
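# NOTE: this excerpt assumes the enclosing module already provides `numpy as np`,
# `torch`, `OrderedDict` (from collections), a module-level `use_cuda` flag, and the
# `DPVI` training loop (imported as `from dpvi import DPVI` in Example #5 below).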
def infer(T, C, sigma, batch_size, Optimizer, lr, X_apps, k):
    from linear import ReparamXpand
    N = len(X_apps)
    ## Initialize and expand model
    ### Define model for reparametrization
    param_dims = {
        'theta_unconstrained': [k, X_apps.shape[-1]],
        'pi_unconstrained': [k - 1]
    }
    param_dims = OrderedDict(param_dims)

    ### Compute the total number of parameters in model
    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])

    if sigma > 0:
        model = ReparamXpand(batch_size, input_dim, param_dims,
                             flat_param_dims)
        optimizer = Optimizer(model.parameters(), lr=lr)
    else:
        model = ReparamXpand(1, input_dim, param_dims, flat_param_dims)
        optimizer = Optimizer(model.parameters(), lr=lr)
    if use_cuda:
        X_apps = X_apps.cuda()
        model.cuda()
    model.reparam.weight.data[:, -(k - 1):].mul_(0)
    model.reparam.bias.data[:, -(k - 1):].mul_(0)
    ## Training model
    model = DPVI(model, T, N, batch_size, X_apps, sigma, C, optimizer)
    ## Create a generative model based on model parameters and return it
    generative_model = ReparamXpand(1, input_dim, param_dims, flat_param_dims)
    generative_model.reparam.bias.detach_()
    generative_model.reparam.weight.detach_()
    generative_model.reparam.bias.data = torch.tensor(
        model.reparam.bias.data.cpu()[0].data.numpy(), device='cpu')
    generative_model.reparam.weight.data = torch.tensor(
        model.reparam.weight.data.cpu()[0].data.numpy(), device='cpu')
    return generative_model
Example #2
def infer(T, C, sigma, batch_size, n_mc, Optimizer, learning_rate, train_data):
    ## Initialize and expand model

    input_dim = train_data.shape[1] - 1
    model = ReparamXpand(batch_size, input_dim)
    if use_cuda:
        model.cuda()
    optimizer = Optimizer(model.parameters(), lr=learning_rate)
    model = DPVI(model, T, n_mc, train_data.shape[0], batch_size,
                 train_data, sigma, C, optimizer, use_cuda)

    ## Create a generative model based on model parameters and return it
    generative_model = ReparamXpand(1, input_dim)
    generative_model.reparam.bias.data = torch.tensor(
        model.reparam.bias.data.cpu()[0].data.numpy(), device='cpu')
    generative_model.reparam.weight.data = torch.tensor(
        model.reparam.weight.data.cpu()[0].data.numpy(), device='cpu')
    generative_model.reparam.bias.detach_()
    generative_model.reparam.weight.detach_()
    return generative_model
Example #3
def infer(T, batch_size, Optimizer, learning_rate, train_data, variable_types,
          k):
    ## Initialize and expand model
    param_dims = OrderedDict()
    for key, value in variable_types.items():
        if key == 'pi_unconstrained':
            param_dims[key] = [k - 1]
        else:
            if value == 'Bernoulli':
                param_dims[key] = [k]
            elif (key == 'lex.dur' and value is None):
                param_dims[key] = [2, k]
            elif (key == 'ep' and value is None):
                param_dims[key] = [k]
            elif (key == 'dead' and value is None):
                param_dims[key] = [k]
            elif value == 'Beta':
                param_dims[key] = [2, k]
            elif value == 'Categorical':
                param_dims[key] = [k, len(np.unique(train_data[key]))]

    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])
    model = ReparamXpand(1, input_dim, param_dims, flat_param_dims)
    model.reparam.bias.data = model.reparam.bias.data.flatten()
    model.reparam.weight.data = model.reparam.weight.data.flatten()

    ### Helper transforms used for the initialization below
    def logit(y):
        return torch.log(y) - torch.log(1. - y)

    def inverse_softmax(y):
        last = 1e-23 * torch.ones(1)  # just something small
        sum_term = -50. - torch.log(last)
        x = torch.log(y) - sum_term
        return x

    ### Init model close to feature means
    ## Laplace mech with small epsilon to guarantee DP of the initialization
    for key in train_data.columns:
        if variable_types[key] == 'Bernoulli' or key in ['dead']:
            param_mean = torch.as_tensor(train_data[key].mean(0))
            param_location = list(model.param_dims.keys()).index(key)
            init_param = logit(
                torch.rand(k) * (param_mean * 2. - param_mean * 0.5) +
                param_mean * 0.5)

            start_index = np.sum(model.flat_param_dims[:param_location])
            end_index = start_index + np.sum(model.param_dims[key])
            model.reparam.bias.data[start_index:end_index] = init_param
        elif variable_types[key] == 'Categorical':
            freqs = np.unique(train_data[key], return_counts=1)[1]
            num_cats = len(freqs)
            param_mean = torch.as_tensor(freqs / np.sum(freqs))
            init_param = inverse_softmax(param_mean)
            init_param = 0.5 * torch.randn(k, num_cats) + init_param
            init_param = init_param.flatten()
            param_location = list(model.param_dims.keys()).index(key)
            start_index = np.sum(model.flat_param_dims[:param_location])
            end_index = start_index + np.prod(model.param_dims[key])
            model.reparam.bias.data[start_index:end_index] = init_param

    if use_cuda:
        model.cuda()
    optimizer = Optimizer(model.parameters(), lr=learning_rate)
    N = len(train_data)
    model = VI(model, T, N, batch_size, train_data, optimizer, variable_types)

    ## Create a generative model based on model parameters and return it
    generative_model = ReparamXpand(1, input_dim, param_dims, flat_param_dims)
    generative_model.reparam.bias.detach_()
    generative_model.reparam.weight.detach_()
    generative_model.reparam.bias.data = torch.tensor(
        model.reparam.bias.data.cpu().data.numpy(), device='cpu')
    generative_model.reparam.weight.data = torch.tensor(
        model.reparam.weight.data.cpu().data.numpy(), device='cpu')
    return generative_model
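As a rough illustration only, the sketch below shows how the mixture-model `infer` variant directly above might be called; the DataFrame columns, `variable_types` mapping, and hyperparameters are invented for this example, and `ReparamXpand`, `VI`, and `use_cuda` are assumed to be available in the enclosing module.

# Hypothetical usage of the mixture-model infer() above (assumed data and settings).
import numpy as np
import pandas as pd
from collections import OrderedDict
from torch.optim import Adam

train_data = pd.DataFrame({
    'dead': np.random.binomial(1, 0.3, size=1000),    # binary column -> 'Bernoulli'
    'region': np.random.randint(0, 4, size=1000),     # 4 categories -> 'Categorical'
})
variable_types = OrderedDict([
    ('dead', 'Bernoulli'),
    ('region', 'Categorical'),
    ('pi_unconstrained', None),  # mixture weights, handled separately above
])
generative_model = infer(T=2000, batch_size=100, Optimizer=Adam, learning_rate=1e-3,
                         train_data=train_data, variable_types=variable_types, k=5)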
Example #4
def DPVI(model, T, data, batch_size, optimizer, C, sigma, variable_types, verbose=False):
	input_dim = model.input_dim
	N = data.shape[0]
	for i in range(T):
		## Take minibatch
		minibatch = data.sample(batch_size, replace=False)
		## Reset optimizer and ELBO
		optimizer.zero_grad()
		elbo = 0
		## Draws for mc integration
		draw_ = torch.randn(1, input_dim)
		## MC integration for likelihood part of ELBO
		draw = model.forward(draw_[0])

		## Compute the log-likelihood contribution		
		log_likelihood_loss = -1*log_likelihood(minibatch,\
				draw, variable_types, use_cuda=False)
		elbo += log_likelihood_loss
		log_likelihood_loss.backward(retain_graph=True) # Backward call from the data dependent part
		## sigma is the std of DP-noise. Thus if we are running with DP, we clip and perturb
		if sigma>0:
			"""
			 Using the reparametrization trick, we can write $z = \mu_q + \exp(\log \sigma_q)*\eta$, where \eta \sim N(0,1).
			 Now, as our loss function L can essentially be written as $L = f(z; X)$, the derivative w.r.t $\mu_q$
			 will be f'(z ; X) and w.r.t $\log \sigma_q = s_q$ it will be $\exp(s)\eta f'(z; X) = \exp(s_q)\eta \nabla_{\mu_q} L$.
			 Thus it suffices to privately compute $\nabla_{\mu_q} L$ and based on that compute $\nabla_{s_q} L$ since 
			 the $\exp{s}\eta$ factor of the $\nable_{s_q} L$ will be data independent and thus be considered as post-processing.
			"""
			## Draw the DP noise from N(0, C^2 sigma^2 I), where I=eye(d)			
			noise_b = sigma*C*torch.randn(input_dim)
			## Compute the clipping scale
			clip_bound = torch.clamp(model.reparam.bias.grad.data.norm(dim=1)/C, 1.0)
			## Clip gradients 
			model.reparam.bias.grad.data = model.reparam.bias.grad.data.div(clip_bound.unsqueeze(1))
			## Add noise
			model.reparam.bias.grad.data = (model.reparam.bias.grad.data.sum(0)+noise_b)\
										.repeat(batch_size).view_as(model.reparam.bias.grad.data)
			## Using the property of reparametrization trick for mean-field Gaussian, we compute the gradient of $s_q$ using noisy gradient of $\mu_q$
			model.reparam.weight.grad.data = model.reparam.bias.grad.data*\
					model.reparam.weight.data.exp()*draw_[0]
			ll_bias_grad = model.reparam.bias.grad.data.clone() # save likelihood_grads
			ll_weight_grad = model.reparam.weight.grad.data.clone() # save likelihood_grads
			optimizer.zero_grad() # zero the gradients and proceed to computing prior and entropy contributions
			draw = model.forward(draw_[0])
			
		log_prior_loss = -(batch_size/N)*log_prior(draw, variable_types)
		elbo += log_prior_loss
		log_prior_loss.backward(retain_graph=True)
		if sigma>0:
			## Replicate prior gradient contribution to all expanded grads
			model.reparam.weight.grad.data = model.reparam.weight.grad.data[0].repeat(batch_size).\
											view_as(model.reparam.weight.grad.data)
			model.reparam.bias.grad.data = model.reparam.bias.grad.data[0].repeat(batch_size).\
											view_as(model.reparam.bias.grad.data)

		## Add entropy to ELBO
		entropy = -(batch_size/N)*mvn_entropy(model.reparam)
		elbo += entropy
		entropy.backward(retain_graph=True)
		if sigma>0:
			## Add log-likelihood grad contributions to grads
			model.reparam.weight.grad.data.add_(ll_weight_grad)
			model.reparam.bias.grad.data.add_(ll_bias_grad)
		# Average gradients
		model.reparam.bias.grad.data.mul_(N/batch_size)
		model.reparam.weight.grad.data.mul_(N/batch_size)
		
		## Take step
		optimizer.step()
		if verbose:
			if i % 10 == 0: 
				sys.stdout.write('\r{}% : ELBO = {}'.format(int(i*100/T),-1.*elbo.data.tolist()))
			if i == T-1: 
				sys.stdout.write('\rDone : ELBO = {}\n'.format((-1.*elbo.data.tolist())))
			sys.stdout.flush()

	model_ = ReparamXpand(1, model.input_dim, model.param_dims, model.flat_param_dims)
	model_.reparam.bias.data = model.reparam.bias.data[0]
	model_.reparam.weight.data = model.reparam.weight.data[0]
	model_.reparam.bias.detach_()
	model_.reparam.weight.detach_()
	return model_
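To make the private gradient step above easier to follow in isolation, here is a small self-contained sketch of per-example clipping, Gaussian perturbation, and the post-processing that recovers the log-std gradient from the noisy mean gradient; the shapes and numbers are placeholders, only the formulas mirror the DPVI body above.

# Standalone sketch of the clip-and-perturb step used in DPVI (illustrative values only).
import torch

def clip_and_perturb(per_example_grads, C, sigma):
    # per_example_grads: (batch_size, d) matrix of per-example gradients w.r.t. mu_q.
    # Scale down every row whose L2 norm exceeds the clipping bound C.
    clip_factor = torch.clamp(per_example_grads.norm(dim=1) / C, min=1.0)
    clipped = per_example_grads / clip_factor.unsqueeze(1)
    # Sum over the minibatch and add noise drawn from N(0, C^2 sigma^2 I).
    noise = sigma * C * torch.randn(per_example_grads.shape[1])
    return clipped.sum(0) + noise

batch_size, d = 8, 5
grads_mu = torch.randn(batch_size, d)    # pretend per-example gradients
noisy_grad_mu = clip_and_perturb(grads_mu, C=1.0, sigma=2.0)
# As in the docstring above, the gradient w.r.t. s_q = log(sigma_q) is data-independent
# post-processing of the noisy mean gradient:
eta = torch.randn(d)    # the standard-normal draw used in the forward pass
s_q = torch.zeros(d)    # current log-std parameters
noisy_grad_s = noisy_grad_mu * s_q.exp() * eta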
Example #5
def main():
    # Set DPVI params
    T = 80000
    C = 2.0
    lr = .0005
    q = 0.005
    batch_size = int(q * N)
    sigma = float(sys.argv[1])
    income = sys.argv[2]
    seed = int(sys.argv[3])
    torch.manual_seed(seed)
    npr.seed(seed)
    # Set number of mixture components
    k = 10
    param_dims = OrderedDict()
    for key, value in variable_types.items():
        if key == 'pi_unconstrained':
            param_dims[key] = [k - 1]
        else:
            if value == 'Bernoulli':
                param_dims[key] = [k]
            elif value == 'Categorical':
                param_dims[key] = [k, len(np.unique(data[key]))]
            elif value == 'Beta':
                param_dims[key] = [2, k]

    input_dim = int(np.sum([np.prod(value) for value in param_dims.values()]))
    flat_param_dims = np.array(
        [np.prod(value) for value in param_dims.values()])

    rich_data = data[data['Target'] == 1]
    batch_size_rich = int(q * len(rich_data))
    poor_data = data[data['Target'] == 0]
    batch_size_poor = int(q * len(poor_data))

    ### Save log
    date = datetime.date.today().isoformat()
    wall_start = time.time()
    cpu_start = time.process_time()
    out_file = open("out_file_{}_{}_{}.txt".format(income, date, sigma), "a")
    sys.stdout = out_file
    print("Sigma : {}".format(sigma))

    ## Containers for models
    models = []

    from torch.optim import Adam as Optimizer
    from dpvi import DPVI
    ## Run inference for the selected income group
    if income == "rich":
        rich_model = ReparamXpand(batch_size_rich, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_rich = Optimizer(rich_model.parameters(), lr=lr)
        # Init mixture-weight variational parameters: mean 0, log-std -2.0
        rich_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            rich_model.reparam.bias.data[:, -(k - 1):])
        rich_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            rich_model.reparam.weight.data[:, -(k - 1):])
        rich_model_ = DPVI(rich_model, T, rich_data, batch_size_rich,
                           optimizer_rich, C, sigma, variable_types)
        models.append(rich_model_)
    else:
        poor_model = ReparamXpand(batch_size_poor, input_dim, param_dims,
                                  flat_param_dims)
        optimizer_poor = Optimizer(poor_model.parameters(), lr=lr)
        poor_model.reparam.bias.data[:, -(k - 1):] = 0.0 * torch.ones_like(
            poor_model.reparam.bias.data[:, -(k - 1):])
        poor_model.reparam.weight.data[:, -(k - 1):] = -2.0 * torch.ones_like(
            poor_model.reparam.weight.data[:, -(k - 1):])

        poor_model_ = DPVI(poor_model, T, poor_data, batch_size_poor,
                           optimizer_poor, C, sigma, variable_types)
        models.append(poor_model_)
    wall_end = time.time()
    cpu_end = time.process_time()
    print('Wall time {}'.format(wall_end - wall_start))
    print('CPU time {}'.format(cpu_end - cpu_start))

    ## Compute privacy budget
    from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent
    delta = 1e-5
    rdp_orders = range(2, 500)
    rdp_eps = compute_rdp(q, sigma, T, rdp_orders)
    epsilon = 2 * get_privacy_spent(
        rdp_orders, rdp_eps, target_delta=delta / 2)[0]

    pickle.dump(
        models,
        open('./res/models_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
             'wb'))
    params = {
        'T': T,
        'C': C,
        'lr': lr,
        'k': k,
        'q': q,
        'sigma': sigma,
        'epsilon': epsilon,
        'seed': seed
    }
    pickle.dump(
        params,
        open('./res/params_{}_{}_{}_{}.p'.format(income, date, sigma, seed),
             'wb'))
    out_file.close()
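For reference, the accounting at the end of main() can be run on its own; the sketch below repeats the same compute_rdp / get_privacy_spent calls with placeholder values, keeping the import path and the epsilon-doubling convention exactly as in the example (not verified independently).

# Standalone privacy accounting sketch mirroring the calls in main() above.
from privacy.analysis.compute_dp_sgd_privacy import compute_rdp, get_privacy_spent

q, sigma, T, delta = 0.005, 1.0, 80000, 1e-5      # placeholder hyperparameters
rdp_orders = range(2, 500)
rdp_eps = compute_rdp(q, sigma, T, rdp_orders)    # RDP of the subsampled Gaussian mechanism
epsilon = 2 * get_privacy_spent(rdp_orders, rdp_eps, target_delta=delta / 2)[0]
print('epsilon = {:.3f} at delta = {}'.format(epsilon, delta))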