import time

import numpy as np
import torch
from scipy.special import logsumexp
from torch.autograd import Variable
from tqdm import tqdm

# HMC, ESS and defaults are project-local modules; the import paths
# below are assumed and may need adjusting to this repository's layout.
from hmc import HMC
from ess import ESS
import defaults


def nn_model_regression(X, Y, test_X, model, num_samples, epsilon, beta,
                        leap_frog_iters, results_manager, noise_variance=None):
    if noise_variance is None:
        noise_variance = defaults.noise_variance
    test_size = test_X.size()[0]
    # Summed squared error (size_average=False is the pre-0.4 PyTorch
    # spelling of reduction='sum'); divided by the noise variance this
    # is the Gaussian negative log likelihood up to a constant.
    criterion = torch.nn.MSELoss(size_average=False)
    sampler = HMC(model.parameters(), np.random.RandomState(1),
                  epsilon=epsilon, beta=beta, leap_frog_iters=leap_frog_iters)
    results_manager.initialize(num_samples, model, test_size, noise_variance)
    start_time = time.time()
    for sample_index in tqdm(range(num_samples)):

        def closure():
            # Energy and its gradient with respect to the parameters.
            sampler.zero_grad()
            pred = model(X)
            energy = 0.5 * criterion(pred, Y) / noise_variance
            energy.backward()
            return energy

        energy = sampler.step(closure)
        # Get the prediction at the current parameter sample.
        pred = model(test_X).data.cpu().numpy().flatten()
        results_manager.update(sample_index, pred, energy, model)
    end_time = time.time()
    print('Total time', end_time - start_time)
    print('iterations per second', num_samples * 1. / (end_time - start_time))
    results_manager.finalize(sampler.acceptance_rate())
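
# Usage sketch (illustrative; not part of the original source). Drives
# nn_model_regression with a toy network and a minimal results manager.
# MeanTracker is hypothetical: it only exists to satisfy the
# initialize/update/finalize protocol assumed of results_manager, and
# the HMC settings below are placeholders rather than tuned values.
class MeanTracker(object):
    def initialize(self, num_samples, model, test_size, noise_variance):
        self.num_samples = num_samples
        self.mean = np.zeros(test_size)

    def update(self, sample_index, pred, energy, model):
        self.mean += pred / self.num_samples

    def finalize(self, acceptance_rate):
        print('acceptance rate', acceptance_rate)


def _regression_usage_demo():
    rng = np.random.RandomState(0)
    X = Variable(torch.from_numpy(rng.randn(20, 1).astype(np.float32)))
    Y = Variable(torch.from_numpy(rng.randn(20, 1).astype(np.float32)))
    test_X = Variable(torch.from_numpy(rng.randn(5, 1).astype(np.float32)))
    model = torch.nn.Sequential(
        torch.nn.Linear(1, 10), torch.nn.Tanh(), torch.nn.Linear(10, 1))
    nn_model_regression(X, Y, test_X, model, num_samples=200, epsilon=0.01,
                        beta=1., leap_frog_iters=10,
                        results_manager=MeanTracker(), noise_variance=0.1)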
def nn_model_ml(X, Y, model_constructor):
    # Annealed importance sampling estimate of the marginal likelihood:
    # anneal from the prior (beta = 0) to the posterior (beta = 1),
    # accumulating importance weights along the temperature ladder.
    criterion = torch.nn.MSELoss(size_average=False)
    # Good settings which gave an error of 1 nat with 1 extra layer:
    # num_temperatures = 100000, num_repeats = 10.
    num_temperatures = 100000
    num_repeats = 100
    num_data = Y.size()[0]
    beta_sequence = np.linspace(0., 1., num_temperatures, endpoint=True)
    log_weights = np.zeros(num_repeats)
    rng = np.random.RandomState(1)
    for repeat_index in range(num_repeats):
        model = model_constructor()
        # beta=None here: the annealing temperature enters through the
        # closure instead.
        sampler = HMC(model.parameters(), epsilon=0.01, rng=rng, beta=None,
                      leap_frog_iters=1, include_integrator=True)
        for temperature_index in range(num_temperatures):

            def log_energy_final():
                # Full negative log likelihood at beta = 1, including
                # the Gaussian normalising constant.
                pred = model(X)
                energy = (0.5 * criterion(pred, Y) / defaults.noise_variance
                          + 0.5 * num_data
                          * np.log(2. * np.pi * defaults.noise_variance))
                return energy

            def closure():
                sampler.zero_grad()
                beta = Variable(
                    torch.FloatTensor([beta_sequence[temperature_index]]),
                    requires_grad=False)
                energy = beta * log_energy_final()
                energy.backward()
                return energy

            if temperature_index == 0:
                sampler.weight_accumulator.record_start_energy(closure())
            else:
                sampler.step(closure)
        log_weights[repeat_index] = (
            sampler.weight_accumulator.log_weight.data[0])
    # The original dropped into an interactive shell (embed()) here;
    # return the per-repeat log weights instead.
    return log_weights
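
# The per-repeat weights from nn_model_ml can be combined into a single
# marginal likelihood estimate. This helper is not in the original
# source; it spells out the standard annealed importance sampling
# estimator, assuming log_weights holds log importance weights for the
# marginal likelihood:
#   log p(Y) ~= logsumexp(log_weights) - log(num_repeats).
def ais_log_marginal_likelihood(log_weights):
    return logsumexp(log_weights) - np.log(len(log_weights))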
# Note: this redefines the nn_model_regression above; if both live in
# the same module, the later definition shadows the earlier one.
def nn_model_regression(X, Y, test_X, model, num_samples, burn_in, epsilon,
                        beta, leap_frog_iters):
    test_size = test_X.size()[0]
    # Run the sampler while maintaining online estimates of the
    # predictive mean and marginal variance (Welford's algorithm).
    # This stops us having to store large numbers of samples.
    num_points = 0
    online_mean = np.zeros(test_size)
    online_squares = np.zeros(test_size)
    criterion = torch.nn.MSELoss(size_average=False)
    sampler = HMC(model.parameters(), np.random.RandomState(1),
                  epsilon=epsilon, beta=beta, leap_frog_iters=leap_frog_iters)
    samplerB = ESS(model.parameters(), np.random.RandomState(2))
    energies = np.zeros(num_samples)
    start_time = time.time()
    for sample_index in range(num_samples):

        def closure():
            sampler.zero_grad()
            pred = model(X)
            energy = 0.5 * criterion(pred, Y) / defaults.noise_variance
            energy.backward()
            return energy

        sampler.step(closure)

        def closureB():
            # Elliptical slice sampling needs only the energy, not its
            # gradient.
            pred = model(X)
            energy = 0.5 * criterion(pred, Y) / defaults.noise_variance
            return energy

        energies[sample_index] = samplerB.step(closureB)
        if sample_index > burn_in:
            # Get the prediction at the current sample.
            pred = model(test_X).data.cpu().numpy().flatten()
            # Welford online updates for the mean and the sum of
            # squared deviations.
            num_points += 1
            delta = pred - online_mean
            online_mean = online_mean + delta / num_points
            delta2 = pred - online_mean
            online_squares = online_squares + delta * delta2
    end_time = time.time()
    print('Total time', end_time - start_time)
    print('iterations per second', num_samples * 1. / (end_time - start_time))
    # M2 / n is the (population) marginal variance estimate.
    return online_mean, online_squares / num_points
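
# Self-contained check of the Welford-style online update used above
# (illustrative; not part of the original source): the streaming mean
# and M2 / n should match numpy's batch mean and (population) variance.
def _welford_demo():
    rng = np.random.RandomState(0)
    samples = rng.randn(1000, 3)
    num_points = 0
    online_mean = np.zeros(3)
    online_squares = np.zeros(3)
    for x in samples:
        num_points += 1
        delta = x - online_mean
        online_mean = online_mean + delta / num_points
        online_squares = online_squares + delta * (x - online_mean)
    assert np.allclose(online_mean, samples.mean(axis=0))
    assert np.allclose(online_squares / num_points, samples.var(axis=0))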
# Note: again shadows the earlier nn_model_regression definitions.
def nn_model_regression(model, X_train, Y_train, X_test, Y_test):
    # Run the samplers while tracking per-sample predictions, one scalar
    # per parameter tensor as a mixing diagnostic, and the pointwise
    # negative log likelihoods on the test set. This avoids having to
    # store large numbers of samples.
    num_samples = 100000
    burn_in = 50
    n_test = X_test.size()[0]
    nlls = np.zeros((num_samples, n_test))
    num_params = len([elem for elem in model.parameters()])
    param_trackers = np.zeros((num_samples, num_params))
    pred_trackers = np.zeros((num_samples, n_test))
    criterion = torch.nn.MSELoss(size_average=False)
    sampler = HMC(model.parameters(), np.random.RandomState(1), epsilon=0.05,
                  beta=1, leap_frog_iters=10)
    samplerB = ESS(model.parameters(), np.random.RandomState(2))
    energies = np.zeros(num_samples)
    start_time = time.time()
    for sample_index in tqdm(range(num_samples)):

        def closure():
            sampler.zero_grad()
            pred = model(X_train)
            energy = 0.5 * criterion(pred, Y_train) / defaults.noise_variance
            energy.backward()
            return energy

        sampler.step(closure)

        def closureB():
            pred = model(X_train)
            energy = 0.5 * criterion(pred, Y_train) / defaults.noise_variance
            return energy

        energies[sample_index] = samplerB.step(closureB)
        # Track the first element of each parameter tensor.
        for param_index, param in enumerate(model.parameters()):
            param_trackers[sample_index, param_index] = param.data.view(-1)[0]
        # Get predictions and pointwise Gaussian negative log
        # likelihoods at the current sample.
        pred_test = model(X_test)
        pred_trackers[sample_index, :] = pred_test.data.numpy().flatten()
        pred_energies = (0.5 * torch.pow(pred_test - Y_test, 2)
                         / defaults.noise_variance
                         + 0.5 * np.log(2. * np.pi * defaults.noise_variance))
        nlls[sample_index, :] = pred_energies.data.numpy().flatten()
    end_time = time.time()
    print('Total time', end_time - start_time)
    print('iterations per second', num_samples * 1. / (end_time - start_time))
    valid_holdout = nlls[burn_in:, :]
    # log p(y* | x*) ~= logsumexp(-valid_holdout) - log(num kept samples),
    # with the logsumexp taken over the sample axis.
    log_pred_densities = (logsumexp(-valid_holdout, axis=0)
                          - np.log(valid_holdout.shape[0]))
    results = {
        'log_densities_NN': log_pred_densities,
        'pred_trackers': pred_trackers,
        'param_trackers': param_trackers,
        'energies': energies
    }
    return results
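
# Post-processing sketch (illustrative; not from the original source):
# given the results dict returned above, the posterior predictive mean
# and spread follow from the tracked predictions once the burn-in is
# discarded, and averaging log_densities_NN summarises held-out
# performance in nats per test point.
def summarise_results(results, burn_in=50):
    preds = results['pred_trackers'][burn_in:, :]
    return {
        'pred_mean': preds.mean(axis=0),
        # Spread of the noiseless function values only; it does not
        # include the observation noise variance.
        'pred_std': preds.std(axis=0),
        'mean_log_density': results['log_densities_NN'].mean(),
    }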