def train_discriminator():
    input_size = 1
    n_hidden = 3
    n_units = 256
    out_size = 1
    out_sigmoid = True
    batch_size = 512
    n_epochs = 800
    lr = 1e-2

    # Create model, training criterion and optimizer.
    criterion = ValueFunction()
    model = MLP2(input_size, n_hidden, n_units, out_size, out_sigmoid).to(device)
    print(model)
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)

    # Get iterators for the two distributions.
    f1 = iter(samplers.distribution4(batch_size))
    f0 = iter(samplers.distribution3(batch_size))

    losses = train(model, f1, f0, optimizer, criterion, device, n_epochs)

    # Visualise the loss.
    plt.figure()
    plt.plot(losses)
    # plt.show()

    return model
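# `ValueFunction` is referenced above but not defined in this snippet. A minimal
# sketch, assuming it implements the negated Jensen-Shannon value function
# V(D) = E_{x~f1}[log D(x)] + E_{x~f0}[log(1 - D(x))], so that minimising the
# returned loss maximises V(D); the class name and signature are assumptions.
import torch
import torch.nn as nn

class ValueFunction(nn.Module):
    def forward(self, d_f1, d_f0, eps=1e-8):
        # d_f1: discriminator outputs on samples from f1 (distribution4);
        # d_f0: discriminator outputs on samples from f0 (standard Gaussian).
        value = torch.log(d_f1 + eps).mean() + torch.log(1 - d_f0 + eps).mean()
        return -value  # minimise the negative value function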
def gan_eval():
    epochs = 1000
    batch_size = 1000
    hidden_size = 25
    n_hidden = 2
    input_dim = 1

    f0 = samplers.distribution3(batch_size)
    f1 = samplers.distribution4(batch_size)
    D = Discriminator(input_dim, hidden_size, n_hidden)
    train(D, f0, f1, GAN, batch_size=batch_size, epochs=epochs)

    xx = torch.randn(10000)
    f = lambda x: torch.tanh(x * 2 + 1) + x * 0.75
    d = lambda x: (1 - torch.tanh(x * 2 + 1)**2) * 2 + 0.75
    plt.hist(f(xx), 100, alpha=0.5, density=1)
    plt.hist(xx, 100, alpha=0.5, density=1)
    plt.xlim(-5, 5)
    plt.savefig('histogram')
    plt.close()

    xx = np.linspace(-5, 5, 1000)
    N = lambda x: np.exp(-x**2 / 2.) / ((2 * np.pi)**0.5)
    f0_x_tensor = Variable(
        torch.from_numpy(np.float32(xx.reshape(batch_size, input_dim))))
    D_x = D(f0_x_tensor)
    # Recover the density of f1 from the discriminator:
    # f1(x) = f0(x) * D(x) / (1 - D(x)).
    f1_est = torch.from_numpy(np.float32(N(xx).reshape(-1, 1))) * D_x / (1 - D_x)

    # Plot the discriminator output.
    r = D_x.detach().numpy()
    plt.figure(figsize=(8, 4))
    plt.subplot(1, 2, 1)
    plt.plot(xx, r)
    plt.title(r'$D(x)$')

    estimate = f1_est.detach().numpy()

    # Plot the density.
    plt.subplot(1, 2, 2)
    plt.plot(xx, estimate)
    plt.plot(
        f(torch.from_numpy(xx)).numpy(),
        d(torch.from_numpy(xx)).numpy()**(-1) * N(xx))
    plt.legend(['Estimated', 'True'])
    plt.title('Estimated vs True')
    plt.savefig('Estimated_vs_Exact.png')
    plt.close()
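# A quick sanity check of the identity used above, f1(x) = f0(x) * D*(x) / (1 - D*(x)),
# where D*(x) = f1(x) / (f0(x) + f1(x)) is the optimal discriminator. This toy
# check uses two closed-form Gaussian densities and is independent of any
# trained model; it is illustrative only.
import numpy as np

xs = np.linspace(-5, 5, 1000)
f0_pdf = np.exp(-xs**2 / 2.) / np.sqrt(2 * np.pi)        # N(0, 1)
f1_pdf = np.exp(-(xs - 1)**2 / 2.) / np.sqrt(2 * np.pi)  # N(1, 1)
d_star = f1_pdf / (f0_pdf + f1_pdf)
recovered = f0_pdf * d_star / (1 - d_star)
assert np.allclose(recovered, f1_pdf)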
plt.xlim(-5, 5)

# exact
xx = np.linspace(-5, 5, 1000)
N = lambda x: np.exp(-x**2 / 2.) / ((2 * np.pi)**0.5)
plt.plot(
    f(torch.from_numpy(xx)).numpy(),
    d(torch.from_numpy(xx)).numpy()**(-1) * N(xx))
plt.plot(xx, N(xx))

############### import the sampler ``samplers.distribution4''
############### train a discriminator on distribution4 and standard gaussian
############### estimate the density of distribution4

#######--- INSERT YOUR CODE BELOW ---#######
dist4 = samplers.distribution4(batch_size=256)
dist3 = samplers.distribution3(batch_size=256)
D = train('JSD', dist4, dist3, args.use_cuda, args.setup)

#dist1 = samplers.distribution4(batch_size=1)
#dist0 = samplers.distribution3(batch_size=1)

data_points_D = []
data_points_d4 = []
for x in xx:
    # Evaluate the discriminator at the point x.
    x_tensor = torch.from_numpy(np.array([[x]])).float()
    if args.use_cuda:
        x_tensor = x_tensor.cuda()
    y = D(x_tensor)
    data_points_D.append(y)
def N(x):
    return np.exp(-x**2 / 2.) / ((2 * np.pi)**0.5)

plt.plot(
    f(torch.from_numpy(xx)).numpy(),
    d(torch.from_numpy(xx)).numpy()**(-1) * N(xx))
plt.plot(xx, N(xx))

batch_size = 512
m_minibatch = 100

p_iter = iter(distribution3(batch_size))
f0 = p_iter
q_iter = iter(distribution4(batch_size))
f1 = q_iter

Discrim, jsd = js_divergence(f1, f0, m_minibatch)
Discrim = Discrim(torch.Tensor(xx).unsqueeze(dim=1))
r = Discrim.detach().numpy().reshape(-1)

plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.plot(xx, r)
plt.title(r'$D(x)$')

# Estimate the density of distribution4 (on xx) using the discriminator:
# f1(x) = N(x) * D(x) / (1 - D(x)).
estimate = N(xx) * r / (1 - r)
plt.subplot(1, 2, 2)
plt.plot(xx, estimate)
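# `js_divergence` is used above but not shown. A minimal sketch, assuming it
# trains a sigmoid discriminator for `m` minibatches and returns the model plus
# the minibatch JSD estimate
# JSD(f1, f0) ≈ log 2 + (1/2) E_{f1}[log D] + (1/2) E_{f0}[log(1 - D)];
# the network shape and optimiser settings here are assumptions.
import numpy as np
import torch
import torch.nn as nn

def js_divergence(f1, f0, m):
    D = nn.Sequential(nn.Linear(1, 64), nn.ReLU(), nn.Linear(64, 1), nn.Sigmoid())
    opt = torch.optim.Adam(D.parameters(), lr=1e-3)
    for _ in range(m):
        x1 = torch.from_numpy(next(f1)).float()  # samples from f1
        x0 = torch.from_numpy(next(f0)).float()  # samples from f0
        loss = -(torch.log(D(x1) + 1e-8).mean() + torch.log(1 - D(x0) + 1e-8).mean())
        opt.zero_grad()
        loss.backward()
        opt.step()
    with torch.no_grad():
        jsd = (np.log(2) + 0.5 * torch.log(D(x1) + 1e-8).mean()
               + 0.5 * torch.log(1 - D(x0) + 1e-8).mean()).item()
    return D, jsd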
from keras.models import Sequential as sq
from keras.layers import Dense
from keras.optimizers import Adam

# Create model.
discriminator = sq()
discriminator.add(Dense(units=128, activation='relu', input_dim=1))
discriminator.add(Dense(units=128, activation='relu'))
discriminator.add(Dense(units=128, activation='relu'))
discriminator.add(Dense(units=1, activation='sigmoid'))
discriminator.compile(loss=D_Loss, optimizer=Adam(lr=0.00005))

# Make target dummy for Keras; the custom D_Loss presumably ignores it.
target_dummy = np.zeros(512 * 2)

# Create samplers.
gaussian_dist_gen = samplers.distribution3()
mystery_dist_gen = samplers.distribution4(batch_size=512)

for _ in range(EPOCHS):
    # Sample the distributions.
    x = next(mystery_dist_gen)
    y = next(gaussian_dist_gen)
    # Train.
    discriminator.train_on_batch(np.concatenate((x, y)), target_dummy)

############### plotting things
############### (1) plot the output of your trained discriminator
############### (2) plot the estimated density contrasted with the true density

r = discriminator.predict(xx)
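# `D_Loss` is not defined in this snippet. A minimal sketch, assuming the batch
# passed to `train_on_batch` is the concatenation [x; y] with the first 512 rows
# drawn from distribution4 and the last 512 from the Gaussian, and that the
# dummy targets are ignored; the split size and sign convention are assumptions.
import keras.backend as K

def D_Loss(y_true, y_pred):
    d_x = y_pred[:512]   # discriminator outputs on distribution4 samples
    d_y = y_pred[512:]   # discriminator outputs on standard-Gaussian samples
    eps = K.epsilon()
    # Negated GAN value function: minimising this maximises V(D).
    return -(K.mean(K.log(d_x + eps)) + K.mean(K.log(1 - d_y + eps)))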
N = lambda x: np.exp(-x**2 / 2.) / ((2 * np.pi)**0.5)
plt.plot(
    f(torch.from_numpy(xx)).numpy(),
    d(torch.from_numpy(xx)).numpy()**(-1) * N(xx))
plt.plot(xx, N(xx))

############### import the sampler ``samplers.distribution4''
############### train a discriminator on distribution4 and standard gaussian
############### estimate the density of distribution4

#######--- INSERT YOUR CODE BELOW ---#######
from samplers import distribution3, distribution4
from problem1 import MLP_Disc, discriminator_obj, js_obj

f0 = distribution3(2048)
f1 = distribution4(2048)
func = MLP_Disc(dim=1)
_ = func._train(f1, f0, discriminator_obj, lr=0.001, epochs=1500)
# We know from problem 5 that f1 = f0 * D* / (1 - D*).

############### plotting things
############### (1) plot the output of your trained discriminator
############### (2) plot the estimated density contrasted with the true density

# Evaluate the discriminator on xx.
r = func(torch.FloatTensor(xx).view(-1, 1)).detach().numpy()[:, 0]
plt.figure(figsize=(8, 4))
plt.subplot(1, 2, 1)
plt.plot(xx, r)
plt.title(r'$D(x)$')
best_list.append(best_loss)

plt.plot(phi_values, best_list, "o")
plt.xlabel(r'$\phi$')
plt.ylabel('WD')
plt.title(r'Wasserstein distance per value of $\phi$')
plt.savefig("problem1-3-WD.png")
plt.show()

# Problem 1.4
d_4 = Discriminator(input_size=1)
optimizer = torch.optim.SGD(d_4.parameters(), lr=1e-3)
criterion = JSDLoss2()
f_0 = iter(samplers.distribution3(batch_size=batch_size))
f_1 = iter(samplers.distribution4(batch_size=batch_size))
best_loss = -float('Inf')

for batch in range(10000):
    f_0_samples = torch.as_tensor(next(f_0), dtype=torch.float32)
    f_1_samples = torch.as_tensor(next(f_1), dtype=torch.float32)
    optimizer.zero_grad()
    f_0_outputs = d_4(f_0_samples)
    f_1_outputs = d_4(f_1_samples)
    loss = -criterion(f_0_outputs, f_1_outputs)
    loss.backward()
    optimizer.step()
plt.xlim(-5, 5)

# exact
xx = np.linspace(-5, 5, 512)
N = lambda x: np.exp(-x**2 / 2.) / ((2 * np.pi)**0.5)
plt.plot(
    f(torch.from_numpy(xx)).numpy(),
    d(torch.from_numpy(xx)).numpy()**(-1) * N(xx))
plt.plot(xx, N(xx))

############### import the sampler ``samplers.distribution4''
############### train a discriminator on distribution4 and standard gaussian
############### estimate the density of distribution4

#######--- INSERT YOUR CODE BELOW ---#######
f0 = samplers.distribution3()
f1 = samplers.distribution4()
Descr_M = Descr(hidden_size=512,
                mini_batch=512,
                learning_rate=0.0001,
                num_epochs=1000,
                print_interval=10)
Descr_M.run_main_loop(f0, f1)

random_input_tensor = Variable(
    torch.from_numpy(
        np.float32(xx.reshape(Descr_M.minibatch_size, Descr_M.input_size))))
Network_Output = Descr_M.Network(random_input_tensor)
# Under the convention that D outputs 1 on the Gaussian f0,
# f1(x) = N(x) * (1 - D(x)) / D(x).
f1_estimated = ((1 - Network_Output) /
                Network_Output).detach().numpy().reshape(-1) * N(xx)

############### plotting things
device = torch.device('cpu')

js_model = torch.nn.Sequential(
    torch.nn.Linear(1, 64),
    torch.nn.ReLU(),
    torch.nn.Linear(64, 32),
    torch.nn.ReLU(),
    torch.nn.Linear(32, 1),
    torch.nn.Sigmoid(),
).to(device)

batch_size = 512
learning_rate = 1e-4
criterion = torch.nn.BCELoss()
optimizer_js = torch.optim.Adam(js_model.parameters(), lr=learning_rate)

real_dist = iter(distribution4(batch_size))
fake_dist = iter(distribution3(batch_size))
real_label = torch.full((batch_size, 1), 1.0, device=device)
fake_label = torch.full((batch_size, 1), 0.0, device=device)

for epoch in range(3000):
    js_model.zero_grad()

    # Train on real samples (distribution4).
    real_input = torch.from_numpy(next(real_dist).astype(np.float32))
    output_real = js_model(real_input)
    errD_real = criterion(output_real, real_label)
    errD_real.backward()

    # Train on fake samples (standard Gaussian).
    fake_input = torch.from_numpy(next(fake_dist).astype(np.float32))
    output_fake = js_model(fake_input)
    errD_fake = criterion(output_fake, fake_label)
    errD_fake.backward()

    optimizer_js.step()
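# With the labels above (distribution4 -> 1, standard Gaussian -> 0), the trained
# discriminator yields a density estimate via f1(x) = N(x) * D(x) / (1 - D(x)).
# A short evaluation sketch; the grid `xx` is an assumption matching the other
# snippets in this repository.
xx = np.linspace(-5, 5, 1000)
gauss_pdf = np.exp(-xx**2 / 2.) / np.sqrt(2 * np.pi)
with torch.no_grad():
    d_xx = js_model(torch.from_numpy(xx.astype(np.float32)).view(-1, 1)).numpy().ravel()
density_est = gauss_pdf * d_xx / (1 - d_xx)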
plt.plot(phi, values, 'o-')
if args.loss_type == "JSD":
    plt.ylabel("JSD")
    plt.xlabel("phi")
    plt.title("JSD vs phi")
    plt.savefig(directory + '_JSD_phi.png', bbox_inches='tight')
elif args.loss_type == "WD":
    plt.ylabel("Wasserstein Distance")
    plt.xlabel("phi")
    plt.title("WD vs phi")
    plt.savefig(directory + '_WD_phi.png', bbox_inches='tight')
plt.clf()

if args.question == 4:
    f0_dist = samplers.distribution3(512)
    f1_dist = samplers.distribution4(512)
    model = Discriminator(1, 50, 512, 0)

    for epoch in range(num_epochs):
        f1_batch = torch.from_numpy(next(f1_dist))
        f0_batch = torch.from_numpy(next(f0_dist))
        model.train(f1_batch.type(torch.FloatTensor),
                    f0_batch.type(torch.FloatTensor))

    f1_value = []
    f1_real = []
    f_0_values = []
    discriminator_outputs = []
    for x in xx:
        f_0 = N(x)
        x = torch.tensor([[x]], dtype=torch.float32)
        disc = model(x)
        discriminator_outputs.append(disc.item())
        f_0_values.append(f_0)
        # Density estimate: f1(x) = f0(x) * D(x) / (1 - D(x)).
        f1_value.append(f_0 * disc.item() / (1 - disc.item()))
import numpy as np
import matplotlib.pyplot as plt
import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import samplers as samplers

## the data also come from the distribution
cuda = torch.cuda.is_available()
f_0 = next(samplers.distribution3(512))
f_1 = next(samplers.distribution4(1))

X_dim = 1
h_dim = 64

# class Net(nn.Module):
#     def __init__(self, input_dim=1, hidden_size=32, n_hidden=3):
#         super(Net, self).__init__()
#         self.D = torch.nn.Sequential(
#             torch.nn.Linear(X_dim, h_dim),
#             torch.nn.ReLU(),
#             torch.nn.Linear(h_dim, h_dim),
#             torch.nn.ReLU(),
#             torch.nn.Linear(h_dim, 1),
#             torch.nn.Sigmoid()
#         )
#
#     def forward(self, x):
#         return self.D(x)
plt.xlim(-5, 5)

# exact
xx = np.linspace(-5, 5, 1000)
N = lambda x: np.exp(-x**2 / 2.) / ((2 * np.pi)**0.5)
plt.plot(
    f(torch.from_numpy(xx)).numpy(),
    d(torch.from_numpy(xx)).numpy()**(-1) * N(xx))
plt.plot(xx, N(xx))

############### import the sampler ``samplers.distribution4''
############### train a discriminator on distribution4 and standard gaussian
############### estimate the density of distribution4

#######--- INSERT YOUR CODE BELOW ---#######
f0 = distribution3(512)
f1 = distribution4(512)
D = Discriminator(1)
train(D, f1, f0, loss_metric='JSD')

x = torch.Tensor(xx).reshape(1000, 1)
output = D(x)
output = output.detach().numpy().reshape(-1)
# f1(x) = N(x) * D(x) / (1 - D(x)).
scaling_factor = output / (1 - output)
estimated_density = scaling_factor * N(xx)

############### plotting things
############### (1) plot the output of your trained discriminator
############### (2) plot the estimated density contrasted with the true density
def training_loop(LossFct, x, distribution=1, learning_rate=0.0001, num_epochs=50000):
    # Device configuration
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # Distribution properties
    if distribution == 1:
        p_gen = samplers.distribution1(0)
        q_gen = samplers.distribution1(x)
        nb_input = 2
    elif distribution == 4:
        q_gen = samplers.distribution3(2048)
        p_gen = samplers.distribution4(2048)
        nb_input = 1
    p_gen.send(None)
    q_gen.send(None)

    D = discriminators.Discriminator(n_input=nb_input).to(device)

    # Loss and optimizer
    optimizer = torch.optim.SGD(D.parameters(), lr=learning_rate)

    # Train the model
    trainLoss = []
    trainAcc = []
    meanLoss = 0
    correct = 0
    total = 0
    log_frequency = 100
    for epoch in range(num_epochs):
        # exp_lr_scheduler.step()
        p = torch.from_numpy(p_gen.send(0)).float().to(device)
        q = torch.from_numpy(q_gen.send(x)).float().to(device)
        labels_real = torch.ones(p.shape[0]).to(device)
        labels_fake = torch.zeros(q.shape[0]).to(device)

        # Forward pass
        outputs_real = torch.sigmoid(D(p))
        outputs_fake = torch.sigmoid(D(q))
        predicted_real = (outputs_real.data > 0.5).float().squeeze()
        predicted_fake = (outputs_fake.data > 0.5).float().squeeze()
        total += 2 * labels_real.size(0)
        correct_this_batch = ((predicted_real == labels_real).sum().item() +
                              (predicted_fake == labels_fake).sum().item())
        correct += correct_this_batch

        loss = LossFct.forward(outputs_real, outputs_fake, labels_real,
                               labels_fake, p, q, D)
        meanLoss += loss.item()

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if epoch % log_frequency == 0:
            print('Epoch [{}/{}]'.format(epoch, num_epochs))
            print('Loss: {:.4f}({:.4f}), Acc: {:.3f}({:.3f})'.format(
                loss.item(), meanLoss / (epoch + 1),
                correct_this_batch * 100 / (2 * labels_real.size(0)),
                correct * 100 / total))
            trainLoss.append(meanLoss / (epoch + 1))
            trainAcc.append(100 * correct / total)

    return loss, D
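# A short usage sketch for the loop above in the distribution-4 setting. The
# `JSDLoss` name is an assumption standing in for whatever LossFct the caller
# passes (its forward takes the outputs, labels, samples and the model):
loss, D = training_loop(JSDLoss(), x=0, distribution=4,
                        learning_rate=1e-3, num_epochs=10000)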
    f(torch.from_numpy(xx)).numpy(),
    d(torch.from_numpy(xx)).numpy()**(-1) * N(xx))
plt.plot(xx, N(xx))
plt.savefig("./images/q1_4_1.png")

############### import the sampler ``samplers.distribution4''
############### train a discriminator on distribution4 and standard gaussian
############### estimate the density of distribution4

#######--- INSERT YOUR CODE BELOW ---#######
to_tensor = lambda x: torch.as_tensor(x).float()

from samplers import distribution4, distribution3

f_0 = distribution3(batch_size=512)  # standard gaussian
f_1 = distribution4(batch_size=512)  # modified 'unknown' distribution

print("Training discriminator...")
discriminator = get_optimal_discriminator(f_1, f_0, maxsteps=1_000, threshold=1e-3)

def estimate_density(xx):
    d_x = discriminator(xx)
    # Prevent division by zero.
    d_x = np.maximum(d_x, 1e-8)
    d_x = np.minimum(d_x, 1 - 1e-8)
    base_density = N(xx)
    # f_1(x) = f_0(x) * D(x) / (1 - D(x)).
    return base_density * d_x / (1 - d_x)
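# Usage sketch for the estimator above, on the usual grid (assuming, as the
# surrounding code does, that the discriminator accepts and returns numpy arrays):
xx = np.linspace(-5, 5, 1000)
plt.plot(xx, estimate_density(xx))
plt.title('Estimated density of distribution4')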
D = Discriminator(1)
optimizer_D = torch.optim.Adam(D.parameters(), lr=setting.lr)

cuda_available = True if torch.cuda.is_available() else False
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if cuda_available:
    D.to(device)
Tensor = torch.cuda.FloatTensor if cuda_available else torch.FloatTensor

dist_p = iter(distribution3(setting.samp_per_epoch))
dist_q = iter(distribution4(setting.samp_per_epoch))

for epoch in range(setting.n_epochs):
    samples_p = next(dist_p)
    samples_q = next(dist_q)
    q_dist = Tensor(samples_q)
    p_dist = Tensor(samples_p)

    optimizer_D.zero_grad()
    p_decision = D(p_dist)
    q_decision = D(q_dist)
    # GAN discriminator loss with distribution4 as the positive class.
    d_loss = -torch.mean(torch.log(q_decision)) - torch.mean(
        torch.log(1 - p_decision))
    d_loss.backward()
    optimizer_D.step()
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.relu(self.map1(x))
        x = self.relu(self.map2(x))
        return self.sigmoid(self.map3(x))


D = Discriminator(input_size=d_input_size,
                  hidden_size=d_hidden_size,
                  output_size=d_output_size)
optimizer_D = torch.optim.Adam(D.parameters(), lr=lr)

cuda_available = True if torch.cuda.is_available() else False
#cuda_available = False
if cuda_available:
    D.cuda()
Tensor = torch.cuda.FloatTensor if cuda_available else torch.FloatTensor

dist_p = iter(distribution3(batch_size))
dist_q = iter(distribution4(batch_size))

for epoch in range(n_epochs):
    samples_p = next(dist_p)
    samples_q = next(dist_q)
    unknown_dist = Tensor(samples_q)
    std_dist = Tensor(samples_p)

    optimizer_D.zero_grad()
    unknown_decision = D(unknown_dist)
    std_decision = D(std_dist)
    # GAN discriminator loss with the unknown distribution as the positive class.
    d_loss = -torch.mean(torch.log(unknown_decision)) - torch.mean(
        torch.log(1 - std_decision))
    d_loss.backward()
    optimizer_D.step()
############### import the sampler ``samplers.distribution4''
############### train a discriminator on distribution4 and standard gaussian
############### estimate the density of distribution4

#######--- INSERT YOUR CODE BELOW ---#######
epoch_count = 25000
unk_disc = Discriminator(input_size=1).cuda()
unk_loss = nn.BCELoss(reduction='mean')
unk_optim = optim.SGD(unk_disc.parameters(), lr=1e-3)

# Get data generators for both distributions from samplers.
real_dist = iter(samplers.distribution4(512))
fake_dist = iter(samplers.distribution3(512))

for i in range(epoch_count):
    unk_disc.zero_grad()

    real_samples = next(real_dist)
    real_tensor_samples = torch.tensor(real_samples).float().cuda()
    fake_samples = next(fake_dist)
    fake_tensor_samples = torch.tensor(fake_samples).float().cuda()

    real_targets = torch.ones([len(real_tensor_samples), 1]).cuda()
    fake_targets = torch.zeros([len(fake_tensor_samples), 1]).cuda()

    real_output = unk_disc(real_tensor_samples)
    fake_output = unk_disc(fake_tensor_samples)

    loss = unk_loss(real_output, real_targets) + unk_loss(fake_output, fake_targets)
    loss.backward()
    unk_optim.step()
############################## Optimizer defining #############################
#optimizer_D = torch.optim.SGD(D.parameters(), lr=setting.lr)
optimizer_D = torch.optim.Adam(D.parameters(), lr=setting.lr)
###############################################################################

################################ Running on GPU ###############################
cuda_available = True if torch.cuda.is_available() else False
#cuda_available = False
if cuda_available:
    # G.cuda()
    D.cuda()
Tensor = torch.cuda.FloatTensor if cuda_available else torch.FloatTensor
###############################################################################

################################### Sampling ##################################
dist_r = iter(distribution4(setting.sample_num))
dist_f = iter(distribution2(setting.sample_num))
###############################################################################

################################### Training ##################################
wsd = []
for epoch in range(setting.n_epochs):
    real_loader_ = next(dist_r)
    real_data = Tensor(real_loader_)
    real_data = Variable(real_data.view(real_data.shape[0], 1))

    fake_loader_ = next(dist_f)
    fake_data = Tensor(fake_loader_)
    fake_data = Variable(fake_data.view(fake_data.shape[0], 1))