import numpy as np
import torch

from torch.autograd import Variable
from torch.nn import BCELoss
from torch.optim import Adam

# Project-local helpers assumed to be importable in this repository:
# to_cuda_if_available, to_cpu_if_available, Logger, DelayedKeyboardInterrupt,
# categorical_variable_loss, add_noise_to_code, calculate_gradient_penalty.


def sample(autoencoder, generator, num_samples, num_features,
           batch_size=100, noise_size=128, temperature=None, round_features=False):
    autoencoder, generator = to_cuda_if_available(autoencoder, generator)

    autoencoder.train(mode=False)
    generator.train(mode=False)

    samples = np.zeros((num_samples, num_features), dtype=np.float32)

    start = 0
    while start < num_samples:
        with torch.no_grad():
            noise = Variable(torch.FloatTensor(batch_size, noise_size).normal_())
            noise = to_cuda_if_available(noise)
            batch_code = generator(noise)
            batch_samples = autoencoder.decode(batch_code, training=False, temperature=temperature)
        batch_samples = to_cpu_if_available(batch_samples)
        batch_samples = batch_samples.data.numpy()

        # if rounding is activated (for models with binary outputs)
        if round_features:
            batch_samples = np.round(batch_samples)

        # do not go further than the desired number of samples
        end = min(start + batch_size, num_samples)
        # limit the samples taken from the batch based on what is missing
        samples[start:end, :] = batch_samples[:end - start, :]

        # move to next batch
        start = end

    return samples
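# Usage sketch (an illustration, not part of the original module):
# `trained_autoencoder` and `trained_generator` are hypothetical pre-trained
# models, and the sizes and temperature are placeholder values.
#
#     synthetic = sample(trained_autoencoder, trained_generator,
#                        num_samples=10000, num_features=128,
#                        temperature=0.666, round_features=True)
#     assert synthetic.shape == (10000, 128)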
def pre_train_epoch(autoencoder, data, batch_size, optim=None, variable_sizes=None, temperature=None):
    # when an optimizer is given the epoch runs in training mode;
    # with optim=None the same loop only evaluates the reconstruction loss
    autoencoder.train(mode=(optim is not None))
    training = optim is not None

    losses = []

    for batch in data.batch_iterator(batch_size):
        if optim is not None:
            optim.zero_grad()

        batch = Variable(torch.from_numpy(batch))
        batch = to_cuda_if_available(batch)

        _, batch_reconstructed = autoencoder(batch, training=training,
                                             temperature=temperature, normalize_code=False)

        loss = categorical_variable_loss(batch_reconstructed, batch, variable_sizes)

        # only backpropagate and update weights in training mode
        if training:
            loss.backward()
            optim.step()

        loss = to_cpu_if_available(loss)
        losses.append(loss.data.numpy())
        del loss

    return losses
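# Usage sketch (an illustration, not part of the original module):
# `autoencoder`, `train_data`, `val_data`, and `variable_sizes` are
# hypothetical objects following the interfaces used above.
#
#     optim = Adam(autoencoder.parameters(), lr=0.001)
#     for epoch in range(100):
#         train_losses = pre_train_epoch(autoencoder, train_data, 100,
#                                        optim=optim, variable_sizes=variable_sizes)
#         # omitting optim runs the same epoch in evaluation mode
#         val_losses = pre_train_epoch(autoencoder, val_data, 100,
#                                      variable_sizes=variable_sizes)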
def sample(generator, temperature, num_samples, num_features, batch_size=100, noise_size=128):
    generator = to_cuda_if_available(generator)
    generator.train(mode=False)

    samples = np.zeros((num_samples, num_features), dtype=np.float32)

    start = 0
    while start < num_samples:
        with torch.no_grad():
            noise = Variable(torch.FloatTensor(batch_size, noise_size).normal_())
            noise = to_cuda_if_available(noise)
            batch_samples = generator(noise, training=False, temperature=temperature)
        batch_samples = to_cpu_if_available(batch_samples)
        batch_samples = batch_samples.data.numpy()

        # do not go further than the desired number of samples
        end = min(start + batch_size, num_samples)
        # limit the samples taken from the batch based on what is missing
        samples[start:end, :] = batch_samples[:end - start, :]

        # move to next batch
        start = end

    return samples
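# Usage sketch (an illustration, not part of the original module): unlike the
# autoencoder-based sampler above, this generator decodes features directly
# from noise; `trained_generator` and the sizes are hypothetical placeholders.
#
#     synthetic = sample(trained_generator, temperature=0.666,
#                        num_samples=10000, num_features=128)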
def train(autoencoder, generator, discriminator, train_data, val_data,
          output_ae_path, output_gen_path, output_disc_path, output_loss_path,
          batch_size=1000, start_epoch=0, num_epochs=1000,
          num_disc_steps=2, num_gen_steps=1, code_size=128,
          l2_regularization=0.001, learning_rate=0.001, temperature=None):
    autoencoder, generator, discriminator = to_cuda_if_available(autoencoder, generator, discriminator)

    # the decoder is fine-tuned together with the generator
    optim_gen = Adam(list(generator.parameters()) + list(autoencoder.decoder.parameters()),
                     weight_decay=l2_regularization, lr=learning_rate)
    optim_disc = Adam(discriminator.parameters(), weight_decay=l2_regularization, lr=learning_rate)

    criterion = BCELoss()

    logger = Logger(output_loss_path, append=start_epoch > 0)

    for epoch_index in range(start_epoch, num_epochs):
        logger.start_timer()

        # train
        autoencoder.train(mode=True)
        generator.train(mode=True)
        discriminator.train(mode=True)

        disc_losses = []
        gen_losses = []

        more_batches = True
        train_data_iterator = train_data.batch_iterator(batch_size)

        while more_batches:
            # train discriminator
            generator.batch_norm_train(mode=False)
            for _ in range(num_disc_steps):
                # next batch
                try:
                    batch = next(train_data_iterator)
                except StopIteration:
                    more_batches = False
                    break

                # using "one sided smooth labels" is one trick to improve GAN training
                label_zeros = Variable(torch.zeros(len(batch)))
                smooth_label_ones = Variable(torch.FloatTensor(len(batch)).uniform_(0.9, 1))
                label_zeros, smooth_label_ones = to_cuda_if_available(label_zeros, smooth_label_ones)

                optim_disc.zero_grad()

                # first train the discriminator only with real data
                real_features = Variable(torch.from_numpy(batch))
                real_features = to_cuda_if_available(real_features)
                real_pred = discriminator(real_features)
                real_loss = criterion(real_pred, smooth_label_ones)
                real_loss.backward()

                # then train the discriminator only with fake data
                noise = Variable(torch.FloatTensor(len(batch), code_size).normal_())
                noise = to_cuda_if_available(noise)
                fake_code = generator(noise)
                fake_features = autoencoder.decode(fake_code, training=True, temperature=temperature)
                fake_features = fake_features.detach()  # do not propagate to the generator
                fake_pred = discriminator(fake_features)
                fake_loss = criterion(fake_pred, label_zeros)
                fake_loss.backward()

                # finally update the discriminator weights
                # using two separate batches is another trick to improve GAN training
                optim_disc.step()

                disc_loss = real_loss + fake_loss
                disc_loss = to_cpu_if_available(disc_loss)
                disc_losses.append(disc_loss.data.numpy())

                del disc_loss
                del fake_loss
                del real_loss

            # train generator
            generator.batch_norm_train(mode=True)
            for _ in range(num_gen_steps):
                optim_gen.zero_grad()

                noise = Variable(torch.FloatTensor(len(batch), code_size).normal_())
                noise = to_cuda_if_available(noise)
                gen_code = generator(noise)
                gen_features = autoencoder.decode(gen_code, training=True, temperature=temperature)
                gen_pred = discriminator(gen_features)

                smooth_label_ones = Variable(torch.FloatTensor(len(batch)).uniform_(0.9, 1))
                smooth_label_ones = to_cuda_if_available(smooth_label_ones)

                gen_loss = criterion(gen_pred, smooth_label_ones)
                gen_loss.backward()

                optim_gen.step()

                gen_loss = to_cpu_if_available(gen_loss)
                gen_losses.append(gen_loss.data.numpy())
                del gen_loss

        # validate discriminator
        autoencoder.train(mode=False)
        generator.train(mode=False)
        discriminator.train(mode=False)

        correct = 0.0
        total = 0.0
        for batch in val_data.batch_iterator(batch_size):
            # real data discriminator accuracy
            with torch.no_grad():
                real_features = Variable(torch.from_numpy(batch))
                real_features = to_cuda_if_available(real_features)
                real_pred = discriminator(real_features)
            real_pred = to_cpu_if_available(real_pred)
            correct += (real_pred.data.numpy().ravel() > .5).sum()
            total += len(real_pred)

            # fake data discriminator accuracy
            with torch.no_grad():
                noise = Variable(torch.FloatTensor(len(batch), code_size).normal_())
                noise = to_cuda_if_available(noise)
                fake_code = generator(noise)
                fake_features = autoencoder.decode(fake_code, training=False, temperature=temperature)
                fake_pred = discriminator(fake_features)
            fake_pred = to_cpu_if_available(fake_pred)
            correct += (fake_pred.data.numpy().ravel() < .5).sum()
            total += len(fake_pred)

        # log epoch metrics for current class
        logger.log(epoch_index, num_epochs, "discriminator", "train_mean_loss", np.mean(disc_losses))
        logger.log(epoch_index, num_epochs, "generator", "train_mean_loss", np.mean(gen_losses))
        logger.log(epoch_index, num_epochs, "discriminator", "validation_accuracy", correct / total)

        # save models for the epoch
        with DelayedKeyboardInterrupt():
            torch.save(autoencoder.state_dict(), output_ae_path)
            torch.save(generator.state_dict(), output_gen_path)
            torch.save(discriminator.state_dict(), output_disc_path)
            logger.flush()

    logger.close()
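# Hedged sketch of the `batch_norm_train` toggle used above. The generator
# class is not shown in this section, so this is an assumption about its
# behavior rather than the original implementation: the idea is to freeze
# batch-norm statistics while the discriminator is being trained.
#
#     def batch_norm_train(self, mode=True):
#         for module in self.modules():
#             if isinstance(module, torch.nn.BatchNorm1d):
#                 module.train(mode=mode)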
def train(autoencoder, generator, discriminator, train_data, val_data,
          output_ae_path, output_gen_path, output_disc_path, output_loss_path,
          batch_size=1000, start_epoch=0, num_epochs=1000,
          num_ae_steps=1, num_disc_steps=2, num_gen_steps=1,
          noise_size=128, l2_regularization=0.001, learning_rate=0.001,
          ae_noise_radius=0.2, ae_noise_anneal=0.995, normalize_code=True,
          variable_sizes=None, temperature=None, penalty=0.1):
    autoencoder, generator, discriminator = to_cuda_if_available(autoencoder, generator, discriminator)

    optim_ae = Adam(autoencoder.parameters(), weight_decay=l2_regularization, lr=learning_rate)
    optim_gen = Adam(generator.parameters(), weight_decay=l2_regularization, lr=learning_rate)
    optim_disc = Adam(discriminator.parameters(), weight_decay=l2_regularization, lr=learning_rate)

    logger = Logger(output_loss_path, append=start_epoch > 0)

    for epoch_index in range(start_epoch, num_epochs):
        logger.start_timer()

        # train
        autoencoder.train(mode=True)
        generator.train(mode=True)
        discriminator.train(mode=True)

        ae_losses = []
        disc_losses = []
        gen_losses = []

        more_batches = True
        train_data_iterator = train_data.batch_iterator(batch_size)

        while more_batches:
            # train autoencoder
            for _ in range(num_ae_steps):
                try:
                    batch = next(train_data_iterator)
                except StopIteration:
                    more_batches = False
                    break

                autoencoder.zero_grad()

                batch_original = Variable(torch.from_numpy(batch))
                batch_original = to_cuda_if_available(batch_original)
                batch_code = autoencoder.encode(batch_original, normalize_code=normalize_code)
                batch_code = add_noise_to_code(batch_code, ae_noise_radius)

                batch_reconstructed = autoencoder.decode(batch_code, training=True, temperature=temperature)

                ae_loss = categorical_variable_loss(batch_reconstructed, batch_original, variable_sizes)
                ae_loss.backward()

                optim_ae.step()

                ae_loss = to_cpu_if_available(ae_loss)
                ae_losses.append(ae_loss.data.numpy())

            # train discriminator
            for _ in range(num_disc_steps):
                try:
                    batch = next(train_data_iterator)
                except StopIteration:
                    more_batches = False
                    break

                discriminator.zero_grad()
                autoencoder.zero_grad()

                # first train the discriminator only with real data
                real_features = Variable(torch.from_numpy(batch))
                real_features = to_cuda_if_available(real_features)
                real_code = autoencoder.encode(real_features, normalize_code=normalize_code)
                real_code = add_noise_to_code(real_code, ae_noise_radius)
                real_pred = discriminator(real_code)
                real_loss = - real_pred.mean(0).view(1)
                real_loss.backward()

                # then train the discriminator only with fake data
                noise = Variable(torch.FloatTensor(len(batch), noise_size).normal_())
                noise = to_cuda_if_available(noise)
                fake_code = generator(noise)
                fake_code = fake_code.detach()  # do not propagate to the generator
                fake_pred = discriminator(fake_code)
                fake_loss = fake_pred.mean(0).view(1)
                fake_loss.backward()

                # gradient penalty term from WGAN-GP (Gulrajani et al., 2017)
                gradient_penalty = calculate_gradient_penalty(discriminator, penalty, real_code, fake_code)
                gradient_penalty.backward()

                optim_ae.step()
                optim_disc.step()

                disc_loss = real_loss + fake_loss + gradient_penalty
                disc_loss = to_cpu_if_available(disc_loss)
                disc_losses.append(disc_loss.data.numpy())

                del disc_loss
                del gradient_penalty
                del fake_loss
                del real_loss

            # train generator
            for _ in range(num_gen_steps):
                generator.zero_grad()

                noise = Variable(torch.FloatTensor(len(batch), noise_size).normal_())
                noise = to_cuda_if_available(noise)
                gen_code = generator(noise)
                fake_pred = discriminator(gen_code)
                fake_loss = - fake_pred.mean(0).view(1)
                fake_loss.backward()

                optim_gen.step()

                fake_loss = to_cpu_if_available(fake_loss)
                gen_losses.append(fake_loss.data.numpy()[0])
                del fake_loss

        # log epoch metrics for current class
        logger.log(epoch_index, num_epochs, "autoencoder", "train_mean_loss", np.mean(ae_losses))
        logger.log(epoch_index, num_epochs, "discriminator", "train_mean_loss", np.mean(disc_losses))
        logger.log(epoch_index, num_epochs, "generator", "train_mean_loss", np.mean(gen_losses))

        # save models for the epoch
        with DelayedKeyboardInterrupt():
            torch.save(autoencoder.state_dict(), output_ae_path)
            torch.save(generator.state_dict(), output_gen_path)
            torch.save(discriminator.state_dict(), output_disc_path)
            logger.flush()

        ae_noise_radius *= ae_noise_anneal

    logger.close()
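# Hedged sketches of two helpers called above but not defined in this section.
# The signatures are taken from the call sites; the bodies are standard
# implementations, assumed rather than copied from the original repository.

def add_noise_to_code(code, radius):
    # additive gaussian noise on the latent code, scaled by `radius`
    # (assumed implementation; the original helper is not shown here)
    if radius > 0:
        noise = Variable(torch.FloatTensor(code.size()).normal_() * radius)
        noise = to_cuda_if_available(noise)
        return code + noise
    return code


def calculate_gradient_penalty(discriminator, penalty, real_data, fake_data):
    # WGAN-GP: penalize the critic's gradient norm at random interpolations
    # between real and fake points (assumed implementation following
    # Gulrajani et al., 2017; the original helper is not shown here)
    alpha = torch.rand(len(real_data), 1)
    alpha = alpha.expand(real_data.size())
    alpha = to_cuda_if_available(alpha)

    interpolates = alpha * real_data.detach() + (1 - alpha) * fake_data.detach()
    interpolates.requires_grad_(True)

    disc_interpolates = discriminator(interpolates)

    gradients = torch.autograd.grad(
        outputs=disc_interpolates,
        inputs=interpolates,
        grad_outputs=to_cuda_if_available(torch.ones(disc_interpolates.size())),
        create_graph=True,
        retain_graph=True,
        only_inputs=True)[0]

    # push the gradient norm towards 1 and scale by the penalty coefficient
    return ((gradients.norm(2, dim=1) - 1) ** 2).mean() * penalty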