            else:
                perceptual_loss = 0
            criterion_loss = model.loss(img, outputs, criterion)
            loss = criterion_weight * criterion_loss + \
                loss_network_weight * perceptual_loss
            loss.backward()
            optimizer.step()
            # weight each batch loss by its size for the epoch average
            run_losses.append(outputs.shape[0] * loss.item())
        if scheduler is not None:
            scheduler.step()
        epoch_loss = sum(run_losses) / len(dataset)
        loss_history.append(epoch_loss)
        print('\n epoch [{}/{}], loss:{:.6f} #config={}'.format(
            epoch + 1, num_epochs, epoch_loss, j))
        if epoch % 10 == 0 or epoch == num_epochs - 1:
            img_temp = to_img(outputs)
            img_to_save = np.asarray(
                img_temp[idx_to_save].permute(1, 2, 0).detach().cpu())
            to_save_path = osp.join(save_run_as, 'epoch{}.jpg'.format(epoch))
            plt.imsave(to_save_path, np.uint8(img_to_save * 255))
        # append this epoch's loss; start the file fresh on the first epoch
        loss_path = osp.join(save_run_as, 'losses.txt')
        if epoch == 0 and osp.exists(loss_path):
            os.remove(loss_path)
        with open(loss_path, 'a+') as f:
            f.write('{}\n'.format(loss_history[-1]))
    print('***** Done training {}/{} configurations *****\n'.format(
        j, len(run_configurations)))
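# ---------------------------------------------------------------------------
# Hedged sketch: the branch preceding the `else:` above is not shown. When a
# loss network is enabled, `perceptual_loss` is typically an MSE between
# feature maps of a frozen network (e.g. VGG) for the reconstruction and the
# input. The helper below is illustrative, not this repo's exact code.
import torch
import torch.nn.functional as F

def perceptual_loss_sketch(loss_network, outputs, img):
    with torch.no_grad():
        target_feats = loss_network(img)    # features of the clean target
    recon_feats = loss_network(outputs)     # features of the reconstruction
    return F.mse_loss(recon_feats, target_feats)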
import torch

from net import Discriminator, Generator
from torchvision.utils import save_image
from utils import to_img

z_dimension = 100
batch_size = 64
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# restore trained weights and switch both networks to eval mode
D = Discriminator()
G = Generator(z_dimension)
D.load_state_dict(torch.load("./model/discriminator.pth"))
G.load_state_dict(torch.load("./model/generator.pth"))
D = D.to(device).eval()
G = G.to(device).eval()

# sample a batch of latent codes and decode them into images
with torch.no_grad():
    z = torch.randn(batch_size, z_dimension, device=device)
    fake_img = G(z)
fake_img = to_img(fake_img.cpu())
save_image(fake_img, "./result/fake_test.png")
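# ---------------------------------------------------------------------------
# Hedged sketch: the `to_img` helper imported above is not shown in this
# excerpt. For a tanh generator producing flattened 28x28 images, it plausibly
# looks like this (an assumption, not the repo's actual utils code):
import torch

def to_img_sketch(x):
    out = 0.5 * (x + 1)            # map tanh range [-1, 1] to [0, 1]
    out = out.clamp(0, 1)          # guard against numerical overshoot
    out = out.view(-1, 1, 28, 28)  # reshape vectors back into image tensors
    return out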
        # update the discriminator on the combined real + fake loss
        d_loss = d_loss_real + d_loss_fake
        d_optimizer.zero_grad()
        d_loss.backward()
        d_optimizer.step()

        # update the generator: make D score fresh fakes as real
        z = Variable(torch.randn(num_img, z_dimension)).cuda()
        fake_img = G(z)
        output = D(fake_img)
        g_loss = criterion(output, real_label)
        g_optimizer.zero_grad()
        g_loss.backward()
        g_optimizer.step()

        if (i + 1) % 100 == 0:
            print("Epoch [{}/{}], d_loss: {:.6f}, g_loss: {:.6f}, "
                  "D_real: {:.6f}, D_fake: {:.6f}".format(
                      epoch, num_epoch, d_loss.item(), g_loss.item(),
                      real_scores.mean().item(), fake_scores.mean().item()))

    if epoch == 0:
        real_images = to_img(real_img.cpu().data)
        save_image(real_images, "./result/real_images.png")
    fake_images = to_img(fake_img.cpu().data)
    save_image(fake_images, "./result/fake_images-{}.png".format(epoch + 1))

torch.save(G.state_dict(), "./model/generator.pth")
torch.save(D.state_dict(), "./model/discriminator.pth")
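# ---------------------------------------------------------------------------
# Hedged sketch: the excerpt above starts after `d_loss_real`, `d_loss_fake`,
# `real_scores`, and `fake_scores` are computed. A self-contained sketch of
# that step with vanilla BCE-GAN losses; `SimpleD`/`SimpleG` are illustrative
# stand-ins, not this repo's models.
import torch
import torch.nn as nn

z_dim, num_img = 100, 16
SimpleD = nn.Sequential(nn.Linear(784, 1), nn.Sigmoid())
SimpleG = nn.Sequential(nn.Linear(z_dim, 784), nn.Tanh())
criterion = nn.BCELoss()

real_img = torch.rand(num_img, 784)            # stand-in for a data batch
real_label = torch.ones(num_img, 1)
fake_label = torch.zeros(num_img, 1)

real_out = SimpleD(real_img)
d_loss_real = criterion(real_out, real_label)  # push D(real) toward 1
real_scores = real_out

z = torch.randn(num_img, z_dim)
fake_img = SimpleG(z)
fake_out = SimpleD(fake_img.detach())          # detach: don't update G here
d_loss_fake = criterion(fake_out, fake_label)  # push D(fake) toward 0
fake_scores = fake_out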
            loss.backward()
            optimizer.step()
            train_loss += loss.data.item()
            count += 1
        # ===================log========================
        train_loss /= count
        if epoch % show_every == 0:
            val = val_loss(model, test_loader, hidden_size, train=True)
            print('epoch [{}/{}], loss:{:.4f}, val:{:.4f}, train_loss:{:.4f}'.
                  format(epoch + 1, num_epochs, loss.data.item(),
                         val.data.item(), train_loss))
            pic = to_img(output.cpu().data)
            show(pic[0][0])
    torch.save(model.state_dict(), teacher_fname)
else:
    # load teacher model
    checkpoint = torch.load(teacher_fname)
    model.load_state_dict(checkpoint)

""" ------- Sample 2D latent code ------- """
model.eval()
N = 10      # number of images per side of the sampling grid
range_ = 2  # range of exploration
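# ---------------------------------------------------------------------------
# Hedged sketch: the `N` / `range_` constants above suggest an N x N grid
# sweep over a 2D latent space. A self-contained sketch of that decode loop;
# the `decoder` here is an illustrative stand-in, not the repo's model.
import numpy as np
import torch
import torch.nn as nn

N, range_ = 10, 2
decoder = nn.Sequential(nn.Linear(2, 784), nn.Sigmoid())  # illustrative

xs = np.linspace(-range_, range_, N)
ys = np.linspace(-range_, range_, N)
canvas = np.empty((28 * N, 28 * N))
with torch.no_grad():
    for i, yv in enumerate(ys):
        for j, xv in enumerate(xs):
            z = torch.tensor([[xv, yv]], dtype=torch.float32)
            img = decoder(z).view(28, 28).numpy()
            canvas[i * 28:(i + 1) * 28, j * 28:(j + 1) * 28] = img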
def train_sdae(batch_size, learning_rate, num_epochs, model_class, dataset_key,
               noise_type, zero_frac, gaussian_stdev, sp_frac, restore_path,
               save_path, log_freq, olshausen_path, olshausen_step_size,
               weight_decay, loss_type, emph_wt_a, emph_wt_b,
               vae_reconstruction_loss_type, cub_folder, learned_noise_wt,
               nt_restore_prefix, nt_save_prefix):
    # set up log folders
    for folder in ('./01_original', './02_noisy', './03_output',
                   './04_filters', './05_stdev'):
        os.makedirs(folder, exist_ok=True)

    # set up model and criterion
    model = init_model(model_class, restore_path, restore_required=False)
    if isinstance(model, modules.SVAE):
        criterion = init_loss(
            'vae', reconstruction_loss_type=vae_reconstruction_loss_type)
    else:
        criterion = init_loss(loss_type)

    # pad learned_noise_wt so every block has a weight
    if len(learned_noise_wt) < model.num_blocks:
        len_diff = model.num_blocks - len(learned_noise_wt)
        learned_noise_wt.extend(learned_noise_wt[-1:] * len_diff)

    # load data
    data_loader, sample_c, sample_h, sample_w, data_minval, data_maxval = init_data_loader(
        dataset_key, True, batch_size, olshausen_path, olshausen_step_size,
        cub_folder)
    original_size = sample_c * sample_h * sample_w

    # training loop
    affected = None
    warning_displayed = False
    original, noisy, output = None, None, None
    for ae_idx in range(model.num_blocks):
        stdev = None
        nt_loss = None
        nt_optimizer = None
        noise_transformer = None
        if learned_noise_wt[ae_idx] > 0:
            noise_transformer = modules.NoiseTransformer(original_size)
            if torch.cuda.is_available():
                noise_transformer = noise_transformer.cuda()
            if nt_restore_prefix is not None:
                nt_restore_path = '%s_%d.pth' % (nt_restore_prefix, ae_idx)
                if os.path.exists(nt_restore_path):
                    noise_transformer.load_state_dict(
                        torch.load(nt_restore_path))
                    print('restored noise transformer from %s' %
                          nt_restore_path)
                else:
                    print('warning: checkpoint %s not found, skipping...' %
                          nt_restore_path)
            nt_optimizer = torch.optim.Adam(noise_transformer.parameters(),
                                            lr=learning_rate,
                                            weight_decay=weight_decay)

        # train one block at a time
        print('--------------------')
        print('training block %d/%d' % (ae_idx + 1, model.num_blocks))
        print('--------------------')
        model_optimizer = torch.optim.Adam(model.get_block_parameters(ae_idx),
                                           lr=learning_rate,
                                           weight_decay=weight_decay)
        for epoch in range(num_epochs):
            mean_loss, total_num_examples = 0, 0
            for batch_idx, data in enumerate(data_loader):
                original, _ = data
                original = original.float()
                if not model.is_convolutional:
                    original = original.view(original.size(0), -1)
                if torch.cuda.is_available():
                    original = original.cuda()
                original = model.encode(original)
                if isinstance(model, modules.SVAE):
                    # SVAE returns (sampled latent vector, mean, log_var)
                    original = original[1]
                original = original.detach()

                # apply noise
                if learned_noise_wt[ae_idx] > 0:
                    stdev = noise_transformer.compute_stdev(original)
                    noisy = noise_transformer.apply_noise(original, stdev)
                else:
                    if noise_type == 'mn':
                        noisy, affected = zero_mask(original, zero_frac)
                    elif noise_type == 'gs':
                        noisy, affected = add_gaussian(original,
                                                       gaussian_stdev)
                    elif noise_type == 'sp':
                        noisy, affected = salt_and_pepper(
                            original, sp_frac, data_minval, data_maxval)
                    else:
                        if not warning_displayed:
                            print('unrecognized noise type: %r' %
                                  (noise_type,))
                            print('using clean image as input')
                            warning_displayed = True
                        noisy = original
                noisy = noisy.detach()
                if torch.cuda.is_available():
                    noisy = noisy.cuda()

                # =============== forward ===============
                if isinstance(model, modules.SVAE):
                    output, mean, log_var = model(noisy, ae_idx)
                    loss = criterion(output, original, mean, log_var)
                    batch_size_ = original.size(0)  # might be undersized last batch
                    total_num_examples += batch_size_
                    # running mean; assumes `loss` is a sum over the batch
                    mean_loss += (loss -
                                  mean_loss * batch_size_) / total_num_examples
                else:
                    output = model(noisy, ae_idx)
                    if (emph_wt_a != 1 or emph_wt_b != 1) and noise_type != 'gs':
                        # emphasize corrupted dimensions in the loss
                        loss = emph_wt_a * criterion(output[affected], original[affected]) + \
                            emph_wt_b * criterion(output[~affected], original[~affected])
                    else:
                        loss = criterion(output, original)
                    # running mean; assumes `loss` is a mean over the batch
                    mean_loss += (loss - mean_loss) / (batch_idx + 1)
                if learned_noise_wt[ae_idx] > 0:
                    # encourage large standard deviations
                    nt_loss = loss - learned_noise_wt[ae_idx] * torch.mean(stdev)

                # =============== backward ==============
                if learned_noise_wt[ae_idx] > 0:
                    nt_optimizer.zero_grad()
                    nt_loss.backward(retain_graph=True)
                    nt_optimizer.step()
                model_optimizer.zero_grad()
                loss.backward()
                model_optimizer.step()

            # =================== log ===================
            print('epoch {}/{}, loss={:.6f}'.format(epoch + 1, num_epochs,
                                                    mean_loss.item()))
            if epoch % log_freq == 0 or epoch == num_epochs - 1:
                # save images
                if ae_idx == 0:
                    to_save = [
                        (to_img(original.data.cpu()), './01_original',
                         'original'),
                        (to_img(noisy.data.cpu()), './02_noisy', 'noisy'),
                        (to_img(output.data.cpu()), './03_output', 'output'),
                        (to_img(model.get_first_layer_weights(as_tensor=True)),
                         './04_filters', 'filters'),
                    ]
                    for img, folder, desc in to_save:
                        save_image_wrapper(
                            img,
                            os.path.join(folder,
                                         '{}_{}.png'.format(desc, epoch + 1)))

                # save learned stdev
                if learned_noise_wt[ae_idx] > 0:
                    stdev_path = os.path.join(
                        './05_stdev',
                        'stdev_{}_{}.txt'.format(ae_idx, epoch + 1))
                    np.savetxt(stdev_path, stdev.data.cpu().numpy(),
                               fmt='%.18f')
                    print('[o] saved stdev to %s' % stdev_path)

                # save model(s)
                torch.save(model.state_dict(), save_path)
                print('[o] saved model to %s' % save_path)
                if learned_noise_wt[ae_idx] > 0 and nt_save_prefix is not None:
                    nt_save_path = '%s_%d.pth' % (nt_save_prefix, ae_idx)
                    torch.save(noise_transformer.state_dict(), nt_save_path)
                    print('[o] saved lvl-%d noise transformer to %s' %
                          (ae_idx, nt_save_path))

        model.num_trained_blocks += 1
        original_size = model.get_enc_out_features(ae_idx)

    plot_first_layer_weights(model)
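# ---------------------------------------------------------------------------
# Hedged sketch: `zero_mask`, `add_gaussian`, and `salt_and_pepper` are
# imported from elsewhere in this repo. Plausible minimal implementations
# matching how they are called above (each returns the corrupted tensor plus
# a boolean mask of affected entries); assumptions, not the repo's code.
import torch

def zero_mask(x, zero_frac):
    affected = torch.rand_like(x) < zero_frac        # entries to drop
    return x.masked_fill(affected, 0.0), affected

def add_gaussian(x, stdev):
    affected = torch.ones_like(x, dtype=torch.bool)  # every entry perturbed
    return x + stdev * torch.randn_like(x), affected

def salt_and_pepper(x, frac, minval, maxval):
    r = torch.rand_like(x)
    noisy = x.clone()
    noisy[r < frac / 2] = minval                     # "pepper"
    noisy[(r >= frac / 2) & (r < frac)] = maxval     # "salt"
    return noisy, r < frac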
        recon, mu, log_std = vae(inputs)
        loss = vae.loss_function(recon, inputs, mu, log_std)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        i += 1
        if batch_id % 100 == 0:
            print("Epoch[{}/{}], Batch[{}/{}], batch_loss:{:.6f}".format(
                epoch + 1, epochs, batch_id + 1, len(data_loader),
                loss.item()))

    print("======>epoch:{},\t epoch_average_batch_loss:{:.6f}============".
          format(epoch + 1, train_loss / i), "\n")

    # save sample reconstructions every 10 epochs
    if epoch % 10 == 0:
        imgs = utils.to_img(recon.detach())
        path = "./img/vae/epoch{}.png".format(epoch + 1)
        torchvision.utils.save_image(imgs, path, nrow=10)
        print("save:", path, "\n")

torchvision.utils.save_image(img, "./img/vae/raw.png", nrow=10)
print("save raw image: ./img/vae/raw.png", "\n")

# save the trained vae model
utils.save_model(vae, "./model_weights/vae/vae_weights.pth")
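# ---------------------------------------------------------------------------
# Hedged sketch: the model's `loss_function(recon, inputs, mu, log_std)` is
# defined in the model file, which is not shown here. It likely resembles the
# standard formulation below (reconstruction term plus KL divergence to the
# unit Gaussian); a sketch, not the repo's exact code.
import torch
import torch.nn.functional as F

def vae_loss_sketch(recon, inputs, mu, log_std):
    recon_loss = F.mse_loss(recon, inputs, reduction='sum')
    # KL(N(mu, sigma) || N(0, 1)) with log_std = log(sigma)
    kl = -0.5 * torch.sum(1 + 2 * log_std - mu.pow(2) - (2 * log_std).exp())
    return recon_loss + kl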
        img = img.view(img.size(0), -1)  # (batch_size, channel*width*height) -> (128, 784)
        img = img.cuda()
        # ===================forward=====================
        output = model(img)  # (128, 784)
        loss = criterion(output, img)
        # ===================backward====================
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # ===================log========================
    print('epoch [{}/{}], loss:{:.4f}'.format(epoch + 1, num_epochs,
                                              loss.item()))
    if epoch % 10 == 0:
        pic = to_img(img.cpu().data)
        save_image(pic, f'{OUTPUT_PATH}/input_image_{epoch}.png')
        pic = to_img(output.cpu().data)
        save_image(pic, f'{OUTPUT_PATH}/output_image_{epoch}.png')
        pic = to_img(model.decode(model.encode(img)).cpu().data)
        save_image(pic, f'{OUTPUT_PATH}/encode_decode_image_{epoch}.png')
        tsne_plot(X=img.cpu().numpy(),
                  y=label.numpy(),
                  filename=f'{OUTPUT_PATH}/tsne_input_{epoch}.html')
        tsne_plot(X=output.cpu().data.numpy(),
                  y=label.numpy(),
                  filename=f'{OUTPUT_PATH}/tsne_output_{epoch}.html')
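# ---------------------------------------------------------------------------
# Hedged sketch: `tsne_plot` comes from a utils module not shown here; the
# .html output suggests an interactive backend such as plotly. A minimal
# sketch under that assumption (not the repo's actual helper):
from sklearn.manifold import TSNE
import plotly.express as px

def tsne_plot_sketch(X, y, filename):
    emb = TSNE(n_components=2).fit_transform(X)  # project features to 2-D
    fig = px.scatter(x=emb[:, 0], y=emb[:, 1], color=y.astype(str))
    fig.write_html(filename)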
                noisy = noisy.cuda()
            # =============== forward ===============
            output, mean, log_var = model(noisy)
            loss = criterion(output, original, mean, log_var)
            batch_size_ = original.size(0)  # might be undersized last batch
            total_num_examples += batch_size_
            # running mean; assumes `loss` is a sum over the batch
            mean_loss += (loss - mean_loss * batch_size_) / total_num_examples

            # =============== backward ==============
            model_optimizer.zero_grad()
            loss.backward()
            model_optimizer.step()

        # =================== log ===================
        print('epoch {}/{}, loss={:.6f}'.format(epoch + 1, num_epochs,
                                                mean_loss.item()))
        if epoch % log_freq == 0 or epoch == num_epochs - 1:
            # save images
            to_save = [
                (to_img(original.data.cpu()), './01_original', 'original'),
                (to_img(noisy.data.cpu()), './02_noisy', 'noisy'),
                (to_img(output.data.cpu()), './03_output', 'output'),
            ]
            for img, folder, desc in to_save:
                save_image_wrapper(
                    img, os.path.join(folder,
                                      '{}_{}.png'.format(desc, epoch + 1)))

            # save model(s)
            torch.save(model.state_dict(), save_path)
            print('[o] saved model to %s' % save_path)
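# ---------------------------------------------------------------------------
# Hedged sketch: `save_image_wrapper` comes from a utils module not included
# in this excerpt. It presumably wraps torchvision's save_image with
# directory handling, roughly like this (an assumption):
import os
from torchvision.utils import save_image

def save_image_wrapper_sketch(img, path):
    os.makedirs(os.path.dirname(path) or '.', exist_ok=True)  # ensure folder
    save_image(img, path)
    print('[o] saved image to %s' % path)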
for data in train_loader:
    img, y = data
    img = img.view(img.size(0), -1)
    img = img.cuda()
    y = y.cuda()
    # ===================forward=====================
    # sanity check on one batch: reconstruct with the student and compute
    # both latent codes (z vs. z2 can be compared before finetuning)
    z = student_model.encoder(img)
    output = student_model.decoder(z)
    z2 = teacher_model.encoder(img)
    pic = to_img(output.cpu().data)
    show(pic[0][0])
    break

"""# Finetune student with Jacobian"""

# initialize optimizer and loss weights
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(student_model.parameters(),
                             lr=params.lr,
                             weight_decay=params.wd)
lambda_z = params.lambda_z
lambda_jacobian = params.lambda_jacobian
lambda_xcov = params.lambda_xcov
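# ---------------------------------------------------------------------------
# Hedged sketch: only the weights (lambda_z, lambda_jacobian, lambda_xcov)
# appear in this excerpt; the loss terms themselves are defined later. Common
# formulations they likely correspond to, as a sketch rather than the repo's
# exact code: a cross-covariance penalty between latent groups, and Jacobian
# matching between student and teacher via a shared random projection.
import torch

def xcov_penalty(z_a, z_b):
    # penalize cross-covariance between two latent groups toward zero
    z_a = z_a - z_a.mean(dim=0)
    z_b = z_b - z_b.mean(dim=0)
    c = z_a.t() @ z_b / z_a.size(0)
    return 0.5 * (c ** 2).sum()

def jacobian_match_penalty(img, z_student, z_teacher):
    # img must have requires_grad=True before both encoders are applied;
    # one shared random projection keeps this at a single backward pass
    # per model instead of one per latent dimension
    v = torch.randn_like(z_student)
    g_s, = torch.autograd.grad((z_student * v).sum(), img, create_graph=True)
    g_t, = torch.autograd.grad((z_teacher * v).sum(), img)
    return ((g_s - g_t.detach()) ** 2).mean()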