def train_fn(loader, disc, gen, opt_gen, opt_disc, mse, bce, vgg_loss):
    """Run one epoch of SRGAN training over `loader`.

    Per batch: one discriminator step (real vs. generated high-res images,
    with one-sided label smoothing on the real targets) followed by one
    generator step (scaled adversarial BCE + VGG perceptual loss).
    `mse` is accepted for interface compatibility but is not used here.
    """
    pbar = tqdm(loader, leave=False)
    for batch_idx, (low_res, high_res) in enumerate(pbar):
        high_res = high_res.to(config.DEVICE)
        low_res = low_res.to(config.DEVICE)

        # --- Discriminator: max E[log(D(x))] + E[log(1 - D(G(z)))] ---
        sr = gen(low_res)
        real_pred = disc(high_res)
        fake_pred = disc(sr.detach())
        # Smoothed "real" labels: 1 minus a small random amount.
        smoothed_ones = torch.ones_like(real_pred) - 0.1 * torch.rand_like(real_pred)
        d_loss = bce(fake_pred, torch.zeros_like(fake_pred)) + bce(real_pred, smoothed_ones)

        opt_disc.zero_grad()
        d_loss.backward()
        opt_disc.step()

        # --- Generator: max log(D(G(z))) plus perceptual term ---
        fake_pred = disc(sr)
        g_loss = 1e-3 * bce(fake_pred, torch.ones_like(fake_pred)) + 0.006 * vgg_loss(sr, high_res)

        opt_gen.zero_grad()
        g_loss.backward()
        opt_gen.step()

        if batch_idx % 200 == 0:
            plot_examples("Data/LR", gen)
def train_fn(loader, disc, gen, opt_gen, opt_disc, mse, bce, vgg_loss):
    """One SRGAN training epoch.

    Discriminator is trained on real images (smoothed labels) and detached
    generator output; the generator is then trained with a small adversarial
    BCE term plus a VGG perceptual loss. `mse` is unused but kept so the
    call signature stays compatible.
    """
    loop = tqdm(loader, leave=True)
    for idx, (low_res, high_res) in enumerate(loop):
        high_res = high_res.to(config.DEVICE)
        low_res = low_res.to(config.DEVICE)

        ### Train Discriminator: max log(D(x)) + log(1 - D(G(z)))
        fake = gen(low_res)
        real_out = disc(high_res)
        fake_out = disc(fake.detach())
        # One-sided label smoothing on the real targets.
        real_targets = torch.ones_like(real_out) - 0.1 * torch.rand_like(real_out)
        loss_disc = bce(fake_out, torch.zeros_like(fake_out)) + bce(real_out, real_targets)

        opt_disc.zero_grad()
        loss_disc.backward()
        opt_disc.step()

        # Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))
        fake_out = disc(fake)
        gen_loss = 0.006 * vgg_loss(fake, high_res) + 1e-3 * bce(fake_out, torch.ones_like(fake_out))

        opt_gen.zero_grad()
        gen_loss.backward()
        opt_gen.step()

        if idx % 200 == 0:
            plot_examples("test_images/", gen)
# Bounding-box-aware augmentation pipeline; albumentations drops boxes whose
# visible part falls below the min_area / min_visibility thresholds.
transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        A.OneOf(
            [A.Blur(blur_limit=3, p=0.5), A.ColorJitter(p=0.5)],
            p=1.0,
        ),
    ],
    bbox_params=A.BboxParams(
        format="pascal_voc", min_area=2048, min_visibility=0.3, label_fields=[]
    ),
)

images_list = [image]
saved_bboxes = [bboxes[0]]
for _ in range(15):
    out = transform(image=image, bboxes=bboxes)
    # Keep only augmentations where at least one box survived the transforms.
    if out["bboxes"]:
        images_list.append(out["image"])
        saved_bboxes.append(out["bboxes"][0])
plot_examples(images_list, saved_bboxes)
# Load one image and two segmentation masks; passing the masks through the
# same Compose keeps them spatially aligned with the augmented image.
image = Image.open("../dataset/IMAGES/elon.jpeg")
mask = Image.open("../dataset/IMAGES/mask.jpeg")
mask2 = Image.open("../dataset/IMAGES/second_mask.jpeg")

transform = A.Compose([
    A.Resize(width=1920, height=1080),
    A.RandomCrop(width=1280, height=720),
    A.Rotate(limit=40, p=0.5, border_mode=cv2.BORDER_CONSTANT),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.1),
    A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.5),
    A.OneOf([
        A.Blur(blur_limit=3, p=0.5),
        A.ColorJitter(p=0.5),
    ], p=1.),
])

images_list = [image]
# albumentations expects numpy arrays, not PIL images.
image, mask, mask2 = np.array(image), np.array(mask), np.array(mask2)
for _ in range(4):
    result = transform(image=image, masks=[mask, mask2])
    images_list.append(result["image"])
    images_list.extend(result["masks"])
plot_examples(images_list)
# Augmentation pipeline with pascal_voc boxes; small / mostly-hidden boxes
# are filtered out by the BboxParams thresholds.
transform = A.Compose(
    [
        A.Resize(width=1920, height=1080),
        A.RandomCrop(width=1280, height=720),
        A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
        A.HorizontalFlip(p=0.5),
        A.VerticalFlip(p=0.1),
        A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
        A.OneOf([
            A.Blur(blur_limit=3, p=0.5),
            A.ColorJitter(p=0.5),
        ], p=1.0),
    ],
    bbox_params=A.BboxParams(format="pascal_voc", min_area=2048,
                             min_visibility=0.3, label_fields=[]),
)

image_lst = [image]
saved_bboxes = [bboxes[0]]
for attempt in range(15):
    augmented = transform(image=image, bboxes=bboxes)
    boxes = augmented["bboxes"]
    if len(boxes) == 0:
        # All boxes were cropped/filtered away; discard this sample.
        continue
    image_lst.append(augmented["image"])
    saved_bboxes.append(boxes[0])
plot_examples(image_lst, saved_bboxes)
# Compare the Lossgrad-wrapped SGD optimizer against plain SGD on the same
# autoencoder, each starting from a fresh weight initialization.
criterion = nn.MSELoss()
net = Net()
runs = {}


def _train_and_report(name, optimizer):
    """Train `net` with `optimizer`, store its curves in `runs[name]`,
    then print final train/test losses."""
    runs[name] = train_model(device, net, criterion, train_data, test_data,
                             batch_size, max_epoch, optimizer)
    train_loss = test_autoencoder(device, net, train_data, criterion, batch_size)
    test_loss = test_autoencoder(device, net, test_data, criterion, batch_size)
    print(f'Final train loss: {train_loss}')
    print(f'Final test loss: {test_loss}')


net.apply(init_weights)
_train_and_report(
    'lossgrad',
    LossgradOptimizer(torch.optim.SGD(net.parameters(), lr=1e-4), net, criterion))

net.apply(init_weights)
_train_and_report('sgd', torch.optim.SGD(net.parameters(), lr=1.0))

plots_dir_name = 'plots'
plot_results(plots_dir_name, runs, 'train_losses')
plot_results(plots_dir_name, runs, 'test_losses')
plot_examples(plots_dir_name, device, net, test_data, runs)
def train(dataset, num_slots, z_dim, scale, beta, gamma, lr, batch_size, steps, _run, _log):
    """Train a MONet model and log metrics/artifacts through a sacred run.

    Loads `data/<dataset>/data.pt`, optimizes nll + beta*kl + gamma*mask_kl
    with RMSprop, logs running averages every `log_every` steps and saves
    example plots plus a model checkpoint every `save_every` steps.
    """
    if len(_run.observers) == 0:
        _log.warning('Running without observers')

    train_file = os.path.join('data', dataset, 'data.pt')
    data = TensorDataset(torch.load(train_file))
    loader = DataLoader(data, batch_size, shuffle=True, num_workers=1, drop_last=True)
    data_iter = utils.make_data_iterator(loader)
    # Infer image geometry from one batch: (batch, channels, height, width).
    _, im_channels, im_size, _ = next(iter(loader))[0].shape

    model = MONet(im_size, im_channels, num_slots, z_dim, scale).to(device)
    optimizer = torch.optim.RMSprop(model.parameters(), lr)

    log_every = 500
    save_every = 10000
    max_samples = 16
    metrics = defaultdict(float)

    for step in range(1, steps + 1):
        # Train
        batch = next(data_iter).to(device)
        nll, kl, mask_kl, recs, masks = model(batch)
        loss = nll + beta * kl + gamma * mask_kl
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Accumulate running metrics for the logging window.
        metrics['nll'] += nll.item()
        metrics['kl'] += kl.item()
        metrics['mask_kl'] += mask_kl.item()
        metrics['loss'] += loss.item()

        # Log
        if step % log_every == 0:
            line = f'[{step:d}/{steps:d}] '
            for name in metrics:
                metrics[name] /= log_every
                line += f'{name}: {metrics[name]:.6f} '
                _run.log_scalar(name, metrics[name], step)
                metrics[name] = 0.0
            _log.info(line)

        # Save
        if step % save_every == 0:
            x = batch[:max_samples]
            recs = recs[:max_samples]
            masks = masks[:max_samples]
            # Composite image: mask-weighted sum over slots.
            composite = torch.sum(recs * masks, dim=1)
            recs = recs.reshape(-1, im_channels, im_size, im_size)
            masks = masks.reshape(-1, 1, im_size, im_size)
            utils.plot_examples(x, f'original_{step:d}', num_cols=1)
            utils.plot_examples(recs, f'reconstruction_{step:d}', num_slots)
            utils.plot_examples(masks, f'mask_{step:d}', num_slots)
            utils.plot_examples(composite, f'final_{step:d}', num_cols=1)
            # Save the checkpoint as a run artifact, then remove the local file.
            model_file = f'monet_{dataset}.pt'
            torch.save(model.state_dict(), model_file)
            _run.add_artifact(model_file)
            os.remove(model_file)
d_scaler, writer, tb_step, ) if config.SAVE_MODEL: save_checkpoint(gen, opt_gen, filename=config.CHECKPOINT_GEN) save_checkpoint(disc, opt_disc, filename=config.CHECKPOINT_DISC) if __name__ == "__main__": try_model = True if try_model: # Will just use pretrained weights and run on images # in test_images/ and save the ones to SR in saved/ gen = Generator(in_channels=3).to(config.DEVICE) opt_gen = optim.Adam(gen.parameters(), lr=config.LEARNING_RATE, betas=(0.0, 0.9)) load_checkpoint( config.CHECKPOINT_GEN, gen, opt_gen, config.LEARNING_RATE, ) plot_examples("test_images/", gen) else: # This will train from scratch main()
gen_steps=GEN_STEPS, critic_steps=CRITIC_STEPS, gp_weight=GP_WEIGHT, cycle_weight=CYCLE_WEIGHT, ) cyclegan.build(IMAGE_SIZE, gen_lr=GEN_LR, critic_lr=CRITIC_LR) data_x_train = data.get_dataset(IMAGE_SIZE, shuffle=True, male=True) data_y_train = data.get_dataset(IMAGE_SIZE, shuffle=True, male=False) cyclegan.train(data_x_train, data_y_train, ITERATIONS) cyclegan.save() data_x_test = data.get_dataset(IMAGE_SIZE, shuffle=False, male=True, test=True) data_y_test = data.get_dataset(IMAGE_SIZE, shuffle=False, male=False, test=True) # forward pass: male -> female -> male fake_y = cyclegan.predict_fake_y(data_x_test) reco_x = cyclegan.predict_reco_x(data_x_test) utils.plot_examples( data_x_test, fake_y, reco_x, base_name="images/forward/example" ) # backward pass: female -> male -> female fake_x = cyclegan.predict_fake_x(data_y_test) reco_y = cyclegan.predict_reco_y(data_y_test) utils.plot_examples( data_y_test, fake_x, reco_y, base_name="images/backward/example" )
disc, gen, opt_gen, opt_disc, l1, vgg_loss, g_scaler, d_scaler, writer, tb_step, ) if config.SAVE_MODEL: save_checkpoint(gen, opt_gen, filename=config.CHECKPOINT_GEN) save_checkpoint(disc, opt_disc, filename=config.CHECKPOINT_DISC) if __name__ == "__main__": try_model = True if try_model: gen = Generator(in_channels=3).to(config.DEVICE) opt_gen = optim.Adam(gen.parameters(), lr=config.LEARNING_RATE, betas=(0.0, 0.9)) load_checkpoint(config.CHECKPOINT_GEN, gen, opt_gen, config.LEARNING_RATE) gen.eval() plot_examples("data/val", gen) else: main()
def experiment(**kwargs):
    """Run a dataset-adv experiment. Pull from DB or use defaults."""
    # Set default training parameters
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(('Using device: {}'.format(device)))
    hps = utils.default_hps('biggan')
    hps['epochs'] = 1
    # Update params with kwargs
    pull_from_db = kwargs['pull_from_db']
    if pull_from_db:
        exp = db.get_experiment_trial(False)
        if exp is None:
            raise RuntimeError('All experiments are complete.')
    else:
        exp = kwargs
    # Only hyperparameters already present in the defaults are overridden.
    for k, v in exp.items():
        if k in hps:
            hps[k] = v
            print(('Setting {} to {}'.format(k, v)))
    # Create results directory
    utils.make_dir(hps['results_dir'])
    # Other params we won't write to DB
    save_examples = False
    im_dir = 'screenshots'
    trainable = True
    reset_inner_optimizer = True  # Reset adam params after every epoch
    if hps['dataset'] == 'biggan':
        num_classes = 1000
        model_output = 1000
    elif hps['dataset'] == 'psvrt':
        num_classes = 2
        model_output = 2
    else:
        raise NotImplementedError(hps['dataset'])
    net_loss = nn.CrossEntropyLoss(reduction='mean')
    # Create results directory
    # NOTE(review): duplicate of the make_dir call above — harmless but redundant.
    utils.make_dir(hps['results_dir'])
    # Add hyperparams and model info to DB
    dt = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d_%H:%M:%S')
    hps['dt'] = dt
    run_name = '{}_{}_{}'.format(hps['dataset'], hps['model_name'], dt)
    # Initialize net
    net, img_size = utils.initialize_model(
        dataset=hps['dataset'],
        model_name=hps['model_name'],
        num_classes=model_output,
        siamese=hps['siamese'],
        siamese_version=hps['siamese_version'],
        trainable=trainable,
        pretrained=hps['pretrained'])
    # NOTE(review): img_size returned above is immediately overwritten here.
    if hps['dataset'] == 'biggan':
        img_size = 224
    elif hps['dataset'] == 'psvrt':
        img_size = 80  # 160
    # Dataset-specific generator module, e.g. data_generators.biggan.
    ds = import_module('data_generators.{}'.format(hps['dataset']))
    P = ds.Generator(dataset=hps['dataset'],
                     img_size=img_size,
                     device=device,
                     siamese=hps['siamese'],
                     task=hps['task'],
                     wn=hps['wn'],
                     num_classes=num_classes)
    if hps['adv_version'] == 'flip':
        # Gradient-reversal hook on every generator parameter.
        [p.register_hook(reversal) for p in P.parameters()]
    net = net.to(device)
    P = P.to(device)
    net_optimizer = utils.get_optimizer(net=net,
                                        optimizer=hps['optimizer'],
                                        lr=hps['inner_lr'],
                                        amsgrad=hps['amsgrad'],
                                        trainable=trainable)
    if hps['dataset'] == 'biggan':
        # Exclude parameters whose name contains 'model' from the outer optimizer.
        outer_params = P.named_parameters()
        outer_params = [v for k, v in outer_params if 'model' not in k]
    else:
        outer_params = P.parameters()
    r_optimizer = getattr(optim, hps['optimizer'])(outer_params,
                                                   lr=hps['outer_lr'],
                                                   amsgrad=hps['amsgrad'])
    # Optimize r
    inner_losses, outer_losses = [], []
    inner_loop_steps, outer_loop_steps = [], []
    epochs = int(hps['epochs'])
    inner_loop_criterion = hps['inner_loop_criterion']
    outer_loop_criterion = hps['outer_loop_criterion']
    # When a loop criterion is set, *_steps acts as a loss threshold rather
    # than an iteration count.
    if inner_loop_criterion:
        inner_steps = hps['inner_steps']
    else:
        inner_steps = int(hps['inner_steps'])
    if outer_loop_criterion:
        outer_steps = hps['outer_steps']
    else:
        outer_steps = int(hps['outer_steps'])
    for epoch in tqdm(list(range(epochs)), total=epochs, desc='Epoch'):
        # Inner loop starts here
        net.train()
        net._initialize()  # Reset thetas
        P.set_not_trainable()
        if reset_inner_optimizer:
            if epoch == 0:
                reset_adam_state = net_optimizer.state
            net_optimizer.state = reset_adam_state  # Reset adam parameters
        with tqdm(total=inner_steps) as inner_pbar:
            if inner_loop_criterion:
                # Run until the inner loss drops below the threshold.
                L = np.inf
                i = 0
                while L > inner_steps:
                    L = inner_loop(net=net,
                                   net_loss=net_loss,
                                   net_optimizer=net_optimizer,
                                   P=P,
                                   device=device,
                                   inner_pbar=inner_pbar,
                                   batch_size=hps['batch_size'])
                    i += 1
            else:
                for i in range(inner_steps):
                    L = inner_loop(net=net,
                                   net_loss=net_loss,
                                   net_optimizer=net_optimizer,
                                   P=P,
                                   device=device,
                                   inner_pbar=inner_pbar,
                                   batch_size=hps['batch_size'])
        inner_loop_steps += [i]
        # TODO: Compute hessian over init_training_steps here
        # TODO: Pass adams from inner_optimizer to r_optimizer
        inner_losses += [L.cpu().data.numpy()]
        # Outer loop starts here
        net.eval()  # Enable test-time batch norms
        P.set_trainable()
        if save_examples:
            utils.plot_examples(path=os.path.join(
                im_dir, '{}_outer_init_{}'.format(dt, epoch)),
                                n=16,
                                P=P)
        with tqdm(total=outer_steps) as inner_pbar:
            if outer_loop_criterion:
                # Run until the outer loss drops below the threshold.
                L = np.inf
                i = 0
                while L > outer_steps:
                    Lo, generative_losses, r_loss, grads = outer_loop(
                        batch_size=hps['batch_size'],
                        outer_batch_size_multiplier=hps[
                            'outer_batch_size_multiplier'],  # noqa
                        adv_version=hps['adv_version'],
                        num_classes=num_classes,
                        net_optimizer=net_optimizer,
                        r_optimizer=r_optimizer,
                        net=net,
                        net_loss=net_loss,
                        device=device,
                        loss=hps['loss'],
                        P=P,
                        outer_steps=outer_steps,
                        alpha=hps['alpha'],
                        beta=hps['beta'],
                        inner_pbar=inner_pbar)
                    i += 1
            else:
                for i in range(outer_steps):
                    Lo, generative_losses, r_loss, grads = outer_loop(
                        batch_size=hps['batch_size'],
                        outer_batch_size_multiplier=hps[
                            'outer_batch_size_multiplier'],  # noqa
                        adv_version=hps['adv_version'],
                        num_classes=num_classes,
                        net_optimizer=net_optimizer,
                        r_optimizer=r_optimizer,
                        net=net,
                        net_loss=net_loss,
                        device=device,
                        loss=hps['loss'],
                        P=P,
                        outer_steps=outer_steps,
                        alpha=hps['alpha'],
                        beta=hps['beta'],
                        i=i,
                        inner_pbar=inner_pbar)
        outer_losses += [Lo]
    # Persist gradients from the final outer-loop call.
    path = os.path.join(hps['results_dir'], '{}_gradients'.format(run_name))
    if pull_from_db:
        # Update DB
        results_dict = {'_id': exp['_id'], 'file_path': path}
        db.update_grad_experiment([results_dict])
    # Save epoch results
    if save_examples:
        utils.plot_examples(path=os.path.join(
            im_dir, '{}_outer_optim_{}'.format(run_name, epoch)),
                            n=16,
                            P=P)
    # Detach every gradient tensor to numpy before saving.
    for k, v in grads.items():
        if v is not None:
            try:
                v = v.detach().cpu().numpy()
            except Exception as e:
                print('Failed to detach {}'.format(k))
                v = v.cpu().numpy()
            grads[k] = v
    grads.update(hps)
    np.savez(path, **grads)
    print('Finished the experiment!')
import torch

from models import VAE
from utils import plot_examples

# Load a pretrained VAE checkpoint on the CPU and visualize how well it
# reconstructs the first few training images.
model = VAE(im_size=64)
model.load_state_dict(torch.load('vae_sbd.pt', map_location='cpu'))

data = torch.load('data/train.pt')[:8]
plot_examples(data, name='original')

# The forward pass returns (mse, kl, reconstruction); only the
# reconstruction is needed for plotting.
_mse, _kl, x_rec = model(data)
plot_examples(x_rec, name='reconstructions')
def trainNet(
    datapath='.',
    nepochs=1,
    learning_rate=0.001,
    batch_size=64,
    cuda=False,
    savedir='./',
    lossPlotName='loss.png',
    num_workers=24,
):
    '''
    Our basic training file.

    Trains a VGG classifier on CIFAR-10 for `nepochs`, plotting the
    per-epoch training loss to `savedir + lossPlotName`, then evaluates on
    the test split: saves example predictions, a confusion matrix, the
    model weights, and prints per-class accuracies.
    '''
    if not savedir.endswith("/"):
        savedir += "/"

    if cuda:
        print("Running on GPU")
        device = torch.device('cuda')
    else:
        print("Running on CPU")
        device = torch.device('cpu')
    net = model.VGG().to(device)  # .to('cpu') is a no-op, so one path suffices

    # Dataset: CIFAR-10 upscaled to the 224x224 input VGG expects.
    t = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
    ])
    train_images = datasets.CIFAR10('.', train=True, download=True, transform=t)
    train_data = torch.utils.data.DataLoader(train_images,
                                             batch_size=batch_size,
                                             shuffle=True,
                                             num_workers=num_workers)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=learning_rate)
    epoch_loss_array = torch.zeros(nepochs)
    print(f"Train data {len(train_images)}")

    for epoch in range(nepochs):
        net.train()
        epoch_loss = 0
        for i, (img, label) in enumerate(train_data):
            optimizer.zero_grad()
            out = net(img.to(device))
            loss = criterion(out, label.to(device))
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
        epoch_loss /= (i + 1)
        print(f"Epoch {epoch} loss {epoch_loss}")
        epoch_loss_array[epoch] = epoch_loss
        utils.plot_loss(epoch_loss_array, savedir + lossPlotName)

    # NOTE(review): this loads 'final_model.pt' (discarding the weights just
    # trained), but that file is only written at the END of this function, so
    # a first run crashes here unless a checkpoint already exists. Confirm the
    # intended save/load ordering before relying on this.
    net.load_state_dict(torch.load('final_model.pt'))

    # Testing
    with torch.no_grad():
        net.eval()
        test_loss = 0.
        test_images = datasets.CIFAR10('.', train=False, download=True, transform=t)
        test_data = torch.utils.data.DataLoader(test_images,
                                                batch_size=batch_size,
                                                shuffle=True,
                                                num_workers=num_workers)
        classes = [
            'plane', 'car', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse',
            'ship', 'truck'
        ]
        # `np.int` was removed in NumPy 1.24; the builtin `int` gives the
        # same default integer dtype.
        corrects = np.zeros(len(classes), dtype=int)
        totals = np.zeros(len(classes), dtype=int)
        trues = np.zeros(len(test_images))
        preds = np.zeros(len(test_images))
        for i, (img, label) in enumerate(test_data):
            out = net(img.to(device))
            trues[i * batch_size:(i + 1) * batch_size] = label.to('cpu').numpy()
            preds[i * batch_size:(i + 1) * batch_size] = [
                np.argmax(o) for o in out.to('cpu').numpy()
            ]
            # Per-class running tally of correct predictions.
            for o, l in zip(out, label):
                o = o.to('cpu').numpy()
                l = l.to('cpu').numpy()
                totals[l] += 1
                if np.argmax(o) == l:
                    corrects[l] += 1
            loss = criterion(out, label.to(device))
            test_loss += loss.item()
            if i == 0:
                # Plot a 3x3 grid of example predictions from the first batch.
                utils.plot_examples(img[:9].to('cpu'),
                                    out[:9].to('cpu').numpy(), classes,
                                    label[:9].to('cpu').numpy(),
                                    savedir + "examples.png")
        test_loss /= (i + 1)
        print(f"Test loss {test_loss}")
        utils.confusionMatrix(trues, preds, classes)
        torch.save(net.state_dict(), "final_model.pt")
        print("Accuracy")
        print("Name\tCorrects\tAccuracy")
        for i in range(len(classes)):
            print(f"{classes[i]}\t{corrects[i]}\t\t{corrects[i]/totals[i]}")
        print(30 * "-")
        print(f"Sum\t{np.sum(corrects)}\t\t{np.sum(corrects)/np.sum(totals)}")
def train_fn(
    loader,
    disc,
    gen,
    opt_gen,
    opt_disc,
    l1,
    vgg_loss,
    g_scaler,
    d_scaler,
    writer,
    tb_step,
):
    """One ESRGAN-style epoch under mixed precision.

    Per batch: a WGAN-GP critic update, then a generator update combining
    scaled L1, VGG perceptual and adversarial terms. Logs the critic loss
    to tensorboard and returns the updated tensorboard step counter.
    """
    progress = tqdm(loader, leave=True)
    for idx, (low_res, high_res) in enumerate(progress):
        high_res = high_res.to(config.DEVICE)
        low_res = low_res.to(config.DEVICE)

        # ---- Critic: maximize E[D(real)] - E[D(fake)] with gradient penalty ----
        with torch.cuda.amp.autocast():
            fake = gen(low_res)
            real_score = disc(high_res)
            fake_score = disc(fake.detach())
            gp = gradient_penalty(disc, high_res, fake, device=config.DEVICE)
            loss_critic = (
                -(torch.mean(real_score) - torch.mean(fake_score))
                + config.LAMBDA_GP * gp)

        opt_disc.zero_grad()
        d_scaler.scale(loss_critic).backward()
        d_scaler.step(opt_disc)
        d_scaler.update()

        # Train Generator: min log(1 - D(G(z))) <-> max log(D(G(z))
        with torch.cuda.amp.autocast():
            l1_loss = 1e-2 * l1(fake, high_res)
            adversarial_loss = 5e-3 * -torch.mean(disc(fake))
            loss_for_vgg = vgg_loss(fake, high_res)
            gen_loss = l1_loss + loss_for_vgg + adversarial_loss

        opt_gen.zero_grad()
        g_scaler.scale(gen_loss).backward()
        g_scaler.step(opt_gen)
        g_scaler.update()

        writer.add_scalar("Critic loss", loss_critic.item(), global_step=tb_step)
        tb_step += 1

        if idx % 100 == 0 and idx > 0:
            plot_examples("test_images/", gen)

        progress.set_postfix(
            gp=gp.item(),
            critic=loss_critic.item(),
            l1=l1_loss.item(),
            vgg=loss_for_vgg.item(),
            adversarial=adversarial_loss.item(),
        )

    return tb_step
optimizer.zero_grad() mse_loss, kl, out = model(batch) loss = mse_loss + kl loss.backward() optimizer.step() train_loss += loss.item() train_mse += mse_loss.item() train_kl += kl.item() if (i + 1) % 200 == 0: train_loss /= 200 train_mse /= 200 train_kl /= 200 print( log.format(i + 1, len(loader), train_mse, train_kl, train_loss)) writer.add_scalar('loss/total', train_loss, steps) writer.add_scalar('loss/mse', train_mse, steps) writer.add_scalar('loss/kl', train_kl, steps) train_loss = 0 train_mse = 0 train_kl = 0 for name, param in model.enc_convs.named_parameters(): writer.add_histogram(name, param.clone().cpu().data.numpy(), epoch) plot_examples(batch.cpu(), 'original', save=True) plot_examples(out.cpu().detach(), 'reconstruction', save=True) torch.save(model.state_dict(), f'vae_{decoder}.pt')
# Only the two masks are loaded here; `image` is expected to already be a
# PIL image defined earlier in the script.
mask = Image.open("./input/image_aug/albu_img/mask.jpeg")
mask2 = Image.open("./input/image_aug/albu_img/second_mask.jpeg")

transform = A.Compose([
    A.Resize(width=1920, height=1080),
    A.RandomCrop(width=1280, height=720),
    A.Rotate(limit=40, p=0.9, border_mode=cv2.BORDER_CONSTANT),
    A.HorizontalFlip(p=0.5),
    A.VerticalFlip(p=0.1),
    A.RGBShift(r_shift_limit=25, g_shift_limit=25, b_shift_limit=25, p=0.9),
    A.OneOf([
        A.Blur(blur_limit=3, p=0.5),
        A.ColorJitter(p=0.5),
    ], p=1.0),
])

image_lst = [image]
# Convert to numpy arrays as albumentations requires.
image = np.array(image)
mask = np.array(mask)
mask2 = np.array(mask2)
for _ in range(4):
    result = transform(image=image, masks=[mask, mask2])
    image_lst.append(result["image"])
    image_lst.extend(result["masks"])
plot_examples(image_lst)
def train_fn(
    loader,
    disc,
    gen,
    opt_gen,
    opt_disc,
    l1,
    vgg_loss,
    g_scaler,
    d_scaler,
    writer,
    tb_step,
):
    """One epoch of WGAN-GP super-resolution training under AMP.

    Each batch runs a critic step (Wasserstein loss + gradient penalty)
    followed by a generator step (L1 + adversarial + VGG terms). Returns
    the updated tensorboard step counter.
    """
    pbar = tqdm(loader, leave=True)
    for idx, (lr, hr) in enumerate(pbar):
        hr = hr.to(config.DEVICE)
        lr = lr.to(config.DEVICE)

        # Train Discriminator (critic)
        with torch.cuda.amp.autocast():
            fake = gen(lr)
            real_logits = disc(hr)
            fake_logits = disc(fake.detach())
            penalty = gradient_penalty(disc, hr, fake, device=config.DEVICE)
            # ?? relative loss  (translated from the original comment)
            loss_disc = config.LAMBDA_GP * penalty - (
                torch.mean(real_logits) - torch.mean(fake_logits))

        opt_disc.zero_grad()
        d_scaler.scale(loss_disc).backward()
        d_scaler.step(opt_disc)
        d_scaler.update()

        # Train Generator
        with torch.cuda.amp.autocast():
            l1_loss = 1e-2 * l1(fake, hr)
            adversarial_loss = 5e-3 * -torch.mean(disc(fake))
            vgg_for_loss = vgg_loss(fake, hr)
            loss_gen = l1_loss + adversarial_loss + vgg_for_loss

        opt_gen.zero_grad()
        g_scaler.scale(loss_gen).backward()
        g_scaler.step(opt_gen)
        g_scaler.update()

        writer.add_scalar("Disc loss", loss_disc.item(), global_step=tb_step)
        tb_step += 1

        # NOTE(review): `idx % 1 == 0` is true for every batch, so examples
        # are plotted each iteration — looks like a debugging leftover;
        # confirm before a long training run.
        if idx % 1 == 0:
            plot_examples("data/val", gen)

        pbar.set_postfix(
            gp=penalty.item(),
            disc=loss_disc.item(),
            l1=l1_loss.item(),
            vgg=vgg_for_loss.item(),
            adversarial=adversarial_loss.item(),
        )

    return tb_step
def experiment(**kwargs):
    """Run a dataset-adv experiment. Pull from DB or use defaults."""
    # Set default training parameters
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(('Using device: {}'.format(device)))
    # NOTE(review): the biggan defaults are immediately overwritten by the
    # psvrt defaults on the next line — confirm this is intentional.
    hps = utils.default_hps('biggan')
    hps = utils.default_hps('psvrt')
    hps['epochs'] = 1
    # Update params with kwargs
    pull_from_db = kwargs['pull_from_db']
    if pull_from_db:
        exp = db.get_experiment_trial(True)
        if exp is None:
            raise RuntimeError('All experiments are complete.')
    else:
        exp = kwargs
    # Only hyperparameters already present in the defaults are overridden.
    for k, v in exp.items():
        if k in hps:
            hps[k] = v
            print(('Setting {} to {}'.format(k, v)))
    # Create results directory
    utils.make_dir(hps['results_dir'])
    # Other params we won't write to DB
    save_every = 1000
    save_examples = False
    im_dir = 'screenshots'
    trainable = True
    reset_inner_optimizer = False  # Reset adam params after every epoch
    reset_theta = False
    if hps['dataset'] == 'biggan':
        num_classes = 1000
        model_output = 1000
    elif hps['dataset'] == 'psvrt':
        num_classes = 2
        model_output = 2
    else:
        raise NotImplementedError(hps['dataset'])
    net_loss = nn.CrossEntropyLoss(reduction='mean')
    # Create results directory
    # NOTE(review): duplicate of the make_dir call above — harmless but redundant.
    utils.make_dir(hps['results_dir'])
    # Add hyperparams and model info to DB
    dt = datetime.datetime.fromtimestamp(
        time.time()).strftime('%Y-%m-%d-%H_%M_%S')
    hps['dt'] = dt
    run_name = '{}_{}_{}'.format(hps['dataset'], hps['model_name'], dt)
    # Initialize net
    net, img_size = utils.initialize_model(
        dataset=hps['dataset'],
        model_name=hps['model_name'],
        num_classes=model_output,
        siamese=hps['siamese'],
        siamese_version=hps['siamese_version'],
        trainable=trainable,
        pretrained=hps['pretrained'])
    # NOTE(review): img_size returned above is immediately overwritten here.
    if hps['dataset'] == 'biggan':
        img_size = 224
        # net.track_running_stats = False
    elif hps['dataset'] == 'psvrt':
        img_size = 160
    # Dataset-specific generator module, e.g. data_generators.psvrt.
    ds = import_module('data_generators.{}'.format(hps['dataset']))
    P = ds.Generator(dataset=hps['dataset'],
                     img_size=img_size,
                     device=device,
                     siamese=hps['siamese'],
                     task=hps['task'],
                     wn=hps['wn'],
                     num_classes=num_classes)
    if hps['adv_version'] == 'flip':
        # Gradient-reversal hook on every generator parameter.
        [p.register_hook(utils.reversal) for p in P.parameters()]
    net = net.to(device)
    P = P.to(device)
    net_optimizer = utils.get_optimizer(net=net,
                                        optimizer=hps['optimizer'],
                                        lr=hps['inner_lr'],
                                        amsgrad=hps['amsgrad'],
                                        trainable=trainable)
    if hps['dataset'] == 'biggan':
        # Exclude 'model'-named parameters; the embedding gets its own lr.
        outer_params = [v for k, v in P.named_parameters() if 'model' not in k]
        if P.embedding_grad:
            outer_params = [{
                'params': outer_params
            }, {
                'params': P.get_embed()[0][1],
                'lr': hps['emb_lr']
            }]
            # outer_params += [P.embedding]
    else:
        outer_params = P.parameters()
    r_optimizer = getattr(optim, hps['optimizer'])(outer_params,
                                                   lr=hps['outer_lr'],
                                                   amsgrad=hps['amsgrad'])
    # Add tensorboard if requested
    if hps['gen_tb']:
        writer = SummaryWriter(log_dir=os.path.join('runs', run_name))
        print('Saving tensorboard to: {}'.format(os.path.join(
            'runs', run_name)))
    else:
        writer = None
    # Optimize r
    inner_losses, outer_losses = [], []
    inner_loop_steps, outer_loop_steps = [], []
    all_params = []
    epochs = int(hps['epochs'])
    inner_loop_criterion = hps['inner_loop_criterion']
    outer_loop_criterion = hps['outer_loop_criterion']
    # When a loop criterion is set, *_steps acts as a loss threshold rather
    # than an iteration count.
    if hps['inner_loop_criterion']:
        inner_steps = hps['inner_steps']
    else:
        inner_steps = int(hps['inner_steps'])
    if hps['outer_loop_criterion']:
        outer_steps = hps['outer_steps']
    else:
        outer_steps = int(hps['outer_steps'])
    for epoch in tqdm(list(range(epochs)), total=epochs, desc='Epoch'):
        # Inner loop starts here
        net.train()
        P.set_not_trainable()
        if reset_theta:
            net._initialize()
        if reset_inner_optimizer:
            if epoch == 0:
                reset_adam_state = net_optimizer.state
            net_optimizer.state = reset_adam_state  # Reset adam parameters
        with tqdm() as inner_pbar:
            if inner_loop_criterion:
                # Run until the inner loss drops below the threshold.
                L = np.inf
                i = 0
                while L > inner_steps:
                    L = utils.inner_loop(net=net,
                                         net_loss=net_loss,
                                         net_optimizer=net_optimizer,
                                         P=P,
                                         device=device,
                                         inner_pbar=inner_pbar,
                                         batch_size=hps['batch_size'])
                    i += 1
            else:
                for i in range(inner_steps):
                    L = utils.inner_loop(net=net,
                                         net_loss=net_loss,
                                         net_optimizer=net_optimizer,
                                         P=P,
                                         device=device,
                                         inner_pbar=inner_pbar,
                                         batch_size=hps['batch_size'])
        inner_loop_steps += [i]
        # TODO: Pass adams from inner_optimizer to r_optimizer
        inner_losses += [L.item()]  # cpu().data.numpy()]
        # Outer loop starts here
        if hps['use_bn']:
            net.eval()  # Careful!!
        else:
            net.train()
        P.set_trainable()
        if save_examples:
            utils.plot_examples(path=os.path.join(
                im_dir, '{}_outer_init_{}'.format(run_name, epoch)),
                                n_subplots=16,
                                n_batches=10,
                                P=P)
        try:
            with tqdm() as inner_pbar:
                rmu = 0.
                if outer_loop_criterion:
                    # Run until the outer loss drops below the threshold.
                    L = np.inf
                    i = 0
                    while L > outer_steps:
                        Lo, generative_losses, r_loss, batch, rmu, net_ce, params = utils.outer_loop(  # noqa
                            batch_size=hps['batch_size'],
                            outer_batch_size_multiplier=hps[
                                'outer_batch_size_multiplier'],  # noqa
                            adv_version=hps['adv_version'],
                            num_classes=num_classes,
                            net_optimizer=net_optimizer,
                            r_optimizer=r_optimizer,
                            net=net,
                            net_loss=net_loss,
                            running_mean=rmu,
                            device=device,
                            loss=hps['loss'],
                            P=P,
                            alpha=hps['alpha'],
                            beta=hps['beta'],
                            writer=writer,
                            i=i,
                            inner_pbar=inner_pbar)
                        if (hps['save_i_params']
                                and i % hps['save_i_params'] == 0):
                            all_params += [utils.prep_params(params)]
                        i += 1
                else:
                    for i in range(outer_steps):
                        Lo, generative_losses, r_loss, batch, rmu, net_ce, params = utils.outer_loop(  # noqa
                            batch_size=hps['batch_size'],
                            outer_batch_size_multiplier=hps[
                                'outer_batch_size_multiplier'],  # noqa
                            adv_version=hps['adv_version'],
                            num_classes=num_classes,
                            net_optimizer=net_optimizer,
                            r_optimizer=r_optimizer,
                            net=net,
                            net_loss=net_loss,
                            running_mean=rmu,
                            device=device,
                            loss=hps['loss'],
                            P=P,
                            alpha=hps['alpha'],
                            beta=hps['beta'],
                            i=i,
                            writer=writer,
                            inner_pbar=inner_pbar)
                        if (hps['save_i_params']
                                and i % hps['save_i_params'] == 0):
                            all_params += [utils.prep_params(params)]
                outer_losses += [Lo.item()]  # cpu().data.numpy()]
                outer_loop_steps += [i]
        except Exception as e:
            # Best-effort save of partial results, then stop the epoch loop.
            print('Outer optimization failed. {}\n'
                  'Saving results and exiting.'.format(e))
            if pull_from_db:
                # Update DB with results
                results_dict = {
                    'experiment_id': exp['_id'],
                    'inner_loss': inner_losses[epoch].tolist(),
                    'outer_loss': outer_losses[epoch].tolist(),
                    'inner_loop_steps': inner_loop_steps[epoch],
                    'outer_loop_steps': outer_loop_steps[epoch],
                    'net_loss': net_ce.item(),  # cpu().data.numpy(),
                    'params': json.dumps(utils.prep_params(P)),
                }
                db.add_results([results_dict])
            break
        # Save epoch results
        if save_examples:
            utils.plot_examples(path=os.path.join(
                im_dir, '{}_outer_optim_{}'.format(run_name, epoch)),
                                n_subplots=16,
                                n_batches=10,
                                P=P)
        # pds += [utils.prep_params(P)]
        if epoch % save_every == 0:
            np.save(
                os.path.join(hps['results_dir'],
                             '{}_inner_losses'.format(run_name)),
                inner_losses)
            np.save(
                os.path.join(hps['results_dir'],
                             '{}_outer_losses'.format(run_name)),
                outer_losses)
            save_params = utils.prep_params(P)
            if P.embedding_grad:
                save_params['embedding'] = P.get_embed()[0][1].detach().cpu(
                ).numpy()  # noqa
                save_params[
                    'embedding_original'] = P.embedding_original.detach().cpu(
                    ).numpy()  # noqa
            np.save(
                os.path.join(hps['results_dir'],
                             '{}_all_params'.format(run_name)), all_params)
            np.savez(
                os.path.join(hps['results_dir'],
                             '{}_final_params'.format(run_name)),
                **save_params)
            if len(inner_loop_steps):
                np.save(
                    os.path.join(hps['results_dir'],
                                 '{}_inner_steps'.format(run_name)),
                    inner_loop_steps)
                np.save(
                    os.path.join(hps['results_dir'],
                                 '{}_outer_steps'.format(run_name)),
                    outer_loop_steps)
    if pull_from_db:
        # Update DB with results
        results_dict = {
            'experiment_id': exp['_id'],
            'inner_loss': inner_losses[epoch].tolist(),
            'outer_loss': outer_losses[epoch].tolist(),
            'inner_loop_steps': inner_loop_steps[epoch],
            'outer_loop_steps': outer_loop_steps[epoch],
            'net_loss': net_ce.item(),  # cpu().data.numpy(),
            'params': json.dumps(utils.prep_params(P)),
        }
        db.add_results([results_dict])
    print('Finished {}!'.format(run_name))
def main(args):
    """Evaluate a set of autoencoders on one dataset.

    For each autoencoder: fit it on the train split, project both splits
    into the latent space, optionally visualize latent interpolations and
    T-SNE embeddings, then run every evaluation method and append a CSV
    row to results.csv. Finally plots test-set reconstructions for all
    autoencoders side by side.
    """
    # Get data
    data, dataset_name = get_dataset(args.dataset)
    dataset_name = f"{dataset_name}-{args.latent_dim}"
    output_dir = os.path.join(f"outputs-{args.mode}", dataset_name)
    os.makedirs(output_dir, exist_ok=True)

    # set logger
    logger = simple_logger(os.path.join(output_dir, "results.csv"))

    # data[0].shape[1] is the flattened input dimensionality.
    autoencoders = get_autoencoders(data[0].shape[1], args.latent_dim,
                                    args.mode)
    evaluation_methods = get_evaluation_methods(args.mode, logger)
    train_data, train_labels, test_data, test_labels = data
    print(
        f"run_analysis on {len(train_data)} train and {len(test_data)} test samples"
    )
    for ae in autoencoders:
        # Start the CSV row for this autoencoder (no newline yet).
        logger.log(f"{ae}", end="")
        print(ae)

        # Learn encoding on train data train on it and test on test encodings
        ae.learn_encoder_decoder(
            train_data, os.path.join(output_dir, "Training-autoencoder"))
        start = time()
        print("\tProjecting Data... ", end="")
        projected_train_data = ae.encode(train_data)
        projected_test_data = ae.encode(test_data)
        print(f"Finished in {time() - start:.2f} sec")

        if args.plot_latent_interpolation:
            start = time()
            print("\tVisualizing latent interpolation... ", end="")
            # NOTE(review): "Latent-interpollation" is misspelled but kept —
            # downstream tooling may depend on the existing directory name.
            plot_latent_interpolation(ae,
                                      train_data,
                                      plot_path=os.path.join(
                                          output_dir,
                                          "Latent-interpollation",
                                          f"{ae}-Train.png"))
            plot_latent_interpolation(ae,
                                      test_data,
                                      plot_path=os.path.join(
                                          output_dir,
                                          "Latent-interpollation",
                                          f"{ae}-Test.png"))
            print(f"Finished in {time() - start:.2f} sec")

        if args.plot_tsne:
            # Run T-SNE
            start = time()
            print("\tRunning T-SNE... ", end="")
            plot_tsne(projected_train_data, train_labels,
                      os.path.join(output_dir, "T-SNE", f"{ae}-Train.png"))
            plot_tsne(projected_test_data, test_labels,
                      os.path.join(output_dir, "T-SNE", f"{ae}-Test.png"))
            print(f"Finished in {time() - start:.2f} sec")

        projected_data = (projected_train_data, projected_test_data)
        for evaluator in evaluation_methods:
            result_str = evaluator.evaluate(ae,
                                            data,
                                            projected_data,
                                            plot_path=os.path.join(
                                                output_dir, "Evaluation",
                                                f"{evaluator}_{ae}.png"))
            # One comma-separated cell per evaluator on the same CSV row.
            logger.log(f",{result_str}", end="")
        # Terminate the CSV row for this autoencoder.
        logger.log("")

    plot_examples(autoencoders,
                  test_data,
                  plot_path=os.path.join(output_dir,
                                         "Test-reconstruction.png"))