def __init__(self, args):
    np.random.seed(int(time.time()))
    self.store = Store()
    self.visual = Visual(self.store)
    self.image_shape = [28, 28, 1]  # 28x28 pixels, single black-and-white channel
    self.batch_size = args.batch_size
    self.lr = args.learning_rate
    self.train_epoch = args.train_epoch
    self.dropout_keep_probability = tf.placeholder("float")
    self.mnist = input_data.read_data_sets("MNIST_data/", one_hot=True, reshape=[])
    self.is_training = tf.placeholder(dtype=tf.bool)

    # network inputs: real 64x64 single-channel images and 100-dim latent noise vectors
    self.x = tf.placeholder(tf.float32, shape=(None, 64, 64, 1), name="X_Input")
    self.z = tf.placeholder(tf.float32, shape=(None, 1, 1, 100), name="Z")

    # generator and discriminator; the discriminator is reused for real and fake batches
    self.G_z = define_generator(self.z, self.is_training)
    D_real, D_real_logits = define_discriminator(self.x, self.is_training)
    D_fake, D_fake_logits = define_discriminator(self.G_z, self.is_training, reuse=True)

    # standard GAN losses: the discriminator labels real samples as 1 and fakes as 0,
    # while the generator tries to make the discriminator label fakes as 1
    D_loss_real = init_loss(D_real_logits, tf.ones, self.batch_size)
    D_loss_fake = init_loss(D_fake_logits, tf.zeros, self.batch_size)
    self.G_loss = init_loss(D_fake_logits, tf.ones, self.batch_size)
    self.D_loss = D_loss_real + D_loss_fake

    self.sess = None
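# `init_loss` is not shown above. A minimal sketch of what it likely does,
# assuming it wraps sigmoid cross-entropy against a constant label tensor built
# from the passed-in factory (tf.ones or tf.zeros); the [batch_size, 1, 1, 1]
# label shape is an assumption matching a DCGAN-style discriminator output.
def init_loss(logits, label_fn, batch_size):
    labels = label_fn([batch_size, 1, 1, 1])
    return tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=labels))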
# stacked autoencoder
sae = init_model(args.sae_model_class, args.sae_restore_path, False)
sae.num_trained_blocks = sae.num_blocks
sae_parameters = list(sae.parameters())

# obtain output dimensionality of the final encoder block
enc_out_features = sae.get_enc_out_features(-1)

# classifier operating on the encoder output features
classifier = init_model(args.classifier_model_class,
                        args.classifier_restore_path,
                        restore_required=False,
                        enc_out_features=enc_out_features)
parameters = sae_parameters + list(classifier.parameters())

# loss and optimization
criterion_train = init_loss(args.loss_type)
criterion_eval = init_loss(args.loss_type, reduction='sum')
optimizer = torch.optim.Adam(parameters, lr=args.learning_rate,
                             weight_decay=args.weight_decay)

# load data
data_loader_train, _, _, _, _, _ = init_data_loader(
    args.dataset_key, train_ver=True, batch_size=args.batch_size,
    cub_folder=args.cub_folder)
data_loader_eval, _, _, _, _, _ = init_data_loader(
    args.dataset_key, train_ver=False, batch_size=args.batch_size,
    cub_folder=args.cub_folder)
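# A minimal sketch of one supervised training step built from the pieces above;
# it assumes `sae.encode` returns the final-block features (as in the block-wise
# training loop below) and that `classifier` maps those features to class logits.
# The variable names and the cuda handling are illustrative, not from the source.
for images, labels in data_loader_train:
    images = images.float()
    if torch.cuda.is_available():
        images, labels = images.cuda(), labels.cuda()
    features = sae.encode(images)            # encode with the stacked autoencoder
    logits = classifier(features)            # classify the encoded features
    loss = criterion_train(logits, labels)   # loss type chosen via args.loss_type
    optimizer.zero_grad()
    loss.backward()                          # updates both SAE and classifier parameters
    optimizer.step()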
def train_sdae(batch_size, learning_rate, num_epochs, model_class, dataset_key,
               noise_type, zero_frac, gaussian_stdev, sp_frac, restore_path,
               save_path, log_freq, olshausen_path, olshausen_step_size,
               weight_decay, loss_type, emph_wt_a, emph_wt_b,
               vae_reconstruction_loss_type, cub_folder, learned_noise_wt,
               nt_restore_prefix, nt_save_prefix):
    # set up log folders
    if not os.path.exists('./01_original'):
        os.makedirs('./01_original')
    if not os.path.exists('./02_noisy'):
        os.makedirs('./02_noisy')
    if not os.path.exists('./03_output'):
        os.makedirs('./03_output')
    if not os.path.exists('./04_filters'):
        os.makedirs('./04_filters')
    if not os.path.exists('./05_stdev'):
        os.makedirs('./05_stdev')

    # set up model and criterion
    model = init_model(model_class, restore_path, restore_required=False)
    if isinstance(model, modules.SVAE):
        criterion = init_loss(
            'vae', reconstruction_loss_type=vae_reconstruction_loss_type)
    else:
        criterion = init_loss(loss_type)
    if len(learned_noise_wt) < model.num_blocks:
        # pad the per-block noise weights by repeating the last value
        len_diff = model.num_blocks - len(learned_noise_wt)
        learned_noise_wt.extend(learned_noise_wt[-1:] * len_diff)

    # load data
    data_loader, sample_c, sample_h, sample_w, data_minval, data_maxval = init_data_loader(
        dataset_key, True, batch_size, olshausen_path, olshausen_step_size, cub_folder)
    original_size = sample_c * sample_h * sample_w

    # training loop
    affected = None
    warning_displayed = False
    original, noisy, output = None, None, None
    for ae_idx in range(model.num_blocks):
        stdev = None
        nt_loss = None
        nt_optimizer = None
        noise_transformer = None
        if learned_noise_wt[ae_idx] > 0:
            noise_transformer = modules.NoiseTransformer(original_size)
            if torch.cuda.is_available():
                noise_transformer = noise_transformer.cuda()
            if nt_restore_prefix is not None:
                nt_restore_path = '%s_%d.pth' % (nt_restore_prefix, ae_idx)
                if os.path.exists(nt_restore_path):
                    noise_transformer.load_state_dict(torch.load(nt_restore_path))
                    print('restored noise transformer from %s' % nt_restore_path)
                else:
                    print('warning: checkpoint %s not found, skipping...'
                          % nt_restore_path)
            nt_optimizer = torch.optim.Adam(noise_transformer.parameters(),
                                            lr=learning_rate,
                                            weight_decay=weight_decay)

        # train one block at a time
        print('--------------------')
        print('training block %d/%d' % (ae_idx + 1, model.num_blocks))
        print('--------------------')
        model_optimizer = torch.optim.Adam(model.get_block_parameters(ae_idx),
                                           lr=learning_rate,
                                           weight_decay=weight_decay)
        for epoch in range(num_epochs):
            mean_loss, total_num_examples = 0, 0
            for batch_idx, data in enumerate(data_loader):
                original, _ = data
                original = original.float()
                if not model.is_convolutional:
                    original = original.view(original.size(0), -1)
                if torch.cuda.is_available():
                    original = original.cuda()
                # push the clean input through the already-trained blocks
                original = model.encode(original)
                if isinstance(model, modules.SVAE):
                    original = original[1]  # (sampled latent vector, mean, log_var)
                original = original.detach()

                # apply noise
                if learned_noise_wt[ae_idx] > 0:
                    stdev = noise_transformer.compute_stdev(original)
                    noisy = noise_transformer.apply_noise(original, stdev)
                else:
                    if noise_type == 'mn':
                        noisy, affected = zero_mask(original, zero_frac)
                    elif noise_type == 'gs':
                        noisy, affected = add_gaussian(original, gaussian_stdev)
                    elif noise_type == 'sp':
                        noisy, affected = salt_and_pepper(
                            original, sp_frac, data_minval, data_maxval)
                    else:
                        if not warning_displayed:
                            print('unrecognized noise type: %r' % (noise_type,))
                            print('using clean image as input')
                            warning_displayed = True
                        noisy = original
                noisy = noisy.detach()
                if torch.cuda.is_available():
                    noisy = noisy.cuda()

                # =============== forward ===============
                if isinstance(model, modules.SVAE):
                    output, mean, log_var = model(noisy, ae_idx)
                    loss = criterion(output, original, mean, log_var)
                    batch_size_ = original.size(0)  # might be an undersized last batch
                    total_num_examples += batch_size_
                    # assumes `loss` is the sum over the batch
                    mean_loss += (loss - mean_loss * batch_size_) / total_num_examples
                else:
                    output = model(noisy, ae_idx)
                    if (emph_wt_a != 1 or emph_wt_b != 1) and noise_type != 'gs':
                        # emphasize corrupted dimensions in the loss
                        loss = emph_wt_a * criterion(output[affected], original[affected]) + \
                            emph_wt_b * criterion(output[1 - affected], original[1 - affected])
                    else:
                        loss = criterion(output, original)
                    # assumes `loss` is the mean for the batch
                    mean_loss += (loss - mean_loss) / (batch_idx + 1)
                if learned_noise_wt[ae_idx] > 0:
                    # encourage large standard deviations
                    nt_loss = loss - learned_noise_wt[ae_idx] * torch.mean(stdev)

                # =============== backward ==============
                if learned_noise_wt[ae_idx] > 0:
                    nt_optimizer.zero_grad()
                    nt_loss.backward(retain_graph=True)
                    nt_optimizer.step()
                model_optimizer.zero_grad()
                loss.backward()
                model_optimizer.step()

            # =================== log ===================
            print('epoch {}/{}, loss={:.6f}'.format(epoch + 1, num_epochs,
                                                    mean_loss.item()))
            if epoch % log_freq == 0 or epoch == num_epochs - 1:
                # save images
                if ae_idx == 0:
                    to_save = [
                        (to_img(original.data.cpu()), './01_original', 'original'),
                        (to_img(noisy.data.cpu()), './02_noisy', 'noisy'),
                        (to_img(output.data.cpu()), './03_output', 'output'),
                        (to_img(model.get_first_layer_weights(as_tensor=True)),
                         './04_filters', 'filters'),
                    ]
                    for img, folder, desc in to_save:
                        save_image_wrapper(
                            img, os.path.join(folder,
                                              '{}_{}.png'.format(desc, epoch + 1)))

                # save learned stdev
                if learned_noise_wt[ae_idx] > 0:
                    stdev_path = os.path.join(
                        './05_stdev', 'stdev_{}_{}.txt'.format(ae_idx, epoch + 1))
                    np.savetxt(stdev_path, stdev.data.cpu().numpy(), fmt='%.18f')
                    print('[o] saved stdev to %s' % stdev_path)

                # save model(s)
                torch.save(model.state_dict(), save_path)
                print('[o] saved model to %s' % save_path)
                if learned_noise_wt[ae_idx] > 0 and nt_save_prefix is not None:
                    nt_save_path = '%s_%d.pth' % (nt_save_prefix, ae_idx)
                    torch.save(noise_transformer.state_dict(), nt_save_path)
                    print('[o] saved lvl-%d noise transformer to %s'
                          % (ae_idx, nt_save_path))

        model.num_trained_blocks += 1
        original_size = model.get_enc_out_features(ae_idx)

    plot_first_layer_weights(model)
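# The noise helpers used above (zero_mask, add_gaussian, salt_and_pepper) are not
# defined in this snippet. A minimal sketch of `zero_mask`, assuming it zeroes a
# random fraction of the input entries and returns both the corrupted tensor and
# the boolean mask of affected entries (used by the emphasized loss); the exact
# implementation in the source may differ.
def zero_mask(x, zero_frac):
    affected = torch.rand_like(x) < zero_frac   # boolean mask of corrupted entries
    noisy = x.masked_fill(affected, 0.0)        # zero out the affected entries
    return noisy, affected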
finetune_imgs = "/home/admin/segmentation/task2/data/gen/images"
finetune_masks = "/home/admin/segmentation/task2/data/gen/masks"
finetune_transform = alb.Compose([
    alb.Resize(512, 512),
    ToTensorV2(),
])
finetune_dataset = AerialDataset("train", "gen", finetune_imgs, finetune_masks,
                                 transform=finetune_transform)
finetune_loader = DataLoader(finetune_dataset, batch_size=16,
                             pin_memory=True, drop_last=True)

eval_imgs = "/home/admin/segmentation/task2/data/vaihingen/train/cropped/images/val"
eval_masks = "/home/admin/segmentation/task2/data/vaihingen/train/cropped/masks/val"
eval_dataset = AerialDataset("val", "vaihingen", eval_imgs, eval_masks)
eval_loader = DataLoader(eval_dataset, batch_size=16,
                         pin_memory=True, drop_last=True)

_loss_fn = init_loss(config["loss_fn"])
loss_fn = LossWithAux(_loss_fn)
_optimizer = init_optimizer(config)
optimizer = _optimizer(model.parameters(), lr=config["learning_rate"])

trainer = engine.create_supervised_trainer(
    model=model,
    optimizer=optimizer,
    loss_fn=loss_fn,
    device=device,
    non_blocking=True,
)
evaluator = engine.create_supervised_evaluator(
    model=model,
    metrics={
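# `LossWithAux` is not defined in this snippet. A minimal sketch, assuming the
# model returns a dict with a main "out" prediction plus auxiliary heads (as the
# evaluator's `y_pred["out"]` output_transform in the full training script
# suggests); the 0.4 auxiliary weight and the key handling are assumptions.
import torch.nn as nn

class LossWithAux(nn.Module):
    def __init__(self, loss_fn, aux_weight=0.4):
        super().__init__()
        self.loss_fn = loss_fn
        self.aux_weight = aux_weight

    def forward(self, y_pred, y):
        loss = self.loss_fn(y_pred["out"], y)          # main segmentation head
        for key, aux_out in y_pred.items():
            if key != "out":                           # auxiliary heads
                loss = loss + self.aux_weight * self.loss_fn(aux_out, y)
        return loss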
# set up log folders
if not os.path.exists('./01_original'):
    os.makedirs('./01_original')
if not os.path.exists('./02_noisy'):
    os.makedirs('./02_noisy')
if not os.path.exists('./03_output'):
    os.makedirs('./03_output')
save_dir = os.path.dirname(save_path)
if save_dir and not os.path.exists(save_dir):
    os.makedirs(save_dir)

# set up model and criterion
model = init_model(model_class, restore_path, restore_required=False, latent_dim=512)
criterion = init_loss('vae', reconstruction_loss_type='mse')

# load data
data_loader, _, _, _, data_minval, data_maxval = \
    init_data_loader(dataset_key, batch_size, dataset_path)

# training loop
warning_displayed = False
original, noisy, output = None, None, None
model_optimizer = torch.optim.Adam(
    model.parameters(), lr=learning_rate, weight_decay=weight_decay)
for epoch in range(num_epochs):
    mean_loss, total_num_examples = 0, 0
    for data in data_loader:
        original = data.float()
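# The 'vae' criterion returned by `init_loss` is not shown in this snippet. A
# minimal sketch of the usual form, assuming a summed MSE reconstruction term
# plus the analytic KL divergence between N(mean, exp(log_var)) and N(0, I); the
# exact reduction and weighting used in the source may differ.
import torch
import torch.nn.functional as F

def vae_loss(output, original, mean, log_var):
    reconstruction = F.mse_loss(output, original, reduction='sum')
    kl_divergence = -0.5 * torch.sum(1 + log_var - mean.pow(2) - log_var.exp())
    return reconstruction + kl_divergence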
def train():
    # parse command-line arguments, read the configuration file, and set up logging
    args = parse_args()
    config = read_config()
    try:
        if args.overwrite:
            shutil.rmtree(f"./logs/{args.name}", ignore_errors=True)
        os.mkdir(f"./logs/{args.name}")
    except FileExistsError:
        print(f"log folder {args.name} already exists.")
    init_logging(log_path=f"./logs/{args.name}")

    # determine which device to train on: cuda or cpu
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logger.info(f"running training on {device}")
    device += f':{args.main_cuda}'

    # prepare training and validation datasets
    logger.info('creating dataset and data loaders')
    dataset = args.dataset
    train_dataset = AerialDataset("train", dataset,
                                  config[dataset]["train"]["image_path"],
                                  config[dataset]["train"]["mask_path"])
    val_dataset = AerialDataset("val", dataset,
                                config[dataset]["val"]["image_path"],
                                config[dataset]["val"]["mask_path"])
    train_loader, train_metrics_loader, val_metrics_loader = create_data_loaders(
        train_dataset=train_dataset,
        val_dataset=val_dataset,
        num_workers=config["num_workers"],
        batch_size=config["batchsize"],
    )

    # create model
    logger.info(
        f'creating BiSeNetV2 and optimizer with initial lr of {config["learning_rate"]}')
    model = BiSeNetV2(config["n_classes"])
    model = nn.DataParallel(model,
                            device_ids=list(range(args.main_cuda, 4))).to(device)

    # initialize loss function and optimizer
    optimizer_fn = init_optimizer(config)
    optimizer = optimizer_fn(model.parameters(), lr=config["learning_rate"])
    logger.info('creating trainer and evaluator engines')
    _loss_fn = init_loss(config["loss_fn"])
    loss_fn = LossWithAux(_loss_fn)

    # create trainer and evaluator with ignite.engine
    trainer = engine.create_supervised_trainer(
        model=model,
        optimizer=optimizer,
        loss_fn=loss_fn,
        device=device,
        non_blocking=True,
    )
    evaluator = engine.create_supervised_evaluator(
        model=model,
        metrics={
            'loss': metrics.Loss(nn.CrossEntropyLoss()),
            "accuracy@0.3": metrics.Accuracy(thresholded_transform(0.3)),
            "IOU": metrics.IoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
            "mIOU": metrics.mIoU(
                metrics.ConfusionMatrix(num_classes=config["n_classes"])),
        },
        device=device,
        non_blocking=True,
        output_transform=lambda x, y, y_pred: (torch.sigmoid(y_pred["out"]), y),
    )

    # attach event listeners for post-processing after each iteration and epoch
    logger.info(f'creating summary writer with tag {config["model_tag"]}')
    writer = tensorboard.SummaryWriter(log_dir=f'logs/{config["model_tag"]}')

    # logger.info('attaching lr scheduler')
    # lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
    # attach_lr_scheduler(trainer, lr_scheduler, writer)

    logger.info('attaching event driven calls')
    attach_model_checkpoint(trainer, {config["model_tag"]: model.module}, args.name)
    attach_training_logger(trainer, writer=writer)
    attach_metric_logger(trainer, evaluator, 'train', train_metrics_loader, writer)
    attach_metric_logger(trainer, evaluator, 'val', val_metrics_loader, writer)

    # start training (evaluation is included too)
    logger.info('training...')
    trainer.run(train_loader, max_epochs=config["epochs"])
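# `thresholded_transform` is not defined in this file. A minimal sketch, assuming
# it builds an ignite output_transform that binarizes the already-sigmoided
# predictions at the given threshold so `metrics.Accuracy` receives 0/1 tensors;
# the exact signature and behavior in the source may differ.
def thresholded_transform(threshold=0.5):
    def transform(output):
        y_pred, y = output
        return (y_pred > threshold).long(), y
    return transform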