def main():
    datasets = get_datasets(in_dims=('speed', 'available', 'total', 'speed_ha'), out_dims=('speed',))
    dls = get_dataloaders(datasets, batch_size=16, collate_fn=bj_collate_fn)
    model = Ours(n_in=4, n_out=1)
    save_folder = args.save_folder
    trainer = BJTrainer(
        model,
        loss=get_loss(args.loss),
        device=torch.device(args.cuda),
        optimizer=get_optimizer(args.optimizer, model.parameters(), lr=args.lr),
        in_scalar=ZScoreScaler(
            mean=torch.tensor([34.71207, 0.55837995, 1.454227, 35.422764, 0.57980937, 1.4051558],
                              dtype=torch.float32),
            std=torch.tensor([11.989664, 0.28689522, 0.5432855, 9.341317, 0.15121026, 0.4632336],
                             dtype=torch.float32)
        ),
        out_scalar=ZScoreScaler(
            mean=torch.tensor([0.55837995], dtype=torch.float32),
            std=torch.tensor([0.28689522], dtype=torch.float32)
        ),
        max_grad_norm=args.max_grad_norm
    )
    if not args.test:
        train_model(
            dls,
            folder=save_folder,
            trainer=trainer,
            scheduler=None,
            epochs=args.epochs,
            early_stop_steps=args.early_stop_steps,
            use_checkpoint=args.resume
        )
    test_model(
        dls['test'],
        trainer=trainer,
        folder=save_folder,
    )
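# A minimal sketch of a ZScoreScaler like the one used above, assuming it
# standardizes inputs with (x - mean) / std and can invert the transform for
# model outputs. This is inferred from the name and the call sites, not taken
# from the actual repository implementation.
import torch

class ZScoreScalerSketch:
    def __init__(self, mean: torch.Tensor, std: torch.Tensor):
        self.mean = mean
        self.std = std

    def transform(self, x: torch.Tensor) -> torch.Tensor:
        # Broadcasts over the trailing feature dimension
        return (x - self.mean) / self.std

    def inverse_transform(self, x: torch.Tensor) -> torch.Tensor:
        return x * self.std + self.mean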
# Images loading setup
tset = Dataset(TLIST, KTLIST, BSZ, niter, rand_kernel=False)
vset = Dataset(VLIST, KVLIST, BSZ, 0, isval=True)
batch, swpT, swpV = tvSwap(tset, vset)
imgs, left_kernels, right_kernels, left_ck, right_ck, seeds = batch

# Generate blurry images
left_blurs = proc.gen_blur(imgs, left_kernels, nstd=2, seeds=seeds[:, :2])
right_blurs = proc.gen_blur(imgs, right_kernels, nstd=2, seeds=seeds[:, 2:])

# Deblur, same model for left and right images
left_deblurs = left_blurs + model.generate(left_blurs)
right_deblurs = right_blurs + model.generate(right_blurs)

# Paired loss
loss, lvals, lnms = get_loss(left_blurs, right_blurs, left_deblurs, right_deblurs,
                             left_kernels, right_kernels, nstd=0)

# Stop gradients
left_deblurs = tf.stop_gradient(left_deblurs)
right_deblurs = tf.stop_gradient(right_deblurs)

# Reblur, with random kernels
left_reblurs = proc.gen_blur(left_deblurs, left_ck, nstd=2, seeds=None)
right_reblurs = proc.gen_blur(right_deblurs, right_ck, nstd=2, seeds=None)

# Deblur, again
left_cycle = left_reblurs + model.generate(left_reblurs)
right_cycle = right_reblurs + model.generate(right_reblurs)

# Cycle loss
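# The cycle-loss term itself is elided above. One plausible form, shown only as
# a hedged sketch (the L2 penalty and the weight `cycle_wt` are assumptions,
# not the repository's actual loss), is a consistency penalty between the
# second-pass deblurs and the gradient-stopped first-pass deblurs:
cycle_loss = tf.reduce_mean(tf.square(left_cycle - left_deblurs)) + \
             tf.reduce_mean(tf.square(right_cycle - right_deblurs))
loss = loss + cycle_wt * cycle_loss  # cycle_wt: hypothetical weighting factor
lvals, lnms = lvals + [cycle_loss], lnms + ['cycle_loss']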
def train():
    set_device()
    output_dir = pathlib.Path(cfg.LOG_DIR)
    output_dir.mkdir(exist_ok=True, parents=True)
    logger = create_logger(name=__name__, output_dir=output_dir, filename='log.txt')

    # Dataset loading
    train_dataset = get_dataset(cfg.DATASET.TRAIN_DATA, cfg, is_training=True)
    val_dataset = get_dataset(cfg.DATASET.VAL_DATA, cfg)
    for batch, (images, labels) in enumerate(train_dataset):
        for i in range(cfg.TRAIN.BATCH_SIZE):
            img = np.array(images[i, :, :, :] * 255).astype(np.int64)
            label = np.array(labels[i, :, :, 0]).astype(np.int64)
            vis_segmentation(img, label, label_names=cfg.DATASET.LABELS)

    # Build the model and configure the loss function
    model = create_model(cfg, name=cfg.MODEL_NAME, backbone=cfg.BACKBONE_NAME)
    model = add_regularization(model, tf.keras.regularizers.l2(cfg.LOSS.WEIGHT_DECAY))
    model.summary()
    loss = get_loss(cfg, cfg.LOSS.TYPE)

    # Optimizer and learning-rate configuration
    lr = tf.Variable(cfg.SCHEDULER.LR_INIT)
    learning_rate = learning_rate_config(cfg)

    # Warmup strategy: ramp linearly to LR_INIT, then follow the configured
    # schedule, clipped below by LR_LOWER_BOUND
    def lr_with_warmup(global_steps):
        lr_ = tf.cond(tf.less(global_steps, cfg.SCHEDULER.WARMUP_STEPS),
                      lambda: cfg.SCHEDULER.LR_INIT * tf.cast((global_steps + 1) / cfg.SCHEDULER.WARMUP_STEPS, tf.float32),
                      lambda: tf.maximum(learning_rate(global_steps - cfg.SCHEDULER.WARMUP_STEPS),
                                         cfg.SCHEDULER.LR_LOWER_BOUND))
        return lr_

    optimizer = config_optimizer(cfg, learning_rate=lr)

    # Checkpoint saving and restoring
    manager, ckpt = ckpt_manager(cfg, model, logger, optimizer)

    # Static graphs for training and validation
    @tf.function
    def train_one_batch(x, y):
        with tf.GradientTape() as tape:
            # 1. Compute the model output and the losses
            pred_o = model(x, training=True)
            # pred_o, l2, l3, l4, l5 = model(x, training=True)
            regularization_loss_out = tf.reduce_sum(model.losses)
            # seg_loss_out = loss(y, pred_o) + 0.1 * (loss(y, l2) + loss(y, l3) + loss(y, l4) + loss(y, l5))
            seg_loss_out = loss(y, pred_o)
            total_loss_out = seg_loss_out + regularization_loss_out
        # 2. Compute and apply the gradients (standard pattern)
        grads = tape.gradient(total_loss_out, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        return total_loss_out, seg_loss_out, pred_o

    @tf.function
    def val_one_batch(x, y):
        # pred_o, _, _, _, _ = model(x, training=False)
        pred_o = model(x, training=False)
        return pred_o

    # region # Summary writer and evaluation metrics
    summary_writer = tf.summary.create_file_writer(cfg.LOG_DIR)
    # tf.summary.trace_on(profiler=True)  # enable tracing (optional)
    val_metric = SegmentationMetric(cfg.DATASET.N_CLASSES)
    train_metric = SegmentationMetric(cfg.DATASET.N_CLASSES)
    val_metric.reset()
    train_metric.reset()
    # endregion

    # region # Training loop
    for _ in range(int(ckpt.step), cfg.TRAIN.EPOCHS):
        # region # Training set
        ckpt.step.assign_add(1)
        # The optimizer's lr must be changed via assign; otherwise it stays fixed
        lr.assign(lr_with_warmup(optimizer.iterations))
        for batch, (images_batch, labels_batch) in tqdm(enumerate(train_dataset)):
            total_loss, seg_loss, train_pred = train_one_batch(images_batch, labels_batch)
            # Accumulate training-set metrics
            if int(ckpt.step) % cfg.TRAIN.SNAP_SHOT == 1:
                train_out = np.argmax(train_pred, axis=-1)
                for i in range(labels_batch.shape[0]):
                    train_label = np.array(labels_batch[i, :, :, 0]).astype(np.int64)
                    train_metric.addBatch(train_label, train_out[i, :, :])
            # if epoch > 200:
            with summary_writer.as_default():  # select the writer
                tf.summary.scalar("train/total_losses", total_loss, step=optimizer.iterations)
                tf.summary.scalar("train/segmentation_loss", seg_loss, step=optimizer.iterations)
                tf.summary.scalar("train/learning_rate", lr, step=optimizer.iterations)
        # endregion

        # region # Validation set
        if int(ckpt.step) % cfg.TRAIN.SNAP_SHOT == 1:
            for batch, (images_batch, labels_batch) in tqdm(enumerate(val_dataset)):
                out = val_one_batch(images_batch, labels_batch)
                out = np.squeeze(np.argmax(out, axis=-1))
                labels_batch = np.array(labels_batch[0, :, :, 0]).astype(np.int64)
                val_metric.addBatch(labels_batch, out)
            with summary_writer.as_default():
                tf.summary.scalar("val_metric/mPA", val_metric.meanPixelAccuracy(), step=int(ckpt.step))
                tf.summary.scalar("val_metric/dice", val_metric.dice(), step=int(ckpt.step))
                tf.summary.scalar("val_metric/IoU1", val_metric.IoU(1), step=int(ckpt.step))
                tf.summary.scalar("val_metric/mIoU", val_metric.mIoU(), step=int(ckpt.step))
                tf.summary.scalar("train_metric/mPA", train_metric.meanPixelAccuracy(), step=int(ckpt.step))
                tf.summary.scalar("train_metric/mIoU", train_metric.mIoU(), step=int(ckpt.step))
                tf.summary.scalar("train_metric/dice", train_metric.dice(), step=int(ckpt.step))
                tf.summary.scalar("train_metric/IoU1", train_metric.IoU(1), step=int(ckpt.step))
            # VAL_PA = val_metric.meanPixelAccuracy()
            logger.info('__EPOCH_{}__: TRAIN_mIoU: {:.5f}, TRAIN_mPA: {:.5f}, TRAIN_dice: {:.5f}; '
                        'VAL_mIoU: {:.5f}, VAL_mPA: {:.5f}, VAL_dice: {:.5f}'
                        .format(int(ckpt.step),
                                train_metric.mIoU(), train_metric.meanPixelAccuracy(), train_metric.dice(),
                                val_metric.mIoU(), val_metric.meanPixelAccuracy(), val_metric.dice()))
            train_metric.reset()
            val_metric.reset()
        # endregion

        # region # Model saving
        # Use CheckpointManager to save the weights with a custom checkpoint number
        manager.save(checkpoint_number=int(ckpt.step))
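# The warmup rule in train() ramps the learning rate linearly to LR_INIT over
# WARMUP_STEPS steps, then follows the configured schedule with a lower bound.
# A standalone sketch with hypothetical values (LR_INIT=1e-3, WARMUP_STEPS=1000,
# cosine decay standing in for learning_rate_config), for illustration only:
import tensorflow as tf

LR_INIT, WARMUP_STEPS, LR_LOWER_BOUND = 1e-3, 1000, 1e-5
decay = tf.keras.optimizers.schedules.CosineDecay(LR_INIT, decay_steps=10000)

def lr_with_warmup_sketch(global_steps):
    return tf.cond(
        tf.less(global_steps, WARMUP_STEPS),
        lambda: LR_INIT * tf.cast((global_steps + 1) / WARMUP_STEPS, tf.float32),
        lambda: tf.maximum(decay(global_steps - WARMUP_STEPS), LR_LOWER_BOUND))

for step in (0, 500, 999, 5000):
    print(step, float(lr_with_warmup_sketch(tf.constant(step))))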
# Get compressing matrix, fixed
mat = proc.load_mat(ratio)

# Generate compressed images
left_signal, left_proxy = proc.compress(proc.extract_blocks(lefts, CSZ), mat)
right_signal, right_proxy = proc.compress(proc.extract_blocks(rights, CSZ), mat)

# Reconstruct, same model for left and right images
left_recon = model.generate(left_proxy)
right_recon = model.generate(right_proxy)
left_recon = proc.group_blocks(left_recon, IMSZ, CSZ)
right_recon = proc.group_blocks(right_recon, IMSZ, CSZ)

rec_loss, swap_loss = get_loss(left_signal, right_signal, left_recon, right_recon,
                               shifts, mat, CSZ)
loss = rec_loss + lmd * swap_loss
lvals = [rec_loss, swap_loss]
lnms = ['rec_loss', 'swap_loss']

# MSE and PSNR
mse1 = tf.reduce_mean(tf.squared_difference(lefts, left_recon), axis=(1, 2, 3))
mse2 = tf.reduce_mean(tf.squared_difference(rights, right_recon), axis=(1, 2, 3))
mse = tf.concat([mse1, mse2], axis=0)
psnr = tf.reduce_mean(-10. * tf.log(mse) / tf.log(10.0))
mse = tf.reduce_mean(mse)
lvals, lnms = lvals + [psnr], lnms + ['psnr']
tnms = [l + '.t' for l in lnms]
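# The PSNR above is computed as -10 * log10(MSE), which implicitly assumes the
# images are scaled to [0, 1] so the peak signal value is 1. A NumPy sketch of
# the same formula, with the peak made explicit:
import numpy as np

def psnr_from_mse(mse, peak=1.0):
    # PSNR = 10 * log10(peak^2 / MSE); for peak = 1 this is -10 * log10(MSE)
    return 10.0 * np.log10(peak ** 2 / mse)

print(psnr_from_mse(1e-3))  # 30.0 dB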
# Deblur and estimate kernels, same model for left and right images
left_res, left_kout = model.generate(left_blurs)
right_res, right_kout = model.generate(right_blurs)
left_deblurs = left_blurs + left_res
right_deblurs = right_blurs + right_res

# Stop gradients for the kernel branch (in cycle 1)
left_kout = tf.stop_gradient(left_kout)
right_kout = tf.stop_gradient(right_kout)

# Paired loss
loss, lvals, lnms = get_loss(left_blurs, right_blurs, left_deblurs, right_deblurs,
                             left_kout, right_kout, nstd=0)

# Stop gradients
left_deblurs = tf.stop_gradient(left_deblurs)
right_deblurs = tf.stop_gradient(right_deblurs)

# Reblur, with random kernels
left_reblurs = proc.gen_blur(left_deblurs, left_ck, nstd=2, seeds=None)
right_reblurs = proc.gen_blur(right_deblurs, right_ck, nstd=2, seeds=None)

# Deblur and estimate kernel, again
left_cycle_res, left_cyclek = model.generate(left_reblurs)
right_cycle_res, right_cyclek = model.generate(right_reblurs)
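# proc.gen_blur is not shown in these snippets. Judging from its arguments
# (images, kernels, a noise level nstd, optional seeds), a plausible sketch is
# convolution with the blur kernel plus Gaussian read noise -- an assumption
# about the interface, not the repository's implementation:
import tensorflow as tf

def gen_blur_sketch(imgs, kernel, nstd=2.0):
    # imgs: [B, H, W, 1]; kernel: [kh, kw], shared across the batch here
    # (the real code likely applies a different kernel per image)
    k = kernel[:, :, None, None]  # -> [kh, kw, in_channels=1, out_channels=1]
    blurred = tf.nn.conv2d(imgs, k, strides=[1, 1, 1, 1], padding='SAME')
    noise = tf.random.normal(tf.shape(blurred), stddev=nstd / 255.0)
    return blurred + noise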