def main():
    """Create the model and start the training."""
    w, h = map(int, args.input_size.split(','))
    args.input_size = (w, h)

    w, h = map(int, args.crop_size.split(','))
    args.crop_size = (h, w)

    w, h = map(int, args.input_size_target.split(','))
    args.input_size_target = (w, h)

    cudnn.enabled = True
    cudnn.benchmark = True

    str_ids = args.gpu_ids.split(',')
    gpu_ids = []
    for str_id in str_ids:
        gid = int(str_id)
        if gid >= 0:
            gpu_ids.append(gid)

    num_gpu = len(gpu_ids)
    args.multi_gpu = False
    if num_gpu > 1:
        args.multi_gpu = True
        Trainer = AD_Trainer(args)
        Trainer.G = torch.nn.DataParallel(Trainer.G, gpu_ids)
        Trainer.D1 = torch.nn.DataParallel(Trainer.D1, gpu_ids)
        Trainer.D2 = torch.nn.DataParallel(Trainer.D2, gpu_ids)
    else:
        Trainer = AD_Trainer(args)

    print(Trainer)

    trainloader = data.DataLoader(
        cityscapesDataSet(args.data_dir, args.data_list,
                          max_iters=args.num_steps * args.iter_size * args.batch_size,
                          resize_size=args.input_size,
                          crop_size=args.crop_size,
                          scale=True, mirror=True, mean=IMG_MEAN,
                          autoaug=args.autoaug),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)

    trainloader_iter = enumerate(trainloader)

    targetloader = data.DataLoader(
        robotDataSet(args.data_dir_target, args.data_list_target,
                     max_iters=args.num_steps * args.iter_size * args.batch_size,
                     resize_size=args.input_size_target,
                     crop_size=args.crop_size,
                     scale=False, mirror=args.random_mirror, mean=IMG_MEAN,
                     set=args.set, autoaug=args.autoaug_target),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)

    targetloader_iter = enumerate(targetloader)

    # set up tensor board
    if args.tensorboard:
        args.log_dir += '/' + os.path.basename(args.snapshot_dir)
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)
        writer = SummaryWriter(args.log_dir)

    for i_iter in range(args.num_steps):

        loss_seg_value1 = 0
        loss_adv_target_value1 = 0
        loss_D_value1 = 0
        loss_seg_value2 = 0
        loss_adv_target_value2 = 0
        loss_D_value2 = 0

        adjust_learning_rate(Trainer.gen_opt, i_iter, args)
        adjust_learning_rate_D(Trainer.dis1_opt, i_iter, args)
        adjust_learning_rate_D(Trainer.dis2_opt, i_iter, args)

        for sub_i in range(args.iter_size):

            # train G
            # train with source
            _, batch = trainloader_iter.__next__()
            _, batch_t = targetloader_iter.__next__()

            images, labels, _, _ = batch
            images = images.cuda()
            labels = labels.long().cuda()

            images_t, labels_t, _, _ = batch_t
            images_t = images_t.cuda()
            labels_t = labels_t.long().cuda()

            with Timer("Elapsed time in update: %f"):
                loss_seg1, loss_seg2, loss_adv_target1, loss_adv_target2, loss_me, loss_kl, \
                    pred1, pred2, pred_target1, pred_target2, val_loss = Trainer.gen_update(
                        images, images_t, labels, labels_t, i_iter)

            loss_seg_value1 += loss_seg1.item() / args.iter_size
            loss_seg_value2 += loss_seg2.item() / args.iter_size
            loss_adv_target_value1 += loss_adv_target1 / args.iter_size
            loss_adv_target_value2 += loss_adv_target2 / args.iter_size
            loss_me_value = loss_me

            if args.lambda_adv_target1 > 0 and args.lambda_adv_target2 > 0:
                loss_D1, loss_D2 = Trainer.dis_update(pred1, pred2, pred_target1, pred_target2)
                loss_D_value1 += loss_D1.item()
                loss_D_value2 += loss_D2.item()
            else:
                loss_D_value1 = 0
                loss_D_value2 = 0

            del pred1, pred2, pred_target1, pred_target2

        if args.tensorboard:
            scalar_info = {
                'loss_seg1': loss_seg_value1,
                'loss_seg2': loss_seg_value2,
                'loss_adv_target1': loss_adv_target_value1,
                'loss_adv_target2': loss_adv_target_value2,
                'loss_me_target': loss_me_value,
                'loss_kl_target': loss_kl,
                'loss_D1': loss_D_value1,
                'loss_D2': loss_D_value2,
                'val_loss': val_loss,
            }

            if i_iter % 100 == 0:
                for key, val in scalar_info.items():
                    writer.add_scalar(key, val, i_iter)

        print('exp = {}'.format(args.snapshot_dir))
        print(
            '\033[1m iter = %8d/%8d \033[0m loss_seg1 = %.3f loss_seg2 = %.3f loss_me = %.3f '
            'loss_kl = %.3f loss_adv1 = %.3f, loss_adv2 = %.3f loss_D1 = %.3f loss_D2 = %.3f, '
            'val_loss=%.3f'
            % (i_iter, args.num_steps, loss_seg_value1, loss_seg_value2, loss_me_value, loss_kl,
               loss_adv_target_value1, loss_adv_target_value2, loss_D_value1, loss_D_value2,
               val_loss))

        # clear loss
        del loss_seg1, loss_seg2, loss_adv_target1, loss_adv_target2, loss_me, loss_kl, val_loss

        if i_iter >= args.num_steps_stop - 1:
            print('save model ...')
            torch.save(Trainer.G.state_dict(),
                       osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '.pth'))
            torch.save(Trainer.D1.state_dict(),
                       osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '_D1.pth'))
            torch.save(Trainer.D2.state_dict(),
                       osp.join(args.snapshot_dir, 'GTA5_' + str(args.num_steps_stop) + '_D2.pth'))
            break

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            print('taking snapshot ...')
            torch.save(Trainer.G.state_dict(),
                       osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '.pth'))
            torch.save(Trainer.D1.state_dict(),
                       osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D1.pth'))
            torch.save(Trainer.D2.state_dict(),
                       osp.join(args.snapshot_dir, 'GTA5_' + str(i_iter) + '_D2.pth'))

    if args.tensorboard:
        writer.close()
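# A minimal sketch of a Timer context manager compatible with the
# `with Timer("Elapsed time in update: %f"):` calls used in the training loops here.
# The project ships its own implementation; this stand-in is an assumption for
# illustration only, inferred from how the printf-style message is used above.
import time


class Timer:
    """Print the elapsed wall-clock time of a `with` block using a %-style format."""

    def __init__(self, msg):
        self.msg = msg

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, exc_type, exc_value, exc_traceback):
        # format the elapsed seconds into the message, e.g. "Elapsed time in update: 0.53"
        print(self.msg % (time.time() - self.start))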
def main():
    """Create the model and start the training."""
    print("NUMBER OF CLASSES: ", str(args.num_classes))

    w, h = map(int, args.input_size.split(','))
    args.input_size = (w, h)

    w, h = map(int, args.crop_size.split(','))
    args.crop_size = (h, w)

    w, h = map(int, args.gt_size.split(','))
    args.gt_size = (w, h)

    cudnn.enabled = True
    cudnn.benchmark = True

    # create result dir
    if not os.path.exists(args.result_dir):
        os.makedirs(args.result_dir)

    str_ids = args.gpu_ids.split(',')
    gpu_ids = []
    for str_id in str_ids:
        gid = int(str_id)
        if gid >= 0:
            gpu_ids.append(gid)

    num_gpu = len(gpu_ids)
    args.multi_gpu = False
    if num_gpu > 1:
        args.multi_gpu = True
        Trainer = AD_Trainer(args)
    else:
        Trainer = AD_Trainer(args)

    TARGET_IMG_MEAN = np.array((104.00698793, 116.66876762, 122.67891434), dtype=np.float32)

    trainloader = data.DataLoader(
        cityscapesDataSet(args.data_dir, args.data_list,
                          max_iters=args.num_steps * args.batch_size,
                          resize_size=args.input_size,
                          crop_size=args.crop_size,
                          set=args.set, scale=False, mirror=args.random_mirror,
                          mean=TARGET_IMG_MEAN, autoaug=args.autoaug_target,
                          source_domain=args.source_domain),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)

    trainloader_iter = enumerate(trainloader)

    # alternative source loaders (GTA5 / SYNTHIA), kept for reference
    '''
    trainloader = data.DataLoader(
        gta5DataSet(args.data_dir, args.data_list,
                    max_iters=args.num_steps * args.batch_size,
                    resize_size=args.input_size,
                    crop_size=args.crop_size,
                    scale=True, mirror=True, mean=TARGET_IMG_MEAN,
                    autoaug=args.autoaug),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)

    trainloader_iter = enumerate(trainloader)

    trainloader = data.DataLoader(
        synthiaDataSet(args.data_dir, args.data_list,
                       max_iters=args.num_steps * args.batch_size,
                       resize_size=args.input_size,
                       crop_size=args.crop_size,
                       scale=True, mirror=True, mean=TARGET_IMG_MEAN,
                       autoaug=args.autoaug),
        batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers,
        pin_memory=True, drop_last=True)

    trainloader_iter = enumerate(trainloader)
    '''

    # set up tensor board
    if args.tensorboard:
        args.log_dir += '/' + os.path.basename(args.snapshot_dir)
        if not os.path.exists(args.log_dir):
            os.makedirs(args.log_dir)
        writer = SummaryWriter(args.log_dir)

    # load mIOU
    best_mIoUs = args.mIOU

    for i_iter in range(args.num_steps):

        loss_seg_value = 0
        adjust_learning_rate(Trainer.gen_opt, i_iter, args)

        _, batch = trainloader_iter.__next__()
        images, labels, _, _ = batch
        images = images.cuda()
        labels = labels.long().cuda()

        with Timer("Elapsed time in update: %f"):
            loss_seg = Trainer.gen_update(images, labels, i_iter)
            loss_seg_value += loss_seg.item()

        if args.tensorboard:
            scalar_info = {
                'loss_seg': loss_seg_value,
            }

            if i_iter % 100 == 0:
                for key, val in scalar_info.items():
                    writer.add_scalar(key, val, i_iter)

        print('\033[1m iter = %8d/%8d \033[0m loss_seg = %.3f'
              % (i_iter, args.num_steps, loss_seg_value))

        del loss_seg

        if i_iter % args.save_pred_every == 0 and i_iter != 0:
            mIoUs, _ = evaluate(args, args.gt_dir, args.gt_list, args.result_dir, Trainer.G)
            # guard the scalar logging: `writer` only exists when tensorboard logging is enabled
            if args.tensorboard:
                writer.add_scalar('mIOU', round(np.nanmean(mIoUs) * 100, 2),
                                  int(i_iter / args.save_pred_every))
            # keep only checkpoints that improve the best validation mIoU
            if round(np.nanmean(mIoUs) * 100, 2) > best_mIoUs:
                print('save model ...')
                best_mIoUs = round(np.nanmean(mIoUs) * 100, 2)
                torch.save(Trainer.G.state_dict(),
                           osp.join(args.snapshot_dir, 'supervised_seg_' + str(i_iter) + '.pth'))

    if args.tensorboard:
        writer.close()
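# Both scripts above assume that `args` is parsed at module level (argparse) and that the
# standard imports (os, os.path as osp, numpy as np, torch, torch.backends.cudnn as cudnn,
# torch.utils.data as data, SummaryWriter) plus the project-local helpers (AD_Trainer, the
# dataset classes, adjust_learning_rate / adjust_learning_rate_D, evaluate, IMG_MEAN) are
# available. A conventional entry-point guard, assumed here rather than taken from this
# excerpt, then starts training:

if __name__ == '__main__':
    main()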