# NOTE: `ls` (high-res list loader) and `lt` (sceneflow list loader) are used
# below; the aliases assumed here follow the naming used elsewhere in this codebase.
from dataloader import listfiles as ls          # assumed import
from dataloader import listsceneflow as lt      # assumed import
from dataloader import KITTIloader2015 as lk15
from dataloader import KITTIloader2012 as lk12
from dataloader import MiddleburyLoader as DA

batch_size = args.batchsize
scale_factor = args.maxdisp / 384.  # controls training resolution

all_left_img, all_right_img, all_left_disp, all_right_disp = ls.dataloader(
    '%s/carla-highres/trainingF' % args.database)
loader_carla = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                right_disparity=all_right_disp,
                                rand_scale=[0.225, 0.6 * scale_factor], order=2)

all_left_img, all_right_img, all_left_disp, all_right_disp = ls.dataloader(
    '%s/mb-ex-training/trainingF' % args.database)  # mb-ex
loader_mb = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                             right_disparity=all_right_disp,
                             rand_scale=[0.225, 0.6 * scale_factor], order=0)

all_left_img, all_right_img, all_left_disp, all_right_disp = lt.dataloader(
    '%s/sceneflow/' % args.database)
loader_scene = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                right_disparity=all_right_disp,
                                rand_scale=[0.9, 2.4 * scale_factor], order=2)

all_left_img, all_right_img, all_left_disp, _, _, _ = lk15.dataloader(
    '%s/kitti_scene/training/' % args.database, typ='train')  # trainval
loader_kitti15 = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                  rand_scale=[0.9, 2.4 * scale_factor], order=0)

all_left_img, all_right_img, all_left_disp = lk12.dataloader(
    '%s/data_stereo_flow/training/' % args.database)
loader_kitti12 = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                  rand_scale=[0.9, 2.4 * scale_factor], order=0)

all_left_img, all_right_img, all_left_disp, _ = ls.dataloader(
    '%s/eth3d/' % args.database)
loader_eth3d = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                rand_scale=[0.9, 2.4 * scale_factor], order=0)

data_inuse = torch.utils.data.ConcatDataset(
    [loader_carla] * 40 + [loader_mb] * 500 + [loader_scene] +
    [loader_kitti15] + [loader_kitti12] * 80 + [loader_eth3d] * 1000)
TrainImgLoader = torch.utils.data.DataLoader(
    data_inuse, batch_size=batch_size, shuffle=True, num_workers=batch_size,
    drop_last=True, worker_init_fn=_init_fn)
print('%d batches per epoch' % (len(data_inuse) // batch_size))
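# `_init_fn` is passed as `worker_init_fn` above but is not defined in this
# excerpt. A minimal sketch (an assumption, not the repo's actual
# implementation) that gives every DataLoader worker a distinct,
# deterministic seed so random augmentations are reproducible:
import random
import numpy as np

def _init_fn(worker_id):
    # derive a per-worker seed from the base seed set via torch.manual_seed
    worker_seed = (torch.initial_seed() + worker_id) % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)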
virtual_kitti2 = False
# added by CCJ on 2020/05/22:
elif args.datatype == 'virtual_kt_2':
    print("processing Virtual KT 2!")
    from dataloader import KITTIloader_VirtualKT2 as ls
    train_file_list = args.vkt2_train_list
    val_file_list = args.vkt2_val_list
    virtual_kitti2 = True
else:
    raise Exception("No suitable KITTI found ...")

print('[??] args.datapath = ', args.datapath)
all_left_img, all_right_img, all_left_disp, test_left_img, \
    test_right_img, test_left_disp = ls.dataloader(
        args.datapath, train_file_list, val_file_list)

TrainImgLoader = torch.utils.data.DataLoader(
    DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                     training=True, virtual_kitti2=virtual_kitti2),
    batch_size=args.batch_size, shuffle=True, num_workers=8, drop_last=False)

TestImgLoader = torch.utils.data.DataLoader(
    DA.myImageFloder(test_left_img, test_right_img,
                    type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

if args.datatype == '2015':
    from dataloader import KITTIloader2015 as ls
elif args.datatype == '2012':
    from dataloader import KITTIloader2012 as ls

all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp = ls.dataloader(
    args.datapath)

TrainImgLoader = torch.utils.data.DataLoader(
    DA.myImageFloder(all_left_img, all_right_img, all_left_disp, True),
    batch_size=4, shuffle=True, num_workers=8, drop_last=False)

TestImgLoader = torch.utils.data.DataLoader(
    DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False),
    batch_size=8, shuffle=False, num_workers=4, drop_last=False)
def main():
    epoch_start = 0
    max_acc = 0
    max_epo = 0

    # dataset section
    all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp = ls.dataloader(
        args.datapath)
    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(all_left_img, all_right_img, all_left_disp, True),
        batch_size=args.batch_size, shuffle=True, num_workers=12, drop_last=True)
    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False),
        batch_size=args.batch_size, shuffle=False, num_workers=4, drop_last=False)

    start_full_time = time.time()
    for epoch in range(epoch_start, args.epochs + 1):
        print("epoch:", epoch)

        # training
        total_train_loss = 0
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L) in enumerate(TrainImgLoader):
            loss = train(imgL_crop, imgR_crop, disp_crop_L)
            total_train_loss += loss
            print('epoch:{}, step:{}, loss:{}'.format(epoch, batch_idx, loss))
        print('epoch %d average training loss = %.3f' %
              (epoch, total_train_loss / len(TrainImgLoader)))

        # test
        total_test_three_pixel_error_rate = 0
        for batch_idx, (imgL, imgR, disp_L) in enumerate(TestImgLoader):
            test_three_pixel_error_rate = test(imgL, imgR, disp_L)
            total_test_three_pixel_error_rate += test_three_pixel_error_rate
        print('epoch %d total 3-px error in val = %.3f' %
              (epoch, total_test_three_pixel_error_rate / len(TestImgLoader) * 100))

        acc = (1 - total_test_three_pixel_error_rate / len(TestImgLoader)) * 100
        if acc > max_acc:
            max_acc = acc
            max_epo = epoch
            savefilename = './kitti15.tar'
            # savefilename = root_path + '/checkpoints/checkpoint_finetune_kitti15.tar'
            torch.save(
                {
                    'state_dict': model.state_dict(),
                    'total_train_loss': total_train_loss,
                    'epoch': epoch + 1,
                    'optimizer_state_dict': optimizer.state_dict(),
                    'max_acc': max_acc,
                    'max_epoch': max_epo
                }, savefilename)
            print("-- max acc checkpoint saved --")
        print('MAX epoch %d test 3 pixel correct rate = %.3f' % (max_epo, max_acc))

    print('full finetune time = %.2f HR' % ((time.time() - start_full_time) / 3600))
    print(max_epo)
    print(max_acc)
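# `test()` above returns a 3-pixel error rate per batch. A minimal sketch of
# the standard KITTI metric (hypothetical helper, assuming torch tensors of
# disparities in pixels, with 0 marking invalid ground truth): a pixel counts
# as wrong if its error exceeds both 3 px and 5% of the true disparity.
def three_pixel_error_rate(pred_disp, gt_disp):
    valid = gt_disp > 0
    err = torch.abs(pred_disp[valid] - gt_disp[valid])
    wrong = (err > 3) & (err > 0.05 * gt_disp[valid])
    return wrong.float().mean().item()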
                    action='store_true', default=False,
                    help='enables CUDA training')
parser.add_argument('--seed', type=int, default=1, metavar='S',
                    help='random seed (default: 1)')
args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2,3"
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed_all(args.seed)

test_left_img1, test_right_img1, test_left_disp1, test_left_img, test_right_img, test_left_disp = ls.dataloader(
    args.datapath)
TestImgLoader = torch.utils.data.DataLoader(
    DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False),
    batch_size=1, shuffle=False, num_workers=4, drop_last=False)

if args.model == 'ShuffleStereo8':
    model = MABNet_origin(args.maxdisp)
elif args.model == 'ShuffleStereo16':
    model = ShuffleStereo16(args.maxdisp)
else:
    print('no model')
    torch.backends.cudnn.benchmark = False
    np.random.seed(seed)
    random.seed(seed)

# seed
deterministic_training(args.seed)

(
    all_left_img,
    all_right_img,
    all_left_disp,
    test_left_img,
    test_right_img,
    test_left_disp,
) = lt.dataloader(args.datapath)

TrainImgLoader = torch.utils.data.DataLoader(
    DA.myImageFloder(all_left_img, all_right_img, all_left_disp, True),
    batch_size=8,
    shuffle=True,
    num_workers=8,
    drop_last=False,
)
TestImgLoader = torch.utils.data.DataLoader(
    DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False),
    batch_size=1,
    shuffle=False,
    num_workers=1,
    drop_last=False,
def main():
    global args
    log = logger.setup_logger(args.save_path + '/training.log')

    # train_left_img, train_right_img, train_left_disp, test_left_img, test_right_img, test_left_disp, test_fn = ls.dataloader(
    #     args.datapath, log, args.split_file)
    #
    # TrainImgLoader = torch.utils.data.DataLoader(
    #     DA.myImageFloder(train_left_img, train_right_img, train_left_disp, True),
    #     batch_size=args.train_bsize, shuffle=True, num_workers=4, drop_last=False)
    #
    # TestImgLoader = torch.utils.data.DataLoader(
    #     DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False, test_fn),
    #     batch_size=args.test_bsize, shuffle=False, num_workers=4, drop_last=False)

    train_left_img, train_right_img, train_left_disp, test_left_img, test_right_img, left_val_disp, val_fn, left_train_semantic, left_val_semantic = ls.dataloader(
        args.datapath, log, args.split_file)

    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(train_left_img, train_right_img, train_left_disp,
                         left_train_semantic, True),
        batch_size=args.train_bsize, shuffle=True, num_workers=4, drop_last=False)

    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, left_val_disp,
                         left_val_semantic, False, val_fn),
        batch_size=args.test_bsize, shuffle=False, num_workers=4, drop_last=False)

    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))

    model = models.anynet.AnyNet(args)
    model = nn.DataParallel(model).cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    log.info('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    if args.pretrained:
        if os.path.isfile(args.pretrained):
            checkpoint = torch.load(args.pretrained)
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            log.info("=> loaded pretrained model '{}'".format(args.pretrained))
        else:
            log.info("=> no pretrained model found at '{}'".format(args.pretrained))
            log.info("=> Will start from scratch.")

    args.start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info("=> no checkpoint found at '{}'".format(args.resume))
            log.info("=> Will start from scratch.")
    else:
        log.info('Not Resume')

    cudnn.benchmark = True
    test(TestImgLoader, model, log)
    return
def main():
    parser = argparse.ArgumentParser(description='HSM-Net')
    parser.add_argument('--maxdisp', type=int, default=384,
                        help='maximum disparity')
    parser.add_argument('--name', default='name')
    parser.add_argument('--database', default='/data/private', help='data path')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    parser.add_argument('--batch_size', type=int, default=16,
                        # when maxdisp is 768, 18 is the most you can fit in 2 V100s (with syncBN on)
                        help='samples per batch')
    parser.add_argument('--val_batch_size', type=int, default=4,
                        help='samples per batch')
    parser.add_argument('--loadmodel', default=None, help='weights path')
    parser.add_argument('--log_dir', default="/data/private/logs/high-res-stereo")
    # parser.add_argument('--savemodel', default=os.path.join(os.getcwd(), '/trained_model'),
    #                     help='save path')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--val_epoch', type=int, default=4)
    parser.add_argument('--save_epoch', type=int, default=10)
    parser.add_argument("--val", action="store_true", default=False)
    parser.add_argument("--save_numpy", action="store_true", default=False)
    parser.add_argument("--testres", type=float, default=1.8)
    parser.add_argument("--threshold", type=float, default=0.7)
    parser.add_argument("--use_pseudoGT", default=False, action="store_true")
    parser.add_argument("--lr", default=1e-3, type=float)
    parser.add_argument("--lr_decay", default=2, type=int)
    parser.add_argument("--gpu", default=[0], nargs="+")
    parser.add_argument("--no_aug", default=False, action="store_true")
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    batch_size = args.batch_size
    scale_factor = args.maxdisp / 384.  # controls training resolution
    args.name = args.name + "_" + time.strftime('%l:%M%p_%Y%b%d').strip(" ")

    gpu = []
    for i in args.gpu:
        gpu.append(int(i))
    args.gpu = gpu

    root_dir = "/data/private/KITTI_raw/2011_09_26/2011_09_26_drive_0013_sync"
    disp_dir = "final-768px_testres-3.3/disp"
    entp_dir = "final-768px_testres-3.3/entropy"
    mode = "image"
    image_name = "0000000040.npy"  # * this is the 4th image in the validation set

    train_left, train_right, train_disp, train_entp = kitti_raw_loader(
        root_dir, disp_dir, entp_dir, mode=mode, image_name=image_name)
    train_left = train_left * args.batch_size * 16
    train_right = train_right * args.batch_size * 16
    train_disp = train_disp * args.batch_size * 16
    train_entp = train_entp * args.batch_size * 16

    all_left_img, all_right_img, all_left_disp, left_val, right_val, disp_val_L = lk15.dataloader(
        '%s/KITTI2015/data_scene_flow/training/' % args.database, val=args.val)
    left_val = [left_val[3]]
    right_val = [right_val[3]]
    disp_val_L = [disp_val_L[3]]

    loader_kitti15 = DA.myImageFloder(train_left, train_right, train_disp,
                                      rand_scale=[0.9, 2.4 * scale_factor],
                                      order=0,
                                      use_pseudoGT=args.use_pseudoGT,
                                      entropy_threshold=args.threshold,
                                      left_entropy=train_entp,
                                      no_aug=args.no_aug)
    val_loader_kitti15 = DA.myImageFloder(left_val, right_val, disp_val_L,
                                          is_validation=True,
                                          testres=args.testres)
    train_data_inuse = loader_kitti15
    val_data_inuse = val_loader_kitti15

    # ! Due to an internal bug in PyTorch, if you set num_workers > 0 in one
    # ! dataloader, it must also be > 0 in the other dataloader
    # ! (e.g. 1 for ValImgLoader and 10 for TrainImgLoader).
    ValImgLoader = torch.utils.data.DataLoader(
        val_data_inuse, drop_last=False, batch_size=args.val_batch_size,
        shuffle=False, worker_init_fn=_init_fn, num_workers=args.val_batch_size)
    TrainImgLoader = torch.utils.data.DataLoader(
        train_data_inuse, batch_size=batch_size, shuffle=True, drop_last=True,
        worker_init_fn=_init_fn, num_workers=args.batch_size)
    print('%d batches per epoch' % (len(train_data_inuse) // batch_size))

    model = hsm(args.maxdisp, clean=False, level=1)
    if len(args.gpu) > 1:
        from sync_batchnorm.sync_batchnorm import convert_model
        model = nn.DataParallel(model, device_ids=args.gpu)
        model = convert_model(model)
    else:
        model = nn.DataParallel(model, device_ids=args.gpu)
    model.cuda()

    # load model
    if args.loadmodel is not None:
        print("loading pretrained model: " + str(args.loadmodel))
        pretrained_dict = torch.load(args.loadmodel)
        pretrained_dict['state_dict'] = {
            k: v for k, v in pretrained_dict['state_dict'].items()
            if ('disp' not in k)
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))

    log = logger.Logger(args.log_dir, args.name, save_numpy=args.save_numpy)
    total_iters = 0
    val_sample_count = 0
    val_batch_count = 0
    save_path = os.path.join(args.log_dir, os.path.join(args.name, "saved_model"))
    os.makedirs(save_path, exist_ok=True)

    for epoch in range(1, args.epochs + 1):
        total_train_loss = 0
        # accumulates scores throughout a batch to get the average score
        train_score_accum_dict = {}
        train_score_accum_dict["num_scored"] = 0
        adjust_learning_rate(optimizer, args.lr, args.lr_decay, epoch,
                             args.epochs, decay_rate=0.1)
        print('Epoch %d / %d' % (epoch, args.epochs))

        # SAVE
        if epoch != 1 and epoch % args.save_epoch == 0:
            print("saving weights at epoch: " + str(epoch))
            savefilename = os.path.join(save_path, 'ckpt_' + str(total_iters) + '.tar')
            torch.save(
                {
                    'iters': total_iters,
                    'state_dict': model.state_dict(),
                    'train_loss': total_train_loss / len(TrainImgLoader),
                    "optimizer": optimizer.state_dict()
                }, savefilename)

        ## val ##
        if epoch == 1 or epoch % args.val_epoch == 0:
            print("validating at epoch: " + str(epoch))
            val_score_accum_dict = {}
            val_img_idx = 0
            for batch_idx, (imgL_crop, imgR_crop, disp_crop_L) in enumerate(ValImgLoader):
                vis, scores_list, err_map_list = val_step(
                    model, imgL_crop, imgR_crop, disp_crop_L, args.maxdisp,
                    args.testres)
                for score, err_map in zip(scores_list, err_map_list):
                    for (score_tag, score_val), (map_tag, map_val) in zip(
                            score.items(), err_map.items()):
                        log.scalar_summary(
                            "val/im_" + str(val_img_idx) + "/" + score_tag,
                            score_val, val_sample_count)
                        log.image_summary("val/" + map_tag, map_val, val_sample_count)
                        if score_tag not in val_score_accum_dict.keys():
                            val_score_accum_dict[score_tag] = 0
                        val_score_accum_dict[score_tag] += score_val
                    val_img_idx += 1
                    val_sample_count += 1
                log.image_summary('val/left', imgL_crop[0:1], val_sample_count)
                log.image_summary('val/right', imgR_crop[0:1], val_sample_count)
                log.disp_summary('val/gt0', disp_crop_L[0:1], val_sample_count)  # <-- GT disp
                log.entp_summary('val/entropy', vis['entropy'], val_sample_count)
                log.disp_summary('val/output3', vis['output3'][0], val_sample_count)
            for score_tag, score_val in val_score_accum_dict.items():
                log.scalar_summary("val/" + score_tag + "_batch_avg", score_val, epoch)

        ## training ##
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L) in enumerate(TrainImgLoader):
            print("training at epoch: " + str(epoch))
            is_scoring = total_iters % 10 == 0
            loss, vis, scores_list, maps = train_step(model, optimizer,
                                                      imgL_crop, imgR_crop,
                                                      disp_crop_L, args.maxdisp,
                                                      is_scoring=is_scoring)
            total_train_loss += loss

            if is_scoring:
                log.scalar_summary('train/loss_batch', loss, total_iters)
                for score in scores_list:
                    for tag, val in score.items():
                        log.scalar_summary("train/" + tag + "_batch", val, total_iters)
                        if tag not in train_score_accum_dict.keys():
                            train_score_accum_dict[tag] = 0
                        train_score_accum_dict[tag] += val
                        train_score_accum_dict["num_scored"] += imgL_crop.shape[0]
                for tag, err_map in maps[0].items():
                    log.image_summary("train/" + tag, err_map, total_iters)

            if total_iters % 10 == 0:
                log.image_summary('train/left', imgL_crop[0:1], total_iters)
                log.image_summary('train/right', imgR_crop[0:1], total_iters)
                log.disp_summary('train/gt0', disp_crop_L[0:1], total_iters)  # <-- GT disp
                log.entp_summary('train/entropy', vis['entropy'][0:1], total_iters)
                log.disp_summary('train/output3', vis['output3'][0:1], total_iters)

            total_iters += 1

        log.scalar_summary('train/loss', total_train_loss / len(TrainImgLoader), epoch)
        for tag, val in train_score_accum_dict.items():
            log.scalar_summary("train/" + tag + "_avg",
                               val / train_score_accum_dict["num_scored"], epoch)
        torch.cuda.empty_cache()

    # Save final checkpoint
    print("Finished training!\n Saving the last checkpoint...")
    savefilename = os.path.join(save_path, 'final' + '.tar')
    torch.save(
        {
            'iters': total_iters,
            'state_dict': model.state_dict(),
            'train_loss': total_train_loss / len(TrainImgLoader),
            "optimizer": optimizer.state_dict()
        }, savefilename)
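# `adjust_learning_rate` is called in the training loop above with
# (optimizer, args.lr, args.lr_decay, epoch, args.epochs, decay_rate=0.1)
# but is not shown in this excerpt. A hedged sketch of one schedule that is
# consistent with that call site (the repo's actual schedule may differ):
def adjust_learning_rate(optimizer, base_lr, lr_decay, epoch, total_epochs,
                         decay_rate=0.1):
    # assumed schedule: multiply the lr by `decay_rate` every `lr_decay` epochs;
    # `total_epochs` is kept only to match the call site
    lr = base_lr * (decay_rate ** (epoch // lr_decay))
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr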
def main():
    global args
    log = logger.setup_logger(args.save_path + '/training.log')

    train_left_img, train_right_img, train_left_disp, test_left_img, test_right_img, test_left_disp = ls.dataloader(
        args.datapath, log, args.split_file)
    n_train = int(len(train_left_img))

    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(train_left_img, train_right_img, train_left_disp, True),
        batch_size=args.train_bsize, shuffle=True, num_workers=4, drop_last=False)

    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False),
        batch_size=args.test_bsize, shuffle=False, num_workers=4, drop_last=False)

    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))

    model = models.anynet.AnyNet(args)
    torch.save(model, './model_para.pth')
    model = nn.DataParallel(model).cuda()

    torch.manual_seed(2)  # manual_seed expects an int
    left = torch.randn(1, 3, 256, 512)
    right = torch.randn(1, 3, 256, 512)
    with SummaryWriter(comment='AnyNet_model_structure') as w:
        w.add_graph(model, (left, right))

    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    log.info('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    args.start_epoch = 0
    if args.resume:  # resume after an interrupted run
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])  # load model weights
            optimizer.load_state_dict(checkpoint['optimizer'])  # load optimizer state
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info("=> no checkpoint found at '{}'".format(args.resume))
            log.info("=> Will start from scratch.")
    else:
        log.info('Not Resume')

    cudnn.benchmark = True
    start_full_time = time.time()
    train(TrainImgLoader, model, optimizer, log, n_train, TestImgLoader)  # start model training
    test(TestImgLoader, model, log)
    log.info('full training time = {:.2f} Hours'.format(
        (time.time() - start_full_time) / 3600))
def get_training_dataloader(maxdisp, dataset_folder):
    scale_factor = maxdisp / 384.

    all_left_img, all_right_img, all_left_disp, all_right_disp = ls.dataloader(
        '%s/hrvs/carla-highres/trainingF' % dataset_folder)
    loader_carla = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                    right_disparity=all_right_disp,
                                    rand_scale=[0.225, 0.6 * scale_factor],
                                    rand_bright=[0.8, 1.2], order=2)

    all_left_img, all_right_img, all_left_disp, all_right_disp = ls.dataloader(
        '%s/middlebury/mb-ex-training/trainingF' % dataset_folder)  # mb-ex
    loader_mb = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                 right_disparity=all_right_disp,
                                 rand_scale=[0.225, 0.6 * scale_factor],
                                 rand_bright=[0.8, 1.2], order=0)

    all_left_img, all_right_img, all_left_disp, all_right_disp = lt.dataloader(
        '%s/sceneflow/' % dataset_folder)
    loader_scene = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                    right_disparity=all_right_disp,
                                    rand_scale=[0.9, 2.4 * scale_factor], order=2)

    all_left_img, all_right_img, all_left_disp, _, _, _ = lk15.dataloader(
        '%s/kitti15/training/' % dataset_folder,
        typ='train')  # change to trainval when finetuning on KITTI
    loader_kitti15 = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                      rand_scale=[0.9, 2.4 * scale_factor], order=0)

    all_left_img, all_right_img, all_left_disp = lk12.dataloader(
        '%s/kitti12/training/' % dataset_folder)
    loader_kitti12 = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                      rand_scale=[0.9, 2.4 * scale_factor], order=0)

    all_left_img, all_right_img, all_left_disp, _ = ls.dataloader(
        '%s/eth3d/' % dataset_folder)
    loader_eth3d = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                    rand_scale=[0.9, 2.4 * scale_factor], order=0)

    all_left_img, all_right_img, all_left_disp = lld.dataloader(
        '%s/lidar_dataset/train' % dataset_folder)
    loader_lidar = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                    rand_scale=[0.5, 1.25 * scale_factor],
                                    rand_bright=[0.8, 1.2], order=0)

    all_dataloaders = [
        {'name': 'lidar', 'dl': loader_lidar, 'count': 1},
        {'name': 'hrvs', 'dl': loader_carla, 'count': 1},
        {'name': 'middlebury', 'dl': loader_mb, 'count': 1},
        {'name': 'sceneflow', 'dl': loader_scene, 'count': 1},
        {'name': 'kitti12', 'dl': loader_kitti12, 'count': 1},
        {'name': 'kitti15', 'dl': loader_kitti15, 'count': 1},
        {'name': 'eth3d', 'dl': loader_eth3d, 'count': 1},
    ]

    max_count = 0
    for dataloader in all_dataloaders:
        max_count = max(max_count, len(dataloader['dl']))

    print('=' * 80)
    concat_dataloaders = []
    for dataloader in all_dataloaders:
        dataloader['count'] = max(1, max_count // len(dataloader['dl']))
        concat_dataloaders += [dataloader['dl']] * dataloader['count']
        print('{name}: {size} (x{count})'.format(name=dataloader['name'],
                                                 size=len(dataloader['dl']),
                                                 count=dataloader['count']))
    data_inuse = torch.utils.data.ConcatDataset(concat_dataloaders)
    print('Total dataset size: {}'.format(len(data_inuse)))
    print('=' * 80)
    return data_inuse
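# A hypothetical usage sketch of `get_training_dataloader` above (the folder
# path and batch size are illustrative, not from the source). Note that despite
# its name the function returns a ConcatDataset in which each source dataset is
# repeated max_count // len(dataset) times, so a small set like 71 Middlebury
# pairs is oversampled by roughly 39000 // 71 = 549x against a ~39K-pair
# SceneFlow set (sizes illustrative):
train_dataset = get_training_dataloader(maxdisp=384, dataset_folder='/data')
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=8,
                                           shuffle=True, num_workers=8,
                                           drop_last=True)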
def main():
    parser = argparse.ArgumentParser(description='HSM-Net')
    parser.add_argument('--maxdisp', type=int, default=384,
                        help='maximum disparity')
    parser.add_argument('--name', default='name')
    parser.add_argument('--database', default='/data/private', help='data path')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--val_batch_size', type=int, default=1,
                        help='samples per batch')
    parser.add_argument('--loadmodel', default=None, help='weights path')
    parser.add_argument('--log_dir', default="/data/private/logs/high-res-stereo")
    parser.add_argument("--testres", default=[0], nargs="+")
    parser.add_argument("--no_aug", default=False, action="store_true")
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    args.name = args.name + "_" + time.strftime('%l:%M%p_%Y%b%d').strip(" ")

    testres = []
    for i in args.testres:
        testres.append(float(i))
    args.testres = testres

    all_left_img, all_right_img, all_left_disp, left_val, right_val, disp_val_L = lk15.dataloader(
        '%s/KITTI2015/data_scene_flow/training/' % args.database, val=True)
    left_val = [left_val[3]]
    right_val = [right_val[3]]
    disp_val_L = [disp_val_L[3]]

    # all_l = all_left_disp + left_val
    # all_r = all_right_img + right_val
    # all_d = all_left_disp + disp_val_L
    # correct_shape = (1242, 375)
    # for i in range(len(all_l)):
    #     l = np.array(Image.open(all_l[i]).convert("RGB"))
    #     r = np.array(Image.open(all_r[i]).convert("RGB"))
    #     d = Image.open(all_d[i])
    #     if l.shape != (375, 1242, 3):
    #         l2 = cv2.resize(l, correct_shape, interpolation=cv2.INTER_CUBIC)
    #         r2 = cv2.resize(r, correct_shape, interpolation=cv2.INTER_CUBIC)
    #         d2 = np.array(torchvision.transforms.functional.resize(d, [375, 1242]))
    #         # d = np.stack([d, d, d], axis=-1)
    #         # d2 = cv2.resize(d.astype("uint16"), correct_shape)
    #         cv2.imwrite(all_l[i], cv2.cvtColor(l2, cv2.COLOR_RGB2BGR))
    #         cv2.imwrite(all_r[i], cv2.cvtColor(r2, cv2.COLOR_RGB2BGR))
    #         cv2.imwrite(all_d[i], d2)
    #         cv2.resize(l, ())

    model = hsm(args.maxdisp, clean=False, level=1)
    model.cuda()

    # load model
    print("loading pretrained model: " + str(args.loadmodel))
    pretrained_dict = torch.load(args.loadmodel)
    pretrained_dict['state_dict'] = {
        k: v for k, v in pretrained_dict['state_dict'].items()
        if ('disp' not in k)
    }
    model = nn.DataParallel(model, device_ids=[0])
    model.load_state_dict(pretrained_dict['state_dict'], strict=False)

    name = "val_at_many_res" + "_" + datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
    log = logger.Logger(args.log_dir, name)
    val_sample_count = 0

    for res in args.testres:
        val_loader_kitti15 = DA.myImageFloder(left_val, right_val, disp_val_L,
                                              is_validation=True, testres=res)
        ValImgLoader = torch.utils.data.DataLoader(
            val_loader_kitti15, drop_last=False,
            batch_size=args.val_batch_size, shuffle=False,
            worker_init_fn=_init_fn, num_workers=0)

        print("================ res: " + str(res) + " ============================")

        ## val ##
        val_score_accum_dict = {}
        val_img_idx = 0
        for batch_idx, (imgL_crop, imgR_crop, disp_crop_L) in enumerate(ValImgLoader):
            vis, scores_list, err_map_list = val_step(model, imgL_crop, imgR_crop,
                                                      disp_crop_L, args.maxdisp, res)
            for score, err_map in zip(scores_list, err_map_list):
                for (score_tag, score_val), (map_tag, map_val) in zip(
                        score.items(), err_map.items()):
                    log.scalar_summary("val/im_" + str(val_img_idx) + "/" + str(res)
                                       + "/" + score_tag, score_val, val_sample_count)
                    log.image_summary("val/" + str(res) + "/" + map_tag, map_val,
                                      val_sample_count)
                    if score_tag not in val_score_accum_dict.keys():
                        val_score_accum_dict[score_tag] = 0
                    val_score_accum_dict[score_tag] += score_val
                    print("res: " + str(res) + " " + score_tag + ": " + str(score_val))
                val_img_idx += 1
                val_sample_count += 1
            log.image_summary('val/left', imgL_crop[0:1], val_sample_count)
            # log.image_summary('val/right', imgR_crop[0:1], val_sample_count)
            log.disp_summary('val/gt0', disp_crop_L[0:1], val_sample_count)  # <-- GT disp
            log.entp_summary('val/entropy', vis['entropy'], val_sample_count)
            log.disp_summary('val/output3', vis['output3'][0], val_sample_count)
def main():
    parser = argparse.ArgumentParser(description='HSM')
    parser.add_argument('--datapath',
                        default="/home/isaac/rvc_devkit/stereo/datasets_middlebury2014",
                        help='test data path')
    parser.add_argument('--loadmodel', default=None, help='model path')
    parser.add_argument('--name', default='rvc_highres_output', help='output dir')
    parser.add_argument('--clean', type=float, default=-1,
                        help='clean up output using entropy estimation')
    parser.add_argument('--testres', type=float, default=0.5,  # default used to be 0.5
                        help='test time resolution ratio 0-x')
    parser.add_argument('--max_disp', type=float, default=-1,
                        help='maximum disparity to search for')
    parser.add_argument('--level', type=int, default=1,
                        help='output level of output, default is level 1 (stage 3),\
                              can also use level 2 (stage 2) or level 3 (stage 1)')
    parser.add_argument('--debug_image', type=str, default=None)
    parser.add_argument("--eth_testres", type=float, default=3.5)
    parser.add_argument("--score_results", action="store_true", default=False)
    parser.add_argument("--save_weights", action="store_true", default=False)
    parser.add_argument("--kitti", action="store_true", default=False)
    parser.add_argument("--eth", action="store_true", default=False)
    parser.add_argument("--mb", action="store_true", default=False)
    parser.add_argument("--all_data", action="store_true", default=False)
    parser.add_argument("--eval_train_only", action="store_true", default=False)
    parser.add_argument("--debug", action="store_true", default=False)
    parser.add_argument("--batchsize", type=int, default=16)
    parser.add_argument("--prepare_kitti", action="store_true", default=False)
    args = parser.parse_args()

    # wandb.init(name=args.name, project="high-res-stereo", save_code=True,
    #            magic=True, config=args)

    if not os.path.exists("output"):
        os.mkdir("output")

    kitti_metrics = {}
    eth_metrics = {}
    mb_metrics = {}

    # construct model
    model = hsm(128, args.clean, level=args.level)
    model = convert_model(model)
    # wandb.watch(model)
    model = nn.DataParallel(model, device_ids=[0])
    model.cuda()

    if args.loadmodel is not None:
        pretrained_dict = torch.load(args.loadmodel)
        pretrained_dict['state_dict'] = {
            k: v for k, v in pretrained_dict['state_dict'].items()
            if 'disp' not in k
        }
        model.load_state_dict(pretrained_dict['state_dict'], strict=False)
    else:
        print('run with random init')
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model.eval()

    if not args.prepare_kitti:
        dataset = RVCDataset(args)
    if args.prepare_kitti:
        _, _, _, left_val, right_val, disp_val_L = lk15.dataloader(
            '/data/private/KITTI2015/data_scene_flow/training/',
            val=True)  # change to trainval when finetuning on KITTI
        dataset = DA.myImageFloder(left_val, right_val, disp_val_L,
                                   rand_scale=[1, 1], order=0)

    dataloader = DataLoader(dataset, batch_size=args.batchsize, shuffle=False,
                            num_workers=0)
    steps = 0
    max_disp = None
    original_image_size = None
    top_pad = None
    left_pad = None
    testres = [args.testres]
    dataset_type = None
    data_path = [args.datapath]

    # for (imgL, imgR, gt_disp_raw, max_disp, original_image_size, top_pad,
    #      left_pad, testres, dataset_type, data_path) in dataloader:
    for (imgL, imgR, gt_disp_raw) in dataloader:
        # Todo: this is a hot fix; it must be fixed to handle batch sizes greater than 1.
        data_path = data_path[0]
        img_name = os.path.basename(os.path.normpath(data_path))
        testres = float(testres[0])
        gt_disp_raw = gt_disp_raw[0]

        cum_metrics = None
        if dataset_type == 0:
            cum_metrics = mb_metrics
        elif dataset_type == 1:
            cum_metrics = eth_metrics
        elif dataset_type == 2:
            cum_metrics = kitti_metrics

        print(img_name)

        if args.max_disp > 0:
            max_disp = int(args.max_disp)

        ## change max disp
        tmpdisp = int(max_disp * testres // 64 * 64)
        if (max_disp * testres / 64 * 64) > tmpdisp:
            model.module.maxdisp = tmpdisp + 64
        else:
            model.module.maxdisp = tmpdisp
        if model.module.maxdisp == 64:
            model.module.maxdisp = 128
        model.module.disp_reg8 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg16 = disparityregression(model.module.maxdisp, 16).cuda()
        model.module.disp_reg32 = disparityregression(model.module.maxdisp, 32).cuda()
        model.module.disp_reg64 = disparityregression(model.module.maxdisp, 64).cuda()
        print(" max disparity = " + str(model.module.maxdisp))

        # wandb.log({"imgL": wandb.Image(imgL, caption=img_name + ", " + str(tuple(imgL.shape))),
        #            "imgR": wandb.Image(imgR, caption=img_name + ", " + str(tuple(imgR.shape)))},
        #           step=steps)

        with torch.no_grad():
            torch.cuda.synchronize()
            start_time = time.time()
            # * output dimensions same as input dimensions
            # * (ex: imgL[1, 3, 704, 2240] then pred_disp[1, 704, 2240])
            pred_disp, entropy = model(imgL, imgR)
            torch.cuda.synchronize()
            ttime = (time.time() - start_time)
            print(' time = %.2f' % (ttime * 1000))

        # * squeeze (remove dimensions with size 1) (ex: pred_disp[1, 704, 2240] -> [704, 2240])
        pred_disp = torch.squeeze(pred_disp).data.cpu().numpy()

        top_pad = int(top_pad[0])
        left_pad = int(left_pad[0])
        entropy = entropy[top_pad:, :pred_disp.shape[1] - left_pad].cpu().numpy()
        pred_disp = pred_disp[top_pad:, :pred_disp.shape[1] - left_pad]

        # save predictions
        idxname = img_name
        if not os.path.exists('output/%s/%s' % (args.name, idxname)):
            os.makedirs('output/%s/%s' % (args.name, idxname))
        idxname = '%s/disp0%s' % (idxname, args.name)

        # * shrink image back to the GT size (ex: pred_disp[675, 2236] -> [375, 1242])
        # ! we element-wise divide pred_disp by testres because the image is shrinking,
        # ! so the distance between pixels should also shrink by the same factor
        pred_disp_raw = cv2.resize(pred_disp / testres,
                                   (original_image_size[1], original_image_size[0]),
                                   interpolation=cv2.INTER_LINEAR)
        pred_disp = pred_disp_raw  # raw is to use for scoring
        gt_disp = gt_disp_raw.numpy()

        # * clip while keeping inf
        # `pred_disp != pred_disp` is the standard NaN test (only NaN != NaN);
        # invalid pixels are set to inf so they fall out of range below.
        pred_invalid = np.logical_or(pred_disp == np.inf, pred_disp != pred_disp)
        pred_disp[pred_invalid] = np.inf
        pred_disp_png = (pred_disp * 256).astype("uint16")

        gt_invalid = np.logical_or(gt_disp == np.inf, gt_disp != gt_disp)
        gt_disp[gt_invalid] = 0
        gt_disp_png = (gt_disp * 256).astype("uint16")
        entropy_png = (entropy * 256).astype('uint16')

        # ! raw output to png
        pred_disp_path = 'output/%s/%s/disp.png' % (args.name, idxname.split('/')[0])
        gt_disp_path = 'output/%s/%s/gt_disp.png' % (args.name, idxname.split('/')[0])
        assert (cv2.imwrite(pred_disp_path, pred_disp_png))
        assert (cv2.imwrite(gt_disp_path, gt_disp_png))
        assert (cv2.imwrite('output/%s/%s/ent.png' % (args.name, idxname.split('/')[0]),
                            entropy_png))

        # ! Experimental color maps
        gt_disp_color_path = 'output/%s/%s/gt_disp_color.png' % (
            args.name, idxname.split('/')[0])
        pred_disp_color_path = 'output/%s/%s/disp_color.png' % (
            args.name, idxname.split('/')[0])
        gt_colormap = convert_to_colormap(gt_disp_png)
        pred_colormap = convert_to_colormap(pred_disp_png)
        entropy_colormap = convert_to_colormap(entropy_png)
        assert (cv2.imwrite(gt_disp_color_path, gt_colormap))
        assert (cv2.imwrite(pred_disp_color_path, pred_colormap))

        # ! diff colormaps
        diff_colormap_path = 'output/%s/%s/diff_color.png' % (
            args.name, idxname.split('/')[0])
        false_positive_path = 'output/%s/%s/false_positive_color.png' % (
            args.name, idxname.split('/')[0])
        false_negative_path = 'output/%s/%s/false_negative_color.png' % (
            args.name, idxname.split('/')[0])
        gt_disp_png[gt_invalid] = pred_disp_png[gt_invalid]
        gt_disp_png = gt_disp_png.astype("int32")
        pred_disp_png = pred_disp_png.astype("int32")
        diff_colormap = convert_to_colormap(np.abs(gt_disp_png - pred_disp_png))
        false_positive_colormap = convert_to_colormap(
            np.abs(np.clip(gt_disp_png - pred_disp_png, None, 0)))
        false_negative_colormap = convert_to_colormap(
            np.abs(np.clip(gt_disp_png - pred_disp_png, 0, None)))
        assert (cv2.imwrite(diff_colormap_path, diff_colormap))
        assert (cv2.imwrite(false_positive_path, false_positive_colormap))
        assert (cv2.imwrite(false_negative_path, false_negative_colormap))

        out_pfm_path = 'output/%s/%s.pfm' % (args.name, idxname)
        with open(out_pfm_path, 'w') as f:
            save_pfm(f, pred_disp[::-1, :])
        with open('output/%s/%s/time_%s.txt' % (args.name, idxname.split('/')[0],
                                                args.name), 'w') as f:
            f.write(str(ttime))
        print(" output = " + out_pfm_path)

        caption = img_name + ", " + str(tuple(pred_disp_png.shape)) + \
                  ", max disparity = " + str(int(max_disp[0])) + \
                  ", time = " + str(ttime)
        # read GT depthmap and upload as jpg
        # wandb.log({"disparity": wandb.Image(pred_colormap, caption=caption),
        #            "gt": wandb.Image(gt_colormap),
        #            "entropy": wandb.Image(entropy_colormap, caption=str(entropy_png.shape)),
        #            "diff": wandb.Image(diff_colormap),
        #            "false_positive": wandb.Image(false_positive_colormap),
        #            "false_negative": wandb.Image(false_negative_colormap)}, step=steps)

        torch.cuda.empty_cache()
        steps += 1

        # Todo: find out what mask0nocc does. It's probably not the same as
        # KITTI's object map.
        if dataset_type == 2:
            obj_map_path = os.path.join(data_path, "obj_map.png")
        else:
            obj_map_path = None

        if args.score_results:
            if pred_disp_raw.shape != gt_disp_raw.shape:
                # pred_disp_raw [375 x 1242], gt_disp_raw [675 x 2236]
                ratio = float(gt_disp_raw.shape[1]) / pred_disp_raw.shape[1]
                disp_resized = cv2.resize(
                    pred_disp_raw,
                    (gt_disp_raw.shape[1], gt_disp_raw.shape[0])) * ratio
                pred_disp_raw = disp_resized  # [675 x 2236]

            # if args.debug:
            #     out_resized_pfm_path = 'output/%s/%s/pred_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_pfm_path, 'w') as f:
            #         save_pfm(f, pred_disp_raw)
            #     out_resized_gt_path = 'output/%s/%s/gt_scored.pfm' % (args.name, img_name)
            #     with open(out_resized_gt_path, 'w') as f:
            #         save_pfm(f, gt_disp_raw.numpy())

            metrics = score_rvc.get_metrics(
                pred_disp_raw, gt_disp_raw, int(max_disp[0]), dataset_type,
                ('output/%s/%s' % (args.name, idxname.split('/')[0])),
                disp_path=pred_disp_path, gt_path=gt_disp_path,
                obj_map_path=obj_map_path, debug=args.debug)

            avg_metrics = {}
            for (key, val) in metrics.items():
                if cum_metrics.get(key) == None:
                    cum_metrics[key] = []
                cum_metrics[key].append(val)
                avg_metrics["avg_" + key] = sum(cum_metrics[key]) / len(cum_metrics[key])
            # wandb.log(metrics, step=steps)
            # wandb.log(avg_metrics, step=steps)

    # if args.save_weights and os.path.exists(args.loadmodel):
    #     wandb.save(args.loadmodel)

    if args.prepare_kitti and (args.all_data or args.kitti):
        in_path = 'output/%s' % (args.name)
        out_path = "/home/isaac/high-res-stereo/kitti_submission_output"
        out_path = prepare_kitti(in_path, out_path)
        subprocess.run(["/home/isaac/KITTI2015_devkit/cpp/eval_scene_flow", out_path])
        print("KITTI submission evaluation saved to: " + out_path)
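# `convert_to_colormap` is used above but not defined in this excerpt. A
# minimal sketch (an assumption, not the repo's actual helper) that normalizes
# a 16-bit disparity/entropy image and applies an OpenCV colormap:
def convert_to_colormap(img_u16):
    img = img_u16.astype(np.float32)
    img = 255.0 * img / max(float(img.max()), 1e-6)  # scale into [0, 255]
    return cv2.applyColorMap(img.astype(np.uint8), cv2.COLORMAP_JET)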
                             right_disparity=all_right_disp,
                             rand_scale=[0.225, 0.6 * scale_factor],
                             rand_bright=[0.8, 1.2], order=0)

all_left_img, all_right_img, all_left_disp, all_right_disp = lt.dataloader(
    '%s/sceneflow/' % args.database)
loader_scene = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                right_disparity=all_right_disp,
                                rand_scale=[0.9, 2.4 * scale_factor], order=2)

all_left_img, all_right_img, all_left_disp, _, _, _ = lk15.dataloader(
    '%s/kitti_scene/training/' % args.database,
    typ='train')  # change to trainval when finetuning on KITTI
loader_kitti15 = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                  rand_scale=[0.9, 2.4 * scale_factor], order=0)

all_left_img, all_right_img, all_left_disp = lk12.dataloader(
    '%s/data_stereo_flow/training/' % args.database)
loader_kitti12 = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                  rand_scale=[0.9, 2.4 * scale_factor], order=0)

all_left_img, all_right_img, all_left_disp, _ = ls.dataloader('%s/eth3d/' %
def main():
    global args
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)
    np.random.seed(args.seed)

    if args.datatype == '2015':
        all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp = ls2015.dataloader(
            args.datapath2015, split=args.split_for_val)
        from dataloader import KITTILoader as DA
    elif args.datatype == 'Sence Flow':
        train_left_img, train_right_img, train_left_disp, test_left_img, test_right_img, test_left_disp = lt.dataloader(
            args.datapath)
        from dataloader import SecenFlowLoader as DA
    else:
        raise AssertionError

    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = logger.setup_logger(args.save_path + '/FLOPs_inference_time.log')
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))

    if args.model_types == "PSMNet":
        model = PSMNet(args)
        args.loss_weights = [0.5, 0.7, 1.]
    elif args.model_types == "PSMNet_TSM":
        model = PSMNet_TSM(args)
        args.loss_weights = [0.5, 0.7, 1.]
    elif args.model_types == "Hybrid_Net":
        model = Hybrid_Net(args)
        args.loss_weights = [0.5, 0.7, 1., 1., 1.]
    elif args.model_types == "Hybrid_Net_DSM":
        model = Hybrid_Net(args)
        args.loss_weights = [0.5, 0.7, 1., 1., 1.]
    else:
        raise AssertionError("model error")

    model = nn.DataParallel(model).cuda()

    for i in range(30):
        # print("test_left_img", test_left_img[i])
        log.info("=> test_left_img '{}'".format(test_left_img[i]))

    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False),
        batch_size=args.test_bsize, shuffle=False, num_workers=4, drop_last=False)

    if args.resume:
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch'] + 1
            model.load_state_dict(checkpoint['state_dict'])
            # optimizer.load_state_dict(checkpoint['optimizer'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info("=> no checkpoint found at '{}'".format(args.resume))
            log.info("=> Will start from scratch.")
    else:
        log.info('Not Resume')

    log.info('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # if args.testing:
    test(TestImgLoader, model, log)
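# The log name above suggests this script measures inference time. A minimal
# sketch of the usual CUDA-synchronized timing pattern (hypothetical helper;
# the repo's `test()` may time things differently):
def time_inference_ms(model, imgL, imgR):
    model.eval()
    with torch.no_grad():
        torch.cuda.synchronize()       # wait for pending kernels before timing
        start = time.time()
        _ = model(imgL, imgR)
        torch.cuda.synchronize()       # wait for the forward pass to finish
    return (time.time() - start) * 1000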
def main():
    global args
    log = logger.setup_logger(args.save_path + '/training.log')

    train_left_img, train_right_img, train_left_disp, test_left_img, test_right_img, test_left_disp = ls.dataloader(
        args.datapath, log)

    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(train_left_img, train_right_img, train_left_disp, True),
        batch_size=args.train_bsize, shuffle=True, num_workers=4, drop_last=False)

    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False),
        batch_size=args.test_bsize, shuffle=False, num_workers=4, drop_last=False)

    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ': ' + str(value))

    model = models.anynet.AnyNet(args)
    model = nn.DataParallel(model).cuda()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999))
    log.info('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    if args.pretrained:
        if os.path.isfile(args.pretrained):
            checkpoint = torch.load(args.pretrained)
            model.load_state_dict(checkpoint['state_dict'])
            log.info("=> loaded pretrained model '{}'".format(args.pretrained))
        else:
            log.info("=> no pretrained model found at '{}'".format(args.pretrained))
            log.info("=> Will start from scratch.")

    args.start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info("=> no checkpoint found at '{}'".format(args.resume))
            log.info("=> Will start from scratch.")
    else:
        log.info('Not Resume')

    cudnn.benchmark = True
    start_full_time = time.time()
    for epoch in range(args.start_epoch, args.epochs):
        log.info('This is {}-th epoch'.format(epoch))
        adjust_learning_rate(optimizer, epoch)
        train(TrainImgLoader, model, optimizer, log, epoch)

        savefilename = args.save_path + '/checkpoint.tar'
        torch.save(
            {
                'epoch': epoch,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }, savefilename)

        if epoch % 1 == 0:
            test(TestImgLoader, model, log)

    test(TestImgLoader, model, log)
    log.info('full training time = {:.2f} Hours'.format(
        (time.time() - start_full_time) / 3600))
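# `adjust_learning_rate(optimizer, epoch)` above is not shown in this excerpt.
# A minimal sketch of a typical step schedule (an assumption; the actual
# milestone in the repo may differ), reading the base lr from the global `args`:
def adjust_learning_rate(optimizer, epoch):
    lr = args.lr if epoch <= 200 else args.lr * 0.1  # assumed milestone at 200
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr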
else:
    print('run with random init')
print('Number of model parameters: {}'.format(
    sum([p.data.nelement() for p in model.parameters()])))

# dry run
multip = 48
imgL = np.zeros((1, 3, 24 * multip, 32 * multip))
imgR = np.zeros((1, 3, 24 * multip, 32 * multip))
imgL = Variable(torch.FloatTensor(imgL).cuda())
imgR = Variable(torch.FloatTensor(imgR).cuda())
with torch.no_grad():
    model.eval()
    pred_disp, entropy = model(imgL, imgR)

_, _, _, left_val, right_val, disp_val_L = lk15.dataloader(
    '/DATA1/isaac/KITTI2015/data_scene_flow/training/', val=True)


def main():
    processed = get_transform()
    model.eval()

    # save predictions
    out_path = os.path.join("./kitti_submission_output", args.name)
    if not os.path.exists(out_path):
        os.mkdir(out_path)
    out_dir = os.path.join(out_path, "disp_0")
    if not os.path.exists(out_dir):
        os.mkdir(out_dir)

    for (left_img_path, right_img_path,
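# `get_transform` above typically returns ImageNet-style preprocessing in the
# PSMNet/HSM family of repos; a minimal sketch under that assumption (not
# necessarily this repo's exact transform):
import torchvision.transforms as transforms

def get_transform():
    return transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])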
def init_dataloader(input_args):
    batch_size = input_args.batchsize
    scale_factor = input_args.maxdisp / 384.  # controls training resolution

    hrvs_folder = '%s/hrvs/carla-highres/trainingF' % input_args.database
    all_left_img, all_right_img, all_left_disp, all_right_disp = ls.dataloader(hrvs_folder)
    loader_carla = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                    right_disparity=all_right_disp,
                                    rand_scale=[0.225, 0.6 * scale_factor],
                                    rand_bright=[0.8, 1.2], order=2)

    middlebury_folder = '%s/middlebury/mb-ex-training/trainingF' % input_args.database
    all_left_img, all_right_img, all_left_disp, all_right_disp = ls.dataloader(middlebury_folder)
    loader_mb = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                 right_disparity=all_right_disp,
                                 rand_scale=[0.225, 0.6 * scale_factor],
                                 rand_bright=[0.8, 1.2], order=0)

    rand_scale = [0.9, 2.4 * scale_factor]
    all_left_img, all_right_img, all_left_disp, all_right_disp = lt.dataloader(
        '%s/sceneflow/' % input_args.database)
    loader_scene = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                    right_disparity=all_right_disp,
                                    rand_scale=rand_scale, order=2)

    # change to trainval when finetuning on KITTI
    all_left_img, all_right_img, all_left_disp, _, _, _ = lk15.dataloader(
        '%s/kitti15/training/' % input_args.database, split='train')
    loader_kitti15 = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                      rand_scale=rand_scale, order=0)

    all_left_img, all_right_img, all_left_disp = lk12.dataloader(
        '%s/kitti12/training/' % input_args.database)
    loader_kitti12 = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                      rand_scale=rand_scale, order=0)

    all_left_img, all_right_img, all_left_disp, _ = ls.dataloader(
        '%s/eth3d/' % input_args.database)
    loader_eth3d = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                    rand_scale=rand_scale, order=0)

    all_left_img, all_right_img, all_left_disp, all_right_disp = lidar_dataloader(
        '%s/lidar-hdsm-dataset/' % input_args.database)
    loader_lidar = DA.myImageFloder(all_left_img, all_right_img, all_left_disp,
                                    right_disparity=all_right_disp,
                                    rand_scale=[0.5, 1.1 * scale_factor],
                                    rand_bright=[0.8, 1.2], order=2,
                                    flip_disp_ud=True, occlusion_size=[10, 25])

    data_inuse = torch.utils.data.ConcatDataset(
        [loader_carla] * 10 +
        [loader_mb] * 150 +      # 71 pairs
        [loader_scene] +         # 39K pairs 960x540
        [loader_kitti15] +
        [loader_kitti12] * 24 +
        [loader_eth3d] * 300 +
        [loader_lidar])          # 25K pairs  # airsim ~750

    train_dataloader = torch.utils.data.DataLoader(
        data_inuse, batch_size=batch_size, shuffle=True,
        num_workers=batch_size, drop_last=True, worker_init_fn=_init_fn)
    print('%d batches per epoch' % (len(data_inuse) // batch_size))
    return train_dataloader
def main():
    global args
    os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"  # see issue #152
    os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu

    train_left_img, train_right_img, train_left_disp, test_left_img, test_right_img, test_left_disp = ls.dataloader(
        args.datapath)

    TrainImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(train_left_img, train_right_img, train_left_disp, True),
        batch_size=args.train_bsize, shuffle=True, num_workers=1, drop_last=False)

    TestImgLoader = torch.utils.data.DataLoader(
        DA.myImageFloder(test_left_img, test_right_img, test_left_disp, False),
        batch_size=args.test_bsize, shuffle=False, num_workers=4, drop_last=False)

    if not os.path.isdir(args.save_path):
        os.makedirs(args.save_path)
    log = logger.setup_logger(args.save_path + 'training.log')
    for key, value in sorted(vars(args).items()):
        log.info(str(key) + ':' + str(value))

    model = StereoNet(maxdisp=args.maxdisp)
    model = nn.DataParallel(model).cuda()
    model.apply(weights_init)

    optimizer = optim.RMSprop(model.parameters(), lr=args.lr)
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[200], gamma=args.gamma)
    log.info('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    args.start_epoch = 0
    if args.resume:
        if os.path.isfile(args.resume):
            log.info("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            model.load_state_dict(checkpoint['state_dict'])
            log.info("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            log.info("=> no checkpoint found at '{}'".format(args.resume))
            log.info("=> will start from scratch.")
    else:
        log.info("Not Resume")

    min_error = 100000
    max_epo = 0
    start_full_time = time.time()
    for epoch in range(args.start_epoch, args.epoch):
        log.info('This is {}-th epoch'.format(epoch))
        train(TrainImgLoader, model, optimizer, log, epoch)
        scheduler.step()
        error = test(TestImgLoader, model, log)
        if error < min_error:
            max_epo = epoch
            min_error = error
            savefilename = args.save_path + 'finetune_checkpoint_{}.pth'.format(max_epo)
            torch.save({
                'epoch': epoch,
                'state_dict': model.state_dict()
            }, savefilename)
        log.info('MIN epoch %d total test error = %.3f' % (max_epo, min_error))

    log.info('full training time = {:.2f} Hours'.format(
        (time.time() - start_full_time) / 3600))
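# `weights_init` is applied to the model above but not defined in this excerpt.
# A common sketch (an assumption, not necessarily StereoNet's exact init) using
# Kaiming initialization for conv layers and constant init for batch norm:
def weights_init(m):
    if isinstance(m, (nn.Conv2d, nn.Conv3d)):
        nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        if m.bias is not None:
            nn.init.zeros_(m.bias)
    elif isinstance(m, (nn.BatchNorm2d, nn.BatchNorm3d)):
        nn.init.ones_(m.weight)
        nn.init.zeros_(m.bias)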