def main(args): """ main function for testing param args: global arguments return: None """ t = PrettyTable(['args_name', 'args_value']) for k in list(vars(args).keys()): t.add_row([k, vars(args)[k]]) print(t.get_string(title="Predict Arguments")) # build the model model = build_model(args.model, args.classes, args.backbone, args.pretrained, args.out_stride, args.mult_grid) # load the test set if args.predict_type == 'validation': testdataset, class_dict_df = build_dataset_test(args.root, args.dataset, args.crop_size, mode=args.predict_mode, gt=True) else: testdataset, class_dict_df = build_dataset_test(args.root, args.dataset, args.crop_size, mode=args.predict_mode, gt=False) DataLoader = data.DataLoader(testdataset, batch_size=args.batch_size, shuffle=False, num_workers=args.batch_size, pin_memory=True, drop_last=False) if args.cuda: os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus model = model.cuda() cudnn.benchmark = True if not torch.cuda.is_available(): raise Exception("no GPU found or wrong gpu id, please run without --cuda") if not os.path.exists(args.save_seg_dir): os.makedirs(args.save_seg_dir) if args.checkpoint: if os.path.isfile(args.checkpoint): checkpoint = torch.load(args.checkpoint)['model'] check_list = [i for i in checkpoint.items()] # Read weights with multiple cards, and continue training with a single card this time if 'module.' 
in check_list[0][0]: # 读取使用多卡训练权重,并且此次使用单卡预测 new_stat_dict = {} for k, v in checkpoint.items(): new_stat_dict[k[7:]] = v model.load_state_dict(new_stat_dict, strict=True) # Read the training weight of a single card, and continue training with a single card this time else: model.load_state_dict(checkpoint) else: print("no checkpoint found at '{}'".format(args.checkpoint)) raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint)) # define loss function criterion = build_loss(args, None, 255) print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n" ">>>>>>>>>>> beginning testing >>>>>>>>>>>>\n" ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") predict_multiscale_sliding(args=args, model=model, testLoader=DataLoader, class_dict_df=class_dict_df, scales=args.scales, overlap=args.overlap, criterion=criterion, mode=args.predict_type, save_result=True)
def predict_model(args):
    """Run label-free inference over a dataset split or an image folder.

    param args: global arguments
    return: None
    """
    print(args)

    # Pin the visible GPUs before any CUDA work happens.
    if args.cuda:
        print("=====> use gpu id: '{}'".format(args.gpus))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        if not torch.cuda.is_available():
            raise Exception(
                "no GPU found or wrong gpu id, please run without --cuda")

    # build the model
    model = build_model(args.model, num_classes=args.classes)

    if args.cuda:
        model = model.cuda()  # using GPU for inference
        cudnn.benchmark = True

    # Restore trained weights; fail fast if the path is bad.
    if args.checkpoint:
        if not os.path.isfile(args.checkpoint):
            print("=====> no checkpoint found at '{}'".format(args.checkpoint))
            raise FileNotFoundError("no checkpoint found at '{}'".format(
                args.checkpoint))
        print("=====> loading checkpoint '{}'".format(args.checkpoint))
        saved_state = torch.load(args.checkpoint)
        model.load_state_dict(saved_state['model'])
        # model.load_state_dict(convert_state_dict(saved_state['model']))

    if not os.path.exists(args.save_seg_dir):
        os.makedirs(args.save_seg_dir)

    # Choose the data source: a txt file list, or a raw image directory.
    if args.use_txt_list:
        _, testLoader = build_dataset_test(args.dataset, args.num_workers,
                                           none_gt=True)
    else:
        _, testLoader = build_dataset_predict(args.image_input_path, args.dataset,
                                              args.num_workers, none_gt=True)

    print("=====> beginning testing")
    print("test set length: ", len(testLoader))
    predict(args, testLoader, model)
def test_model(args):
    """Evaluate a trained model on the validation set.

    With ``args.best`` unset, the single checkpoint ``args.checkpoint`` is
    scored.  With it set, the last 10 epoch checkpoints next to
    ``args.checkpoint`` are scored and the best is reported.  Results are
    appended to a log file beside the checkpoint.

    param args: global arguments
    return: None
    """
    print(args)

    if args.cuda:
        print("=====> use gpu id: '{}'".format(args.gpus))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        if not torch.cuda.is_available():
            raise Exception("no GPU found or wrong gpu id, please run without --cuda")

    # build the model
    model = build_model(args.model, num_classes=args.classes)

    if args.cuda:
        model = model.cuda()  # using GPU for inference
        cudnn.benchmark = True

    if args.save:
        if not os.path.exists(args.save_seg_dir):
            os.makedirs(args.save_seg_dir)

    # load the test set
    datas, testLoader = build_dataset_test(args.dataset, args.num_workers)

    if not args.best:
        if args.checkpoint:
            if os.path.isfile(args.checkpoint):
                print("=====> loading checkpoint '{}'".format(args.checkpoint))
                checkpoint = torch.load(args.checkpoint)
                model.load_state_dict(checkpoint['model'])
                # model.load_state_dict(convert_state_dict(checkpoint['model']))
            else:
                print("=====> no checkpoint found at '{}'".format(args.checkpoint))
                raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint))
        print("=====> beginning validation")
        print("validation set length: ", len(testLoader))
        mIOU_val, per_class_iu = test(args, testLoader, model)
        print(mIOU_val)
        print(per_class_iu)
    # Get the best test result among the last 10 model records.
    else:
        if args.checkpoint:
            if os.path.isfile(args.checkpoint):
                dirname, basename = os.path.split(args.checkpoint)
                # checkpoint names follow 'model_<epoch>.pth'
                epoch = int(os.path.splitext(basename)[0].split('_')[1])
                mIOU_val = []
                per_class_iu = []
                epochs = list(range(epoch - 9, epoch + 1))
                for i in epochs:
                    basename = 'model_' + str(i) + '.pth'
                    resume = os.path.join(dirname, basename)
                    checkpoint = torch.load(resume)
                    model.load_state_dict(checkpoint['model'])
                    print("=====> beginning test the" + basename)
                    print("validation set length: ", len(testLoader))
                    mIOU_val_0, per_class_iu_0 = test(args, testLoader, model)
                    mIOU_val.append(mIOU_val_0)
                    per_class_iu.append(per_class_iu_0)
                # compute argmax once (the original evaluated it twice)
                best_pos = int(np.argmax(mIOU_val))
                index = epochs[best_pos]
                print("The best mIoU among the last 10 models is", index)
                print(mIOU_val)
                per_class_iu = per_class_iu[best_pos]
                mIOU_val = np.max(mIOU_val)
                print(mIOU_val)
                print(per_class_iu)
            else:
                print("=====> no checkpoint found at '{}'".format(args.checkpoint))
                raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint))

    # Save the result
    if not args.best:
        model_path = os.path.splitext(os.path.basename(args.checkpoint))
        args.logFile = 'test_' + model_path[0] + '.txt'
    else:
        args.logFile = 'test_' + 'best' + str(index) + '.txt'
    logFileLoc = os.path.join(os.path.dirname(args.checkpoint), args.logFile)

    # Append mode creates the file when missing, so the original
    # isfile()/'w' fallback was redundant; the context manager also
    # guarantees the handle is closed even if a write fails.
    with open(logFileLoc, 'a') as logger:
        logger.write("Mean IoU: %.4f" % mIOU_val)
        logger.write("\nPer class IoU: ")
        for iou in per_class_iu:
            logger.write("%.4f\t" % iou)
def main(args): """ args: args: global arguments """ # set the seed setup_seed(GLOBAL_SEED) # cudnn.enabled = True # cudnn.benchmark = True # find the optimal configuration # cudnn.deterministic = True # reduce volatility # learning scheduling, for 10 epoch lr*0.8 # lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=10, gamma=0.85) # build the model and initialization weights model = build_model(args.model, args.classes, args.backbone, args.pretrained, args.out_stride, args.mult_grid) # define loss function, respectively criterion = build_loss(args, None, ignore_label) # load train set and data augmentation datas, traindataset = build_dataset_train(args.root, args.dataset, args.base_size, args.crop_size) # load the test set, if want set cityscapes test dataset change none_gt=False testdataset, class_dict_df = build_dataset_test(args.root, args.dataset, args.crop_size, mode=args.predict_mode, gt=True) # move model and criterion on cuda if args.cuda: os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus_id dist.init_process_group(backend="nccl", init_method='env://') args.local_rank = torch.distributed.get_rank() torch.cuda.set_device(args.local_rank) device = torch.device("cuda", args.local_rank) gpus = len(list(os.environ["CUDA_VISIBLE_DEVICES"])) - (len(list(os.environ["CUDA_VISIBLE_DEVICES"])) // 2) trainLoader, model, criterion = Distribute(args, traindataset, model, criterion, device, gpus) # test with distributed # testLoader, _, _ = Distribute(args, testdataset, model, criterion, device, gpus) # test with single card testLoader = data.DataLoader(testdataset, batch_size=args.batch_size, shuffle=True, num_workers=args.batch_size, pin_memory=True, drop_last=False) if not torch.cuda.is_available(): raise Exception("No GPU found or Wrong gpu id, please run without --cuda") # define optimization strategy # parameters = [{'params': model.get_1x_lr_params(), 'lr': args.lr}, # {'params': model.get_10x_lr_params(), 'lr': args.lr}] parameters = 
model.parameters() if args.optim == 'sgd': optimizer = torch.optim.SGD(parameters, lr=args.lr, momentum=0.9, weight_decay=5e-4, nesterov=False) elif args.optim == 'adam': optimizer = torch.optim.Adam(parameters, weight_decay=5e-4) elif args.optim == 'adamw': optimizer = torch.optim.AdamW(parameters, weight_decay=5e-4) # initial log file val output save args.savedir = (args.savedir + args.dataset + '/' + args.model + '/') if not os.path.exists(args.savedir) and args.local_rank == 0: os.makedirs(args.savedir) # save_seg_dir args.save_seg_dir = os.path.join(args.savedir, args.predict_mode) if not os.path.exists(args.save_seg_dir) and args.local_rank == 0: os.makedirs(args.save_seg_dir) recorder = record_log(args) if args.resume == None and args.local_rank == 0: recorder.record_args(datas, str(netParams(model) / 1e6) + ' M', GLOBAL_SEED) # initialize the early_stopping object early_stopping = EarlyStopping(patience=300) start_epoch = 1 if args.local_rank == 0: print(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>\n" ">>>>>>>>>>> beginning training >>>>>>>>>>>\n" ">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>") epoch_list = [] lossTr_list = [] Miou_list = [] lossVal_list = [] Miou = 0 Best_Miou = 0 # continue training if args.resume: logger, lines = recorder.resume_logfile() for index, line in enumerate(lines): lossTr_list.append(float(line.strip().split()[2])) if len(line.strip().split()) != 3: epoch_list.append(int(line.strip().split()[0])) lossVal_list.append(float(line.strip().split()[3])) Miou_list.append(float(line.strip().split()[5])) if os.path.isfile(args.resume): checkpoint = torch.load(args.resume) start_epoch = checkpoint['epoch'] + 1 optimizer.load_state_dict(checkpoint['optimizer']) check_list = [i for i in checkpoint['model'].items()] # Read weights with multiple cards, and continue training with a single card this time if 'module.' 
in check_list[0][0]: new_stat_dict = {} for k, v in checkpoint['model'].items(): new_stat_dict[k[:]] = v model.load_state_dict(new_stat_dict, strict=True) # Read the training weight of a single card, and continue training with a single card this time else: model.load_state_dict(checkpoint['model']) if args.local_rank == 0: print("loaded checkpoint '{}' (epoch {})".format(args.resume, checkpoint['epoch'])) else: if args.local_rank == 0: print("no checkpoint found at '{}'".format(args.resume)) else: logger = recorder.initial_logfile() logger.flush() for epoch in range(start_epoch, args.max_epochs + 1): start_time = time.time() # training train_start = time.time() lossTr, lr = train(args, trainLoader, model, criterion, optimizer, epoch, device) if args.local_rank == 0: lossTr_list.append(lossTr) train_end = time.time() train_per_epoch_seconds = train_end - train_start validation_per_epoch_seconds = 60 # init validation time # validation if mode==validation, predict with label; elif mode==predict, predict without label. 
if epoch % args.val_epochs == 0 or epoch == 1 or args.max_epochs - 10 < epoch <= args.max_epochs: validation_start = time.time() loss, FWIoU, Miou, MIoU, PerCiou_set, Pa, PerCpa_set, Mpa, MF, F_set, F1_avg = \ predict_multiscale_sliding(args=args, model=model, testLoader=testLoader, class_dict_df=class_dict_df, # scales=[1.25, 1.5, 1.75, 2.0], scales=[1.0], overlap=0.3, criterion=criterion, mode=args.predict_type, save_result=True) torch.cuda.empty_cache() if args.local_rank == 0: epoch_list.append(epoch) Miou_list.append(Miou) lossVal_list.append(loss.item()) # record trainVal information recorder.record_trainVal_log(logger, epoch, lr, lossTr, loss, FWIoU, Miou, MIoU, PerCiou_set, Pa, Mpa, PerCpa_set, MF, F_set, F1_avg, class_dict_df) torch.cuda.empty_cache() validation_end = time.time() validation_per_epoch_seconds = validation_end - validation_start else: if args.local_rank == 0: # record train information recorder.record_train_log(logger, epoch, lr, lossTr) # # Update lr_scheduler. In pytorch 1.1.0 and later, should call 'optimizer.step()' before 'lr_scheduler.step()' # lr_scheduler.step() if args.local_rank == 0: # draw log fig draw_log(args, epoch, epoch_list, lossTr_list, Miou_list, lossVal_list) # save the model model_file_name = args.savedir + '/best_model.pth' last_model_file_name = args.savedir + '/last_model.pth' state = { "epoch": epoch, "model": model.state_dict(), 'optimizer': optimizer.state_dict() } if Miou > Best_Miou: Best_Miou = Miou torch.save(state, model_file_name) recorder.record_best_epoch(epoch, Best_Miou, Pa) # early_stopping monitor early_stopping.monitor(monitor=Miou) if early_stopping.early_stop: print("Early stopping and Save checkpoint") if not os.path.exists(last_model_file_name): torch.save(state, last_model_file_name) torch.cuda.empty_cache() # empty_cache loss, FWIoU, Miou, Miou_Noback, PerCiou_set, Pa, PerCpa_set, Mpa, MF, F_set, F1_Noback = \ predict_multiscale_sliding(args=args, model=model, testLoader=testLoader, 
scales=[1.0], overlap=0.3, criterion=criterion, mode=args.predict_type, save_result=False) print("Epoch {} lr= {:.6f} Train Loss={:.4f} Val Loss={:.4f} Miou={:.4f} PerCiou_set={}\n" .format(epoch, lr, lossTr, loss, Miou, str(PerCiou_set))) break total_second = start_time + (args.max_epochs - epoch) * train_per_epoch_seconds + \ ((args.max_epochs - epoch) / args.val_epochs + 10) * validation_per_epoch_seconds + 43200 print('Best Validation MIoU:{}'.format(Best_Miou)) print('Training deadline is: {}\n'.format(time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(total_second))))
def main(): """ Main Function """ # Parse args and set up logging infer_args() if args.single_scale: scales = [1.0] else: scales = [float(x) for x in args.scales.split(',')] output_dir = os.path.join(args.ckpt_path, args.exp_name, args.split) os.makedirs(output_dir, exist_ok=True) save_log('eval', output_dir, date_str) logging.info("Network Arch: %s", args.arch) logging.info("CV split: %d", args.cv_split) logging.info("Exp_name: %s", args.exp_name) logging.info("Ckpt path: %s", args.ckpt_path) logging.info("Scales : %s", ' '.join(str(e) for e in scales)) logging.info("Inference mode: %s", args.inference_mode) # Set up network, loader, inference mode metrics = args.dataset != 'video_folder' if args.dataset == 'kitti' and args.split == 'test': metrics = False #test_loader = setup_loader() args.dataset_cls = cityscapes datas, test_loader = build_dataset_test(args.dataset, args.num_workers) runner = RunEval(output_dir, metrics, write_image=args.dump_images, dataset_cls=args.dataset_cls, inference_mode=args.inference_mode) net = get_net() # Fix the ASPP pool size to 105, which is the tensor size if you train with crop # size of 840x840 if args.fixed_aspp_pool: net.module.aspp.img_pooling = torch.nn.AvgPool2d(105) if args.inference_mode == 'sliding': #默认是这个模式 inference = inference_sliding elif args.inference_mode == 'pooling': inference = inference_pool elif args.inference_mode == 'whole': inference = inference_whole else: raise 'Not a valid inference mode: {}'.format(args.inference_mode) # Run Inference! 
pbar = tqdm(test_loader, desc='eval {}'.format(args.split), smoothing=1.0) for iteration, data in enumerate(pbar): if args.dataset == 'video_folder': imgs, img_names = data gt = None else: if args.inference_mode == 'pooling': base_img, gt_with_imgs, img_names = data base_img = base_img[0] imgs = gt_with_imgs[0] gt = gt_with_imgs[1] else: base_img = None imgs, gt, _, img_names = data runner.inf(imgs, img_names, gt, inference, net, scales, pbar, base_img) if iteration > 5 and args.test_mode: break # Calculate final overall statistics runner.final_dump()
def test_model(args):
    """Evaluate a single checkpoint, or (with ``args.best``) every
    ``model_<n>.pth`` checkpoint in the checkpoint's directory, and log
    the maximum mIoU found.

    param args: global arguments
    return: None
    """
    print(args)
    mIOU_val_max = 0

    if args.cuda:
        print("use gpu id: '{}'".format(args.gpus))
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        if not torch.cuda.is_available():
            raise Exception("no GPU found or wrong gpu id, please run without --cuda")

    # build the model
    model = build_model(args.model, num_classes=args.classes)

    if args.cuda:
        model = model.cuda()  # using GPU for inference
        cudnn.benchmark = True

    if args.save:
        if not os.path.exists(args.save_seg_dir):
            os.makedirs(args.save_seg_dir)

    # load the test set
    datas, testLoader = build_dataset_test(args.dataset, args.num_workers)

    if not args.best:
        if args.checkpoint:
            if os.path.isfile(args.checkpoint):
                print("loading checkpoint '{}'".format(args.checkpoint))
                checkpoint = torch.load(args.checkpoint)
                model.load_state_dict(checkpoint['model'])
                # model.load_state_dict(convert_state_dict(checkpoint['model']))
            else:
                print("no checkpoint found at '{}'".format(args.checkpoint))
                raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint))
        print("beginning validation")
        print("validation set length: ", len(testLoader))
        miou, class_iou, fmiou, pa, mpa = test(args, testLoader, model)
    # Get the best test result among the last 10 model records.
    else:
        if args.checkpoint:
            if os.path.isfile(args.checkpoint):
                dirname, basename = os.path.split(args.checkpoint)
                mIOU_val = []
                per_class_iu = []
                check_num = []
                # BUG FIX: the original split on '/' to get the file name,
                # which breaks on Windows paths — use os.path instead.
                for ckpt_path in glob.glob(os.path.join(dirname, '*.pth')):
                    stem = os.path.splitext(os.path.basename(ckpt_path))[0]
                    check_num.append(int(stem.split('_')[-1]))
                check_num.sort()
                for i in check_num:
                    basename = 'model_' + str(i) + '.pth'
                    resume = os.path.join(dirname, basename)
                    checkpoint = torch.load(resume)
                    model.load_state_dict(checkpoint['model'])
                    print("beginning test the:" + basename)
                    print("validation set length: ", len(testLoader))
                    miou, class_iou, fmiou, pa, mpa = test(args, testLoader, model)
                    print('Miou Val is ', miou)
                    mIOU_val.append(miou)
                # index = list(range(epoch - 19, epoch + 1))[np.argmax(mIOU_val)]
                index = check_num[np.argmax(mIOU_val)]
                print("The best mIoU among the models is", index)
                mIOU_val_max = np.max(mIOU_val)
            else:
                print("no checkpoint found at '{}'".format(args.checkpoint))
                raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint))

    # Save the result
    if not args.best:
        model_path = os.path.splitext(os.path.basename(args.checkpoint))
        args.logFile = 'test_' + model_path[0] + '.txt'
    else:
        args.logFile = 'test_' + 'best' + str(index) + '.txt'
    logFileLoc = os.path.join(os.path.dirname(args.checkpoint), args.logFile)

    # 'a+' creates the file when missing, so the original isfile()/'w'
    # branch was redundant; the context manager guarantees the handle is
    # closed even on a write error.
    with open(logFileLoc, 'a+') as logger:
        logger.write("Max Mean IoU: %.4f" % mIOU_val_max)
def test_curve(args): """ main function for testing param args: global arguments return: None """ args.save = False args.best = False print(args) # if args.checkpoint: # if os.path.isdir(args.checkpoint): # model_dir = get_dir_list(args.checkpoint, args.model) # if len(model_dir) == 0: # print("=====> no checkpoint found at '{}'".format(args.checkpoint)) # return # else: # print("=====> no a dir '{}'".format(args.checkpoint)) # # raise FileNotFoundError("no checkpoint found at '{}'".format(args.checkpoint)) # return model_dir = [args.model + 'bs5gpu1_train_True'] if args.cuda: print("=====> use gpu id: '{}'".format(args.gpus)) os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus if not torch.cuda.is_available(): raise Exception( "no GPU found or wrong gpu id, please run without --cuda") # build the model model = build_model(args.model, num_classes=args.classes) init_weight(model, nn.init.kaiming_normal_, nn.BatchNorm2d, 1e-3, 0.1, mode='fan_in') if args.cuda: model = model.cuda() # using GPU for inference cudnn.benchmark = True # if args.save: # if not os.path.exists(args.save_seg_dir): # os.makedirs(args.save_seg_dir) # load the test set datas, testLoader = build_dataset_test(args.dataset, args.num_workers, args.batch_size) # datas, _, testLoader = build_dataset_train(args.dataset, (352,480), args.batch_size, 'train', False, False, args.num_workers) for d in model_dir: csv_path = path.join(args.checkpoint, d, d + '_test.csv') log_path = path.join(args.checkpoint, d, 'log.txt') if os.path.exists(log_path) and os.path.exists(csv_path) and ( os.path.getmtime(log_path) < os.path.getmtime(csv_path)): pass # print(csv_path + ' is the newest------------------------------!') # continue pth_list = get_file_list(path.join(args.checkpoint, d)) if len(pth_list) == 0: continue # print(pth_list) if args.dataset == 'cityscapes': results = pd.DataFrame(columns=[ 'epoch', 'mIoU', 'road', 'sidewalk', 'building', 'wall', 'fence', 'pole', 'traffic light', 'traffic sign', 'vegetation', 
'terrain', 'sky', 'person', 'rider', 'car', 'truck', 'bus', 'train', 'motorcycle', 'bicycle' ]) elif (args.dataset == 'camvid') or (args.dataset == 'camvid352'): results = pd.DataFrame(columns=[ 'epoch', 'mIoU', 'Sky', 'Building', 'Pole', 'Road', 'Sidewalk', 'Tree', 'Sign', 'Fence', 'Car', 'Pedestrian', 'Bicyclist' ]) else: raise NotImplementedError( "This repository now supports two datasets: cityscapes and camvid, %s is not included" % args.dataset) for pth in pth_list: checkpoint = torch.load(path.join(args.checkpoint, d, pth), map_location=torch.device('cpu')) print("=====> beginning load model {}/{}".format(d, pth)) model.load_state_dict(checkpoint['model']) print("=====> beginning validation {}/{}".format(d, pth)) print("validation set length: ", len(testLoader)) mIOU_val, per_class_iu = test(args, testLoader, model) # mIOU_val, per_class_iu = 1,np.array([1,2,3,4,5,6,7,8,9,0,1]) epoch = int(pth.strip('model_').strip('.pth')) results.loc[results.shape[0]] = [epoch, mIOU_val ] + per_class_iu.tolist() results.sort_values(by=['epoch'], axis=0, inplace=True) results.reset_index(drop=True, inplace=True) results.to_csv(csv_path) print('save {}!!!!!'.format(csv_path))