def _backup_source_files(backup_dir, patterns):
    """Copy every regular file matching the glob *patterns* into *backup_dir*.

    Shell-free stand-in for ``cp <pattern> <backup_dir>/``: the destination is
    passed as a real path, so directories containing spaces or shell
    metacharacters work. The copy stays best-effort: a pattern that matches
    nothing is skipped, and a file that cannot be copied is reported on stdout
    without aborting the run.
    """
    import glob
    import shutil

    for pattern in patterns:
        for src in glob.glob(pattern):
            # Plain ``cp`` (no -r) does not copy directories either.
            if not os.path.isfile(src):
                continue
            try:
                shutil.copy(src, backup_dir)
            except OSError as err:
                print('Failed to back up %s: %s' % (src, err))


def repeat_eval_ckpt(root_result_dir, ckpt_dir):
    """Poll *ckpt_dir* indefinitely and evaluate each not-yet-evaluated checkpoint.

    All outputs are written below
    ``<root_result_dir>/eval/eval_all_<args.extra_tag>``: the log file, a
    backup of the source files, the list of evaluated epochs, the tensorboard
    events and one result directory per evaluated epoch. The function reads
    the module-level ``args`` and ``cfg`` objects.

    Args:
        root_result_dir: Base output directory of the experiment.
        ckpt_dir: Directory that is polled for checkpoints.

    This function never returns normally; it loops until it is interrupted or
    an exception propagates. The tensorboard writer is closed on the way out.
    """
    root_result_dir = os.path.join(root_result_dir, 'eval', 'eval_all_' + args.extra_tag)
    os.makedirs(root_result_dir, exist_ok=True)

    log_file = os.path.join(root_result_dir, 'log_eval_all_%s.txt' % cfg.TEST.SPLIT)
    logger = create_logger(log_file)
    logger.info('**********************Start logging**********************')

    # save config
    for key, val in vars(args).items():
        logger.info("{:16} {}".format(key, val))
    save_config_to_file(cfg, logger=logger)

    # create dataloader & network
    test_loader = create_dataloader(logger)
    # NOTE: `cat` is used here in place of the PointRCNN constructor that this
    # script previously called with the same arguments.
    model = cat(num_classes=test_loader.dataset.num_class, use_xyz=True, mode='TEST')
    model.cuda()

    # copy important files to backup
    backup_dir = os.path.join(root_result_dir, 'backup_files')
    os.makedirs(backup_dir, exist_ok=True)
    _backup_source_files(
        backup_dir,
        (
            '*.py',
            '../lib/net/*.py',
            '../lib/datasets/kitti_rcnn_dataset.py',
        ),
    )

    # evaluated ckpt record; opening in append mode creates the file if it is
    # missing without truncating an existing record
    ckpt_record_file = os.path.join(root_result_dir, 'eval_list_%s.txt' % cfg.TEST.SPLIT)
    with open(ckpt_record_file, 'a'):
        pass

    # tensorboard log
    tb_log = SummaryWriter(
        log_dir=os.path.join(root_result_dir, 'tensorboard_%s' % cfg.TEST.SPLIT))

    wait_second = 30
    try:
        while True:
            # check whether there is checkpoint which is not evaluated
            cur_epoch_id, cur_ckpt = get_no_evaluated_ckpt(ckpt_dir, ckpt_record_file)

            # -1 means nothing is pending; only parse the id once we know it
            # is a real epoch id.
            epoch_value = None if cur_epoch_id == -1 else float(cur_epoch_id)
            if epoch_value is None or int(epoch_value) < args.start_epoch:
                print('Wait %s second for next check: %s' % (wait_second, ckpt_dir))
                time.sleep(wait_second)
                continue

            # load checkpoint
            train_utils.load_checkpoint(model, filename=cur_ckpt)

            # start evaluation
            cur_result_dir = os.path.join(
                root_result_dir, 'epoch_%s' % cur_epoch_id, cfg.TEST.SPLIT)
            tb_dict = eval_one_epoch(model, test_loader, cur_epoch_id, cur_result_dir, logger)

            # Only whole-numbered epochs are written to tensorboard; a
            # fractional epoch id has no integer step to log against.
            step = int(epoch_value)
            if step == epoch_value:
                for key, val in tb_dict.items():
                    tb_log.add_scalar(key, val, step)

            # record this epoch which has been evaluated
            with open(ckpt_record_file, 'a') as f:
                print('%s' % cur_epoch_id, file=f)
            logger.info('Epoch %s has been evaluated' % cur_epoch_id)
    finally:
        # The loop only ends through an exception or Ctrl-C; flush and close
        # the event file so the scalars written so far are not lost.
        tb_log.close()
# Training setup for the IOUN configuration: turn the IOUN config section on
# and switch both RPN flags off.
cfg.IOUN.ENABLED = True
cfg.RPN.ENABLED = cfg.RPN.FIXED = False

# Default output location; an explicit --output_dir takes precedence.
root_result_dir = os.path.join('../', 'output', 'ioun', cfg.TAG + exp_id)
if args.output_dir is not None:
    root_result_dir = args.output_dir
os.makedirs(root_result_dir, exist_ok=True)

log_file = os.path.join(root_result_dir, 'log_train.txt')
logger = create_logger(log_file)
logger.info('**********************Start logging**********************')

# log to file
for key, val in vars(args).items():
    logger.info("{:16} {}".format(key, val))
save_config_to_file(cfg, logger=logger)

# copy important files to backup
# Done with glob + shutil instead of shelling out to `cp`, so that an output
# directory containing spaces or shell metacharacters is handled correctly.
# The copy stays best-effort: missing files are skipped and a failed copy is
# reported without aborting the run.
import glob
import shutil

backup_dir = os.path.join(root_result_dir, 'backup_files')
os.makedirs(backup_dir, exist_ok=True)
for _pattern in (
    '*.py',
    '../lib/net/*.py',
    '../lib/datasets/kitti_rcnn_dataset.py',
    '../lib/datasets/kitti_boxplace_dataset.py',
    './train_utils/train_utils.py',
    '../lib/utils/loss_utils.py',
):
    for _src in glob.glob(_pattern):
        # Plain `cp` (no -r) does not copy directories either.
        if not os.path.isfile(_src):
            continue
        try:
            shutil.copy(_src, backup_dir)
        except OSError as err:
            print('Failed to back up %s: %s' % (_src, err))

# tensorboard log
tb_log = SummaryWriter(
    log_dir=os.path.join(root_result_dir, 'tensorboard'))