Example #1
def train_matcher(cfg,
                  model,
                  train_dataset,
                  eval_dataset=None,
                  vis_dataset=None,
                  distributed=False,
                  validate=False,
                  logger=None):
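    """Dispatch matcher training to the distributed or single-process path.

    `train_dataset` drives optimization, `eval_dataset` is used for periodic
    validation when `validate` is True, and `vis_dataset` provides samples to
    visualize (e.g. on TensorBoard).
    """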
    if logger is None:
        logger = get_root_logger(cfg.log_level)

    # start training
    if distributed:
        _dist_train(model,
                    train_dataset,
                    cfg,
                    eval_dataset,
                    vis_dataset,
                    validate=validate,
                    logger=logger)
    else:
        _non_dist_train(model,
                        train_dataset,
                        cfg,
                        eval_dataset,
                        vis_dataset,
                        validate=validate,
                        logger=logger)
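train_matcher hides the model wrapping and the training loop inside _dist_train and _non_dist_train, whose bodies are not part of these examples. The test scripts below make the equivalent single-GPU versus distributed choice explicit, and a small helper following that same pattern might look like the sketch here (wrap_model is a hypothetical name; the imports shown are the usual mmcv ones, which the examples themselves do not show):

import torch
from mmcv.parallel import MMDataParallel, MMDistributedDataParallel


def wrap_model(model, distributed):
    # Hypothetical helper mirroring the wrapping the test mains perform;
    # the real _dist_train/_non_dist_train bodies are not shown above.
    if distributed:
        return MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
    return MMDataParallel(model, device_ids=[0])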
Example #2
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    if args.checkpoint is not None:
        cfg.checkpoint = args.checkpoint
    if args.out_dir is not None:
        cfg.out_dir = args.out_dir
    if args.gpus is not None:
        cfg.gpus = args.gpus
    cfg.show = args.show

    mkdir_or_exist(cfg.out_dir)

    # init the logger before other steps
    logger = get_root_logger(cfg.out_dir,
                             cfg.log_level,
                             filename="test_log.txt")
    logger.info("Using {} GPUs".format(cfg.gpus))
    logger.info('Distributed training: {}'.format(distributed))

    # log environment info
    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info(args)

    logger.info("Running with config:\n{}".format(cfg.text))

    # build the dataset
    test_dataset = build_dataset(cfg, 'test')

    # build the model and load checkpoint
    model = build_model(cfg)
    checkpoint = load_checkpoint(model, cfg.checkpoint, map_location='cpu')

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, test_dataset, cfg, args.show)
    else:
        model = MMDistributedDataParallel(model.cuda())
        outputs = multi_gpu_test(model,
                                 test_dataset,
                                 cfg,
                                 args.show,
                                 tmpdir=osp.join(cfg.out_dir, 'temp'))

    rank, _ = get_dist_info()
    if cfg.out_dir is not None and rank == 0:
        result_path = osp.join(cfg.out_dir, 'result.pkl')
        logger.info('\nwriting results to {}'.format(result_path))
        mmcv.dump(outputs, result_path)

        if args.evaluate:
            error_log_buffer = LogBuffer()
            for result in outputs:
                error_log_buffer.update(result['Error'])
            error_log_buffer.average()
            log_items = []
            for key, val in error_log_buffer.output.items():
                if isinstance(val, float):
                    val = '{:.4f}'.format(val)
                log_items.append('{}: {}'.format(key, val))

            if len(error_log_buffer.output) == 0:
                log_items.append('nothing to evaluate!')

            log_str = 'Evaluation Result: \t'
            log_str += ', '.join(log_items)
            logger.info(log_str)
            error_log_buffer.clear()
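Example #2 relies on a parse_args() helper that is not shown. A minimal argparse-based sketch covering only the attributes this test script reads (config, checkpoint, out_dir, gpus, show, evaluate, launcher) could look like the following; the real project's flag names, types and defaults may differ:

import argparse


def parse_args():
    # Hypothetical sketch: only the attributes accessed in Example #2.
    parser = argparse.ArgumentParser(description='Test a dense matcher')
    parser.add_argument('config', help='path to the config file')
    parser.add_argument('--checkpoint', help='checkpoint file to load')
    parser.add_argument('--out_dir', help='directory for logs and results')
    parser.add_argument('--gpus', type=int, help='number of GPUs to use')
    parser.add_argument('--show', action='store_true',
                        help='save result visualizations to disk')
    parser.add_argument('--evaluate', action='store_true',
                        help='compute error metrics on the outputs')
    parser.add_argument('--launcher', default='none',
                        choices=['none', 'pytorch', 'slurm', 'mpi'],
                        help='job launcher for distributed testing')
    # used by torch.distributed.launch when launcher == 'pytorch'
    parser.add_argument('--local_rank', type=int, default=0)
    return parser.parse_args()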
Example #3
def main():
    args = parse_args()

    cfg = Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True
    # update configs according to CLI args
    if args.work_dir is not None:
        cfg.work_dir = args.work_dir
    if args.resume_from is not None:
        cfg.resume_from = args.resume_from
    if args.validate is not None:
        cfg.validate = args.validate
    if args.gpus is not None:
        cfg.gpus = args.gpus

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    mkdir_or_exist(cfg.work_dir)
    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.work_dir, '{}_log.txt'.format(timestamp))
    logger = get_root_logger(cfg.work_dir, cfg.log_level, filename=log_file)
    logger.info("Using {} GPUs".format(cfg.gpus))
    logger.info('Distributed training: {}'.format(distributed))

    # log environment info
    logger.info("Collecting env info (might take some time)")
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line)
    logger.info("\n" + collect_env_info())
    logger.info('\n' + dash_line)

    logger.info(args)

    logger.info("Running with config:\n{}".format(cfg.text))

    # set random seeds
    if args.seed is not None:
        logger.info('Set random seed to {}'.format(args.seed))
        set_random_seed(args.seed)

    model = build_model(cfg)

    train_dataset = build_dataset(cfg, 'train')
    eval_dataset = build_dataset(cfg, 'eval')
    # everything in this dataset will be visualized as images on TensorBoardX
    vis_dataset = build_dataset(cfg, 'vis')

    if cfg.checkpoint_config is not None:
        # save the config file content in checkpoints as metadata
        cfg.checkpoint_config.meta = dict(config=cfg.text)

    train_matcher(cfg,
                  model,
                  train_dataset,
                  eval_dataset,
                  vis_dataset,
                  distributed=distributed,
                  validate=args.validate,
                  logger=logger)
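The training script above calls set_random_seed(args.seed) without showing its body. A common pattern for such a helper, assumed here rather than taken from this project, seeds the Python, NumPy and PyTorch generators together:

import random

import numpy as np
import torch


def set_random_seed(seed):
    # Assumed implementation: seed every RNG the training pipeline may use.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)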
Example #4
def main():
    args = parse_args()

    cfg = mmcv.Config.fromfile(args.config)
    # set cudnn_benchmark
    if cfg.get('cudnn_benchmark', False):
        torch.backends.cudnn.benchmark = True

    # init distributed env first, since logger depends on the dist info.
    if args.launcher == 'none':
        distributed = False
    else:
        distributed = True
        init_dist(args.launcher, **cfg.dist_params)

    if args.checkpoint is not None:
        cfg.checkpoint = args.checkpoint
    if args.out_dir is not None:
        cfg.out_dir = args.out_dir
    if args.gpus is not None:
        cfg.gpus = args.gpus
    cfg.show = args.show == 'True'  # args.show is parsed as a string here

    mkdir_or_exist(cfg.out_dir)

    # init the logger before other steps
    timestamp = time.strftime('%Y%m%d_%H%M%S', time.localtime())
    log_file = osp.join(cfg.out_dir, '{}_test_log.txt'.format(timestamp))
    logger = get_root_logger(cfg.out_dir, cfg.log_level, filename=log_file)
    logger.info("Using {} GPUs".format(cfg.gpus))
    logger.info('Distributed training: {}'.format(distributed))
    logger.info("Whether the result will be saved to disk in image: {}".format(
        args.show))

    # log environment info
    logger.info("Collecting env info (might take some time)")
    dash_line = '-' * 60 + '\n'
    logger.info('Environment info:\n' + dash_line)
    logger.info("\n" + collect_env_info())
    logger.info('\n' + dash_line)

    logger.info(args)

    logger.info("Running with config:\n{}".format(cfg.text))

    # build the dataset
    test_dataset = build_dataset(cfg, 'test')

    # build the model and load checkpoint
    model = build_model(cfg)
    checkpoint = load_checkpoint(model, cfg.checkpoint, map_location='cpu')

    if not distributed:
        model = MMDataParallel(model, device_ids=[0])
        outputs = single_gpu_test(model, test_dataset, cfg, cfg.show)
    else:
        model = MMDistributedDataParallel(
            model.cuda(),
            device_ids=[torch.cuda.current_device()],
            broadcast_buffers=False)
        outputs = multi_gpu_test(model,
                                 test_dataset,
                                 cfg,
                                 cfg.show,
                                 tmpdir=osp.join(cfg.out_dir, 'temp'))

    rank, _ = get_dist_info()
    if cfg.out_dir is not None and rank == 0:
        result_path = osp.join(cfg.out_dir, 'result.pkl')
        logger.info('\nwriting results to {}'.format(result_path))
        mmcv.dump(outputs, result_path)

        if args.validate:
            error_log_buffer = LogBuffer()
            for result in outputs:
                error_log_buffer.update(result['Error'])
            error_log_buffer.average()

            task = cfg.get('task', 'stereo')
            # for better visualization, format into pandas
            format_output_dict = output_evaluation_in_pandas(
                error_log_buffer.output, task)

            log_items = []
            for key, val in format_output_dict.items():
                if isinstance(val, pd.DataFrame):
                    log_items.append("\n{}:\n{} \n".format(key, val))
                elif isinstance(val, float):
                    val = "{:.4f}".format(val)
                    log_items.append("{}: {}".format(key, val))
                else:
                    log_items.append("{}: {}".format(key, val))

            if len(error_log_buffer.output) == 0:
                log_items.append('nothing to evaluate!')

            log_str = 'Evaluation Result: \t'
            log_str += ", ".join(log_items)
            logger.info(log_str)
            error_log_buffer.clear()
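Both test scripts collect per-sample error dicts in a LogBuffer, average them, and read the result from .output. The interface matches mmcv's LogBuffer (update / average / output / clear); the stand-in below only illustrates the averaging behavior the scripts rely on and is not the mmcv implementation:

from collections import defaultdict


class SimpleLogBuffer:
    # Minimal stand-in for the LogBuffer used above (illustration only).
    def __init__(self):
        self.val_history = defaultdict(list)
        self.output = {}

    def update(self, vars):
        # `vars` is a dict of scalar metrics, e.g. result['Error'].
        for key, val in vars.items():
            self.val_history[key].append(val)

    def average(self):
        # Reduce the accumulated history to a single mean per metric.
        self.output = {
            key: sum(vals) / len(vals)
            for key, vals in self.val_history.items()
        }

    def clear(self):
        self.val_history.clear()
        self.output = {}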