예제 #1
0
def main():
    args = parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        synchronize()

    cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    logger = setup_logger('ssd', output_dir, get_rank())
    logger.info(f'Using {num_gpus} GPUs.')

    logger.info(f'Called with args:\n{args}')
    logger.info(f'Running with config:\n{cfg}')

    model = train(cfg, args.local_rank, args.distributed)

    run_test(cfg, model, args.distributed)
예제 #2
0
파일: test.py 프로젝트: mengzhangjian/SSD-1
def main():
    parser = argparse.ArgumentParser(
        description='SSD Evaluation on VOC and COCO dataset.')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
        type=str,
    )

    parser.add_argument("--output_dir",
                        default="eval_results",
                        type=str,
                        help="The directory to store evaluation results.")

    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    evaluation(cfg, ckpt=args.ckpt, distributed=distributed)
예제 #3
0
파일: test_net.py 프로젝트: tkhe/ssd-family
def main():
    args = parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if torch.cuda.is_available():
        torch.backends.cudnn.benchmark = True

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend='nccl',
                                             init_method='env://')
        synchronize()

    cfg.merge_from_file(args.config)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ''
    logger = setup_logger('ssd', save_dir, get_rank())
    logger.info(f'Using {num_gpus} GPUs.')

    logger.info(f'Called with args:\n{args}')
    logger.info(f'Running with config:\n{cfg}')

    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = Checkpointer(model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            if not os.path.exists(output_folder):
                os.makedirs(output_folder)
            output_folders[idx] = output_folder

    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            cfg,
            model,
            data_loader_val,
            dataset_name=dataset_name,
            device=device,
            output_dir=output_folder,
        )
        synchronize()
def main():
    parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--vgg', help='Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth')
    parser.add_argument('--resume', default=None, type=str, help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step', default=50, type=int, help='Print logs every log_step')
    parser.add_argument('--save_step', default=5000, type=int, help='Save checkpoint every save_step')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)
예제 #5
0
def main():
    parser = argparse.ArgumentParser(
        description='SSD Evaluation on VOC and COCO dataset.')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--weights", type=str, help="Trained weights.")
    parser.add_argument("--output_dir",
                        default="eval_results",
                        type=str,
                        help="The directory to store evaluation results.")

    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    evaluation(cfg,
               weights_file=args.weights,
               output_dir=args.output_dir,
               distributed=distributed)
예제 #6
0
def main():
    parser = argparse.ArgumentParser(
        description='SSD Evaluation on VOC and COCO dataset.')
    parser.add_argument(
        "config_file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
        type=str,
    )
    parser.add_argument("--N_images",
                        default=100,
                        type=int,
                        help="The number of images to check runtime with.")
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.TEST.BATCH_SIZE = 1
    cfg.freeze()

    logger = setup_logger("SSD", cfg.OUTPUT_DIR)
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    evaluation(cfg, ckpt=args.ckpt, N_images=args.N_images)
예제 #7
0
def main():
    args = get_parser().parse_args()
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    output_dir = pathlib.Path(cfg.OUTPUT_DIR)
    output_dir.mkdir(exist_ok=True, parents=True)

    logger = setup_logger("SSD", output_dir)
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = start_train(cfg)

    logger.info('Start evaluating...')
    torch.cuda.empty_cache()  # speed up evaluating after training finished
    do_evaluation(cfg, model)
예제 #8
0
def main():
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "config_file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    output_dir = pathlib.Path(cfg.OUTPUT_DIR)
    output_dir.mkdir(exist_ok=True, parents=True)

    logger = setup_logger("SSD", output_dir)
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = start_train(cfg)

    logger.info('Start evaluating...')
    torch.cuda.empty_cache()  # speed up evaluating after training finished
    do_evaluation(cfg, model)
예제 #9
0
def main():
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--log_step',
                        default=10,
                        type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step',
                        default=2500,
                        type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step',
        default=2500,
        type=int,
        help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # Train distance regression network
    train_distance_regr()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    if cfg.OUTPUT_DIR:
        mkdir(cfg.OUTPUT_DIR)

    logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, distributed=args.distributed)
예제 #10
0
파일: train_active.py 프로젝트: mDuval1/SSD
def main():
    """
    python train.py --config-file ../SSD/configs/mobilenet_v2_ssd320_voc0712.yaml \
                    --log_step 10 \
                    --init_size 500 \
                    --query_size 100 \
                    --query_step 2 \
                    --train_step_per_query 50 \
                    --strategy uncertainty_aldod_sampling

    nohup python train.py --config-file ../SSD/configs/mobilenet_v2_ssd320_voc0712.yaml \
                    --log_step 10 \
                    --init_size 1000 \
                    --query_size 300 \
                    --query_step 10 \
                    --train_step_per_query 1000 \
                    --strategy uncertainty_aldod_sampling &     
    """
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--log_step',
                        default=10,
                        type=int,
                        help='Print logs every log_step')
    parser.add_argument('--init_size',
                        default=1000,
                        type=int,
                        help='Number of initial labeled samples')
    parser.add_argument('--query_step',
                        default=10,
                        type=int,
                        help='Number of queries')
    parser.add_argument('--query_size',
                        default=300,
                        type=int,
                        help='Number of assets to query each time')
    parser.add_argument('--strategy',
                        default='random_sampling',
                        type=str,
                        help='Strategy to use to sample assets')
    parser.add_argument('--train_step_per_query',
                        default=500,
                        type=int,
                        help='Number of training steps after each query')
    parser.add_argument('--previous_queries',
                        default=None,
                        type=str,
                        help='Path to previous queries to use')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    args.save_step = 10000000
    args.eval_step = 10000000

    np.random.seed(42)
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    time = datetime.now().strftime("%Y%m%d%H%M%S")
    experiment_dir = os.path.join(
        cfg.OUTPUT_DIR, f'results/{args.strategy}/experiment-{time}')
    args.result_dir = experiment_dir

    filename = os.path.join(experiment_dir, f'csv.txt')
    argspath = os.path.join(experiment_dir, f'args.pickle')
    querypath = os.path.join(experiment_dir, f'queries.txt')
    model_dir = os.path.join(experiment_dir, 'model')

    mkdir(experiment_dir)
    mkdir(model_dir)

    args.filename = filename
    args.querypath = querypath
    args.model_dir = model_dir
    fields = [
        'strategy', 'args', 'step', 'mAP', 'train_time', 'active_time',
        'total_time', 'total_samples', 'bboxes'
    ]
    with open(filename, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(fields)
    with open(querypath, 'w') as f:
        writer = csv.writer(f)
        writer.writerow(['step', 'indices'])
    with open(argspath, 'wb') as f:
        pickle.dump(args, f)

    logger = setup_logger("SSD", dist_util.get_rank(), experiment_dir)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    active_train(cfg, args)
예제 #11
0
def main():
    # 解析命令行 读取配置文件
    '''
    规定了模型的基本参数,训练的类,一共是20类加上背景所以是21
    模型的输入大小,为了不对原图造成影响,一般是填充为300*300的图像
    训练的文件夹路径2007和2012,测试的文件夹路径2007
    最大迭代次数为120000.学习率还有gamma的值,总之就是一系列的超参数
    输出的文件目录
    MODEL:
        NUM_CLASSES: 21
    INPUT:
        IMAGE_SIZE: 300
    DATASETS:
        TRAIN: ("voc_2007_trainval", "voc_2012_trainval")
        TEST: ("voc_2007_test", )
    SOLVER:
        MAX_ITER: 120000
        LR_STEPS: [80000, 100000]
        GAMMA: 0.1
        BATCH_SIZE: 32
        LR: 1e-3
    OUTPUT_DIR: 'outputs/vgg_ssd300_voc0712'
    Returns:
    '''
    parser = argparse.ArgumentParser(description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="configs/vgg_ssd300_voc0712.yaml",
        # default="configs/vgg_ssd300_visdrone0413.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    # 每2500步保存一次文件,并且验证一次文件,记录是每10次记录一次,然后如果不想看tensor的记录的话,可以关闭,使用的是tensorboardX
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument('--log_step', default=10, type=int, help='Print logs every log_step')
    parser.add_argument('--save_step', default=2500, type=int, help='Save checkpoint every save_step')
    parser.add_argument('--eval_step', default=2500, type=int, help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    # 参数解析,可以使用多GPU进行训练
    args = parser.parse_args()
    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    # 做一些启动前必要的检查
    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl", init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    # 创建模型输出文件夹
    if cfg.OUTPUT_DIR:
        mkdir(cfg.OUTPUT_DIR)

    # 使用logger来进行记录
    logger = setup_logger("SSD", dist_util.get_rank(), cfg.OUTPUT_DIR)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    # 加载配置文件
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # 模型训练
    # model = train(cfg, args)
    model = train(cfg, args)

    # 开始进行验证
    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, distributed=args.distributed)
예제 #12
0
def main():
    parser = argparse.ArgumentParser(description='self_ade on SSD')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument('--weights',
                        default=None,
                        type=str,
                        help='Checkpoint state_dict file to use for self_ade')
    parser.add_argument(
        "--self_ade_iterations",
        default=50,
        type=int,
        help="Number of adaptation iterations to perform for each target")
    parser.add_argument("--num_workers",
                        default=4,
                        type=int,
                        help="Number of workers to use for data loaders")
    parser.add_argument("--learning_rate",
                        default=1e-3,
                        type=float,
                        help="Learning rate to be used for adaptation steps")
    parser.add_argument(
        "--self_ade_weight",
        default=0.8,
        type=float,
        help=
        "The weight to be applied to the loss of the self_ade adaptation task")
    parser.add_argument(
        "--warmup_steps",
        default=20,
        type=int,
        help="Steps to linearly increase learning rate from 0 to learning_rate"
    )
    parser.add_argument("--skip_no_self_ade_eval",
                        action='store_true',
                        help="Skips no self_ade evaluation for speed")

    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    setup_logger("SSD", 0)
    logger = setup_logger("self_ade", 0)

    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()
    logger.info("Loaded configuration file {}".format(args.config_file))

    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    setup_self_ade(cfg, args)
예제 #13
0
파일: train.py 프로젝트: shenmayufei/294-82
def main():
    parser = ArgumentParser(
        description="Single Shot MultiBox Detector Training With PyTorch")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="config file name or path (relative to the configs/ folder) ",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--log_step",
                        default=50,
                        type=int,
                        help="Print logs every log_step")
    parser.add_argument("--save_step",
                        default=5000,
                        type=int,
                        help="Save checkpoint every save_step")
    parser.add_argument(
        "--eval_step",
        default=5000,
        type=int,
        help="Evaluate dataset every eval_step, disabled when eval_step < 0",
    )
    parser.add_argument("--use_tensorboard", default=True, type=str2bool)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=REMAINDER,
    )
    parser.add_argument(
        "--resume_experiment",
        default="None",
        dest="resume",
        type=str,
        help="Checkpoint state_dict file to resume training from",
    )
    args = parser.parse_args()
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    else:
        cfg.MODEL.DEVICE = "cpu"
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    eman = ExperimentManager("ssd")
    output_dir = eman.get_output_dir()

    args.config_file = str(
        Path(__file__).parent / "configs" / args.config_file)
    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.OUTPUT_DIR = str(output_dir)
    cfg.freeze()

    eman.start({"cfg": cfg, "args": vars(args)})
    # We use our own output dir, set by ExperimentManager:
    # if cfg.OUTPUT_DIR:
    #     mkdir(cfg.OUTPUT_DIR)

    logger = setup_logger("SSD", dist_util.get_rank(), output_dir / "logs")
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)
    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
    logger.info(f"Output dir: {output_dir}")

    model_manager = {"best": None, "new": None}
    model = train(cfg, args, output_dir, model_manager)

    if not args.skip_test:
        logger.info("Start evaluating...")
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        eval_results = do_evaluation(
            cfg,
            model,
            distributed=args.distributed,
        )
        do_best_model_checkpointing(
            cfg,
            output_dir / "model_final.pth",
            eval_results,
            model_manager,
            logger,
            is_final=True,
        )

    eman.mark_dir_if_complete()
def main():
    parser = argparse.ArgumentParser(
        description='ssd_fcn_multitask_text_detectior training with pytorch')
    parser.add_argument(
        "--config_file",
        default="./configs/icdar2015_incidental_scene_text_512.yaml",
        metavar="FILE",
        help="path to config file",
        type=str)
    # parser.add_argument("--config_file",default="./configs/synthtext.yaml",metavar="FILE",help="path to config file",type=str)
    parser.add_argument(
        '--vgg',
        default='./pretrained_on_imgnet/vgg16_reducedfc.pth',
        help=
        'Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    )
    parser.add_argument(
        '--resume',
        default=
        "/home/binchengxiong/ssd_fcn_multitask_text_detection_pytorch1.0/output/ssd512_vgg_iteration_043000.pth",
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step',
                        default=50,
                        type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step',
                        default=1000,
                        type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step',
        default=5000,
        type=int,
        help='Evaluate dataset every eval_step, disabled when eval_step < 0')
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--skip-test",
                        default=True,
                        dest="skip_test",
                        help="Do not test the final model",
                        action="store_true")
    parser.add_argument("opts",
                        help="Modify config options using the command-line",
                        default=None,
                        nargs=argparse.REMAINDER)
    args = parser.parse_args()
    num_gpus = 1
    args.num_gpus = num_gpus
    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    #freeze方法可以防止参数被后续进一步修改,ref:https://github.com/rbgirshick/yacs
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args)
예제 #15
0
def main():
    parser = argparse.ArgumentParser(
        description='Single Shot MultiBox Detector Training With PyTorch')
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        '--vgg',
        help=
        'Pre-trained vgg model path, download from https://s3.amazonaws.com/amdegroot-models/vgg16_reducedfc.pth'
    )
    parser.add_argument(
        '--resume',
        default=None,
        type=str,
        help='Checkpoint state_dict file to resume training from')
    parser.add_argument('--log_step',
                        default=50,
                        type=int,
                        help='Print logs every log_step')
    parser.add_argument('--save_step',
                        default=5000,
                        type=int,
                        help='Save checkpoint every save_step')
    parser.add_argument(
        '--eval_step',
        default=0,
        type=int,
        help=
        'Evaluate dataset every eval_step, disabled when eval_step <= 0. Default: disabled'
    )
    parser.add_argument('--use_tensorboard', default=True, type=str2bool)
    parser.add_argument("--num_workers",
                        default=4,
                        type=int,
                        help="Number of workers to use for data loaders")
    parser.add_argument(
        "--eval_mode",
        default="test",
        type=str,
        help=
        'Use defined test datasets for periodic evaluation or use a validation split. Default: "test", alternative "val"'
    )
    parser.add_argument(
        "--return_best",
        default=False,
        type=str2bool,
        help=
        "If false (default) tests on the target the last model. If true tests on the target the model with the best performance on the validation set"
    )
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    args.num_gpus = num_gpus

    if torch.cuda.is_available():
        # This flag allows you to enable the inbuilt cudnn auto-tuner to
        # find the best algorithm to use for your hardware.
        torch.backends.cudnn.benchmark = True
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    logger = setup_logger("SSD", distributed_util.get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    if not os.path.exists(cfg.OUTPUT_DIR):
        if not args.distributed or (args.distributed
                                    and distributed_util.is_main_process()):
            os.makedirs(cfg.OUTPUT_DIR)

    model = train(cfg, args)

    if not args.skip_test:
        logger.info('Start evaluating...')
        torch.cuda.empty_cache()  # speed up evaluating after training finished
        do_evaluation(cfg, model, cfg.OUTPUT_DIR, distributed=args.distributed)