def test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.deprecated.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Beispiel #4
0
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer, step_size=1000,
        gamma=0.9)  #make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg,
        model,
        None,
        None,
        output_dir,
        save_to_disk  # , optimizer, scheduler
    )
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model
def train(cfg, local_rank, distributed, use_tensorboard=False, logger=None):
    arguments = {"iteration": 0}
    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    if cfg.SOLVER.UNFREEZE_CONV_BODY:
        for p in model.backbone.parameters():
            p.requires_grad = True

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg,
                                         model,
                                         optimizer,
                                         scheduler,
                                         output_dir,
                                         save_to_disk,
                                         logger=logger)
    print(cfg.TRAIN.IGNORE_LIST)
    extra_checkpoint_data = checkpointer.load(
        cfg.MODEL.WEIGHT, ignore_list=cfg.TRAIN.IGNORE_LIST)
    arguments.update(extra_checkpoint_data)

    if cfg.SOLVER.KEEP_LR:
        optimizer = make_optimizer(cfg, model)
        scheduler = make_lr_scheduler(cfg, optimizer)

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD
    tensorboard_logdir = cfg.OUTPUT_DIR
    tensorboard_exp_name = cfg.TENSORBOARD_EXP_NAME
    snapshot = cfg.SOLVER.SNAPSHOT_ITERS

    do_train(model,
             data_loader,
             optimizer,
             scheduler,
             checkpointer,
             device,
             checkpoint_period,
             arguments,
             snapshot,
             tensorboard_logdir,
             tensorboard_exp_name,
             use_tensorboard=use_tensorboard)

    return model
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]
        # 计算损失,model会计算images与targets的损失,并返回
        # 实际调用的是GeneralizedRCNN类的forward方法
        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        # 多GPU的时候,会有无效损失,去除这些损失
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        # amp的用法
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()
        # scheduler也需要step
        scheduler.step()
        # 每个batch要多久
        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)
        # 还需要多久
        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        # 每20次迭代或者最好一次迭代了,输出信息
        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,  # 还需要多久
                    iter=
                    iteration,  # 第几次迭代,是从maskrcnn_benchmark.utils.checkpoint记录的迭代次数开始算起
                    meters=str(
                        meters
                    ),  # 包含loss、loss_classifier、loss_box_reg、loss_objectness、loss_rpn_box_reg
                    lr=optimizer.param_groups[0]["lr"],  # 学习率
                    memory=torch.cuda.max_memory_allocated() / 1024.0 /
                    1024.0,  # 占用最大的GPU内存
                ))
        # 每隔checkpoint_period保存一次checkpoint
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        # 最后一次迭代的时候保存checkpoint
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)
    # train总共花了多少时间
    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
Beispiel #7
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--json-file",
        default="",
        metavar="FILE",
        help="path to prediction bbox json file",
    )
    # parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    # distributed = num_gpus > 1

    # if distributed:
    #     torch.cuda.set_device(args.local_rank)
    #     torch.distributed.init_process_group(
    #         backend="nccl", init_method="env://"
    #     )
    #     synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    # logger.info("Using {} GPUs".format(num_gpus))
    # logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    # model = build_detection_model(cfg)
    # model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    # checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    # if cfg.MODEL.MASK_ON:
    #     iou_types = iou_types + ("segm",)
    # if cfg.MODEL.KEYPOINT_ON:
    #     iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=False)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        # inference(
        #     model,
        #     data_loader_val,
        #     dataset_name=dataset_name,
        #     iou_types=iou_types,
        #     box_only=False if cfg.MODEL.FCOS_ON or cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
        #     device=cfg.MODEL.DEVICE,
        #     expected_results=cfg.TEST.EXPECTED_RESULTS,
        #     expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        #     output_folder=output_folder,
        # )

        # extra_args = dict(
        #     box_only=False,
        #     iou_types=iou_types,
        #     expected_results=cfg.TEST.EXPECTED_RESULTS,
        #     expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        # )
        dataset = data_loader_val.dataset

        # evaluate(dataset=dataset,
        #         predictions=predictions,
        #         output_folder=output_folder,
        #         only_human=True,
        #         **extra_args)

        do_coco_json_evaluation(
            dataset=dataset,
            json_file=args.json_file,
            box_only=False,
            output_folder=output_folder,
            iou_types=iou_types,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL)

        synchronize()
Beispiel #8
0
def train(cfg, local_rank, distributed, logger):
    if is_main_process():
        wandb.init(project='scene-graph',
                   entity='sgg-speaker-listener',
                   config=cfg.LISTENER)
    debug_print(logger, 'prepare training')

    model = build_detection_model(cfg)
    listener = build_listener(cfg)

    speaker_listener = SpeakerListener(model,
                                       listener,
                                       cfg,
                                       is_joint=cfg.LISTENER.JOINT)
    if is_main_process():
        wandb.watch(listener)

    debug_print(logger, 'end model construction')

    # modules that should be always set in eval mode
    # their eval() method should be called after model.train() is called
    eval_modules = (
        model.rpn,
        model.backbone,
        model.roi_heads.box,
    )

    fix_eval_modules(eval_modules)

    # NOTE, we slow down the LR of the layers start with the names in slow_heads
    if cfg.MODEL.ROI_RELATION_HEAD.PREDICTOR == "IMPPredictor":
        slow_heads = [
            "roi_heads.relation.box_feature_extractor",
            "roi_heads.relation.union_feature_extractor.feature_extractor",
        ]
    else:
        slow_heads = []

    # load pretrain layers to new layers
    load_mapping = {
        "roi_heads.relation.box_feature_extractor":
        "roi_heads.box.feature_extractor",
        "roi_heads.relation.union_feature_extractor.feature_extractor":
        "roi_heads.box.feature_extractor"
    }

    if cfg.MODEL.ATTRIBUTE_ON:
        load_mapping[
            "roi_heads.relation.att_feature_extractor"] = "roi_heads.attribute.feature_extractor"
        load_mapping[
            "roi_heads.relation.union_feature_extractor.att_feature_extractor"] = "roi_heads.attribute.feature_extractor"

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    listener.to(device)

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    num_batch = cfg.SOLVER.IMS_PER_BATCH

    optimizer = make_optimizer(cfg,
                               model,
                               logger,
                               slow_heads=slow_heads,
                               slow_ratio=10.0,
                               rl_factor=float(num_batch))
    listener_optimizer = make_listener_optimizer(cfg, listener)
    scheduler = make_lr_scheduler(cfg, optimizer, logger)
    listener_scheduler = None
    debug_print(logger, 'end optimizer and schedule')

    if cfg.LISTENER.JOINT:
        speaker_listener_optimizer = make_speaker_listener_optimizer(
            cfg, speaker_listener.speaker, speaker_listener.listener)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'

    if cfg.LISTENER.JOINT:
        speaker_listener, speaker_listener_optimizer = amp.initialize(
            speaker_listener, speaker_listener_optimizer, opt_level='O0')
    else:
        speaker_listener, listener_optimizer = amp.initialize(
            speaker_listener, listener_optimizer, opt_level='O0')

    #listener, listener_optimizer = amp.initialize(listener, listener_optimizer, opt_level='O0')
    #[model, listener], [optimizer, listener_optimizer] = amp.initialize([model, listener], [optimizer, listener_optimizer], opt_level='O1', loss_scale=1)
    #model = amp.initialize(model, opt_level='O1')

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True,
        )

        listener = torch.nn.parallel.DistributedDataParallel(
            listener,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True,
        )

    debug_print(logger, 'end distributed')
    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    listener_dir = cfg.LISTENER_DIR
    save_to_disk = get_rank() == 0

    speaker_checkpointer = DetectronCheckpointer(cfg,
                                                 model,
                                                 optimizer,
                                                 scheduler,
                                                 output_dir,
                                                 save_to_disk,
                                                 custom_scheduler=True)

    listener_checkpointer = Checkpointer(listener,
                                         optimizer=listener_optimizer,
                                         save_dir=listener_dir,
                                         save_to_disk=save_to_disk,
                                         custom_scheduler=False)

    speaker_listener.add_listener_checkpointer(listener_checkpointer)
    speaker_listener.add_speaker_checkpointer(speaker_checkpointer)

    speaker_listener.load_listener()
    speaker_listener.load_speaker(load_mapping=load_mapping)
    debug_print(logger, 'end load checkpointer')
    train_data_loader = make_data_loader(cfg,
                                         mode='train',
                                         is_distributed=distributed,
                                         start_iter=arguments["iteration"],
                                         ret_images=True)
    val_data_loaders = make_data_loader(cfg,
                                        mode='val',
                                        is_distributed=distributed,
                                        ret_images=True)

    debug_print(logger, 'end dataloader')
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if cfg.SOLVER.PRE_VAL:
        logger.info("Validate before training")
        #output =  run_val(cfg, model, listener, val_data_loaders, distributed, logger)
        #print('OUTPUT: ', output)
        #(sg_loss, img_loss, sg_acc, img_acc) = output

    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(train_data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()

    print_first_grad = True

    listener_loss_func = torch.nn.MarginRankingLoss(margin=1, reduction='none')
    mistake_saver = None
    if is_main_process():
        ds_catalog = DatasetCatalog()
        dict_file_path = os.path.join(
            ds_catalog.DATA_DIR,
            ds_catalog.DATASETS['VG_stanford_filtered_with_attribute']
            ['dict_file'])
        ind_to_classes, ind_to_predicates = load_vg_info(dict_file_path)
        ind_to_classes = {k: v for k, v in enumerate(ind_to_classes)}
        ind_to_predicates = {k: v for k, v in enumerate(ind_to_predicates)}
        print('ind to classes:', ind_to_classes, '/n ind to predicates:',
              ind_to_predicates)
        mistake_saver = MistakeSaver(
            '/Scene-Graph-Benchmark.pytorch/filenames_masked', ind_to_classes,
            ind_to_predicates)

    #is_printed = False
    while True:
        try:
            listener_iteration = 0
            for iteration, (images, targets,
                            image_ids) in enumerate(train_data_loader,
                                                    start_iter):

                if cfg.LISTENER.JOINT:
                    speaker_listener_optimizer.zero_grad()
                else:
                    listener_optimizer.zero_grad()

                #print(f'ITERATION NUMBER: {iteration}')
                if any(len(target) < 1 for target in targets):
                    logger.error(
                        f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
                    )
                if len(images) <= 1:
                    continue

                data_time = time.time() - end
                iteration = iteration + 1
                listener_iteration += 1
                arguments["iteration"] = iteration
                model.train()
                fix_eval_modules(eval_modules)
                images_list = deepcopy(images)
                images_list = to_image_list(
                    images_list, cfg.DATALOADER.SIZE_DIVISIBILITY).to(device)

                for i in range(len(images)):
                    images[i] = images[i].unsqueeze(0)
                    images[i] = F.interpolate(images[i],
                                              size=(224, 224),
                                              mode='bilinear',
                                              align_corners=False)
                    images[i] = images[i].squeeze()

                images = torch.stack(images).to(device)
                #images.requires_grad_()

                targets = [target.to(device) for target in targets]

                speaker_loss_dict = {}
                if not cfg.LISTENER.JOINT:
                    score_matrix = speaker_listener(images_list, targets,
                                                    images)
                else:
                    score_matrix, _, speaker_loss_dict = speaker_listener(
                        images_list, targets, images)

                speaker_summed_losses = sum(
                    loss for loss in speaker_loss_dict.values())

                # reduce losses over all GPUs for logging purposes
                if not not cfg.LISTENER.JOINT:
                    speaker_loss_dict_reduced = reduce_loss_dict(
                        speaker_loss_dict)
                    speaker_losses_reduced = sum(
                        loss for loss in speaker_loss_dict_reduced.values())
                    speaker_losses_reduced /= num_gpus

                    if is_main_process():
                        wandb.log(
                            {"Train Speaker Loss": speaker_losses_reduced},
                            listener_iteration)

                listener_loss = 0
                gap_reward = 0
                avg_acc = 0
                num_correct = 0

                score_matrix = score_matrix.to(device)
                # fill loss matrix
                loss_matrix = torch.zeros((2, images.size(0), images.size(0)),
                                          device=device)
                # sg centered scores
                for true_index in range(loss_matrix.size(1)):
                    row_score = score_matrix[true_index]
                    (true_scores, predicted_scores,
                     binary) = format_scores(row_score, true_index, device)
                    loss_vec = listener_loss_func(true_scores,
                                                  predicted_scores, binary)
                    loss_matrix[0][true_index] = loss_vec
                # image centered scores
                transposted_score_matrix = score_matrix.t()
                for true_index in range(loss_matrix.size(1)):
                    row_score = transposted_score_matrix[true_index]
                    (true_scores, predicted_scores,
                     binary) = format_scores(row_score, true_index, device)
                    loss_vec = listener_loss_func(true_scores,
                                                  predicted_scores, binary)
                    loss_matrix[1][true_index] = loss_vec

                print('iteration:', listener_iteration)
                sg_acc = 0
                img_acc = 0
                # calculate accuracy
                for i in range(loss_matrix.size(1)):
                    temp_sg_acc = 0
                    temp_img_acc = 0
                    for j in range(loss_matrix.size(2)):
                        if loss_matrix[0][i][i] > loss_matrix[0][i][j]:
                            temp_sg_acc += 1
                        else:
                            if cfg.LISTENER.HTML:
                                if is_main_process(
                                ) and listener_iteration >= 600 and listener_iteration % 25 == 0 and i != j:
                                    detached_sg_i = (sgs[i][0].detach(),
                                                     sgs[i][1],
                                                     sgs[i][2].detach())
                                    detached_sg_j = (sgs[j][0].detach(),
                                                     sgs[j][1],
                                                     sgs[j][2].detach())
                                    mistake_saver.add_mistake(
                                        (image_ids[i], image_ids[j]),
                                        (detached_sg_i, detached_sg_j),
                                        listener_iteration, 'SG')
                        if loss_matrix[1][i][i] > loss_matrix[1][j][i]:
                            temp_img_acc += 1
                        else:
                            if cfg.LISTENER.HTML:
                                if is_main_process(
                                ) and listener_iteration >= 600 and listener_iteration % 25 == 0 and i != j:
                                    detached_sg_i = (sgs[i][0].detach(),
                                                     sgs[i][1],
                                                     sgs[i][2].detach())
                                    detached_sg_j = (sgs[j][0].detach(),
                                                     sgs[j][1],
                                                     sgs[j][2].detach())
                                    mistake_saver.add_mistake(
                                        (image_ids[i], image_ids[j]),
                                        (detached_sg_i, detached_sg_j),
                                        listener_iteration, 'IMG')

                    temp_sg_acc = temp_sg_acc * 100 / (loss_matrix.size(1) - 1)
                    temp_img_acc = temp_img_acc * 100 / (loss_matrix.size(1) -
                                                         1)
                    sg_acc += temp_sg_acc
                    img_acc += temp_img_acc
                if cfg.LISTENER.HTML:
                    if is_main_process(
                    ) and listener_iteration % 100 == 0 and listener_iteration >= 600:
                        mistake_saver.toHtml('/www')

                sg_acc /= loss_matrix.size(1)
                img_acc /= loss_matrix.size(1)

                avg_sg_acc = torch.tensor([sg_acc]).to(device)
                avg_img_acc = torch.tensor([img_acc]).to(device)
                # reduce acc over all gpus
                avg_acc = {'sg_acc': avg_sg_acc, 'img_acc': avg_img_acc}
                avg_acc_reduced = reduce_loss_dict(avg_acc)

                sg_acc = sum(acc for acc in avg_acc_reduced['sg_acc'])
                img_acc = sum(acc for acc in avg_acc_reduced['img_acc'])

                # log acc to wadb
                if is_main_process():
                    wandb.log({
                        "Train SG Accuracy": sg_acc.item(),
                        "Train IMG Accuracy": img_acc.item()
                    })

                sg_loss = 0
                img_loss = 0

                for i in range(loss_matrix.size(0)):
                    for j in range(loss_matrix.size(1)):
                        loss_matrix[i][j][j] = 0.

                for i in range(loss_matrix.size(1)):
                    sg_loss += torch.max(loss_matrix[0][i])
                    img_loss += torch.max(loss_matrix[1][:][i])

                sg_loss = sg_loss / loss_matrix.size(1)
                img_loss = img_loss / loss_matrix.size(1)
                sg_loss = sg_loss.to(device)
                img_loss = img_loss.to(device)

                loss_dict = {'sg_loss': sg_loss, 'img_loss': img_loss}

                losses = sum(loss for loss in loss_dict.values())

                # reduce losses over all GPUs for logging purposes
                loss_dict_reduced = reduce_loss_dict(loss_dict)
                sg_loss_reduced = loss_dict_reduced['sg_loss']
                img_loss_reduced = loss_dict_reduced['img_loss']
                if is_main_process():
                    wandb.log({"Train SG Loss": sg_loss_reduced})
                    wandb.log({"Train IMG Loss": img_loss_reduced})

                losses_reduced = sum(loss
                                     for loss in loss_dict_reduced.values())
                meters.update(loss=losses_reduced, **loss_dict_reduced)

                losses = losses + speaker_summed_losses * cfg.LISTENER.LOSS_COEF
                # Note: If mixed precision is not used, this ends up doing nothing
                # Otherwise apply loss scaling for mixed-precision recipe
                #losses.backward()
                if not cfg.LISTENER.JOINT:
                    with amp.scale_loss(losses,
                                        listener_optimizer) as scaled_losses:
                        scaled_losses.backward()
                else:
                    with amp.scale_loss(
                            losses,
                            speaker_listener_optimizer) as scaled_losses:
                        scaled_losses.backward()

                verbose = (iteration % cfg.SOLVER.PRINT_GRAD_FREQ
                           ) == 0 or print_first_grad  # print grad or not
                print_first_grad = False
                #clip_grad_value([(n, p) for n, p in listener.named_parameters() if p.requires_grad], cfg.LISTENER.CLIP_VALUE, logger=logger, verbose=True, clip=True)
                if not cfg.LISTENER.JOINT:
                    listener_optimizer.step()
                else:
                    speaker_listener_optimizer.step()

                batch_time = time.time() - end
                end = time.time()
                meters.update(time=batch_time, data=data_time)

                eta_seconds = meters.time.global_avg * (max_iter - iteration)
                eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

                if cfg.LISTENER.JOINT:
                    if iteration % 200 == 0 or iteration == max_iter:
                        logger.info(
                            meters.delimiter.join([
                                "eta: {eta}",
                                "iter: {iter}",
                                "{meters}",
                                "lr: {lr:.6f}",
                                "max mem: {memory:.0f}",
                            ]).format(
                                eta=eta_string,
                                iter=iteration,
                                meters=str(meters),
                                lr=speaker_listener_optimizer.param_groups[-1]
                                ["lr"],
                                memory=torch.cuda.max_memory_allocated() /
                                1024.0 / 1024.0,
                            ))
                else:
                    if iteration % 200 == 0 or iteration == max_iter:
                        logger.info(
                            meters.delimiter.join([
                                "eta: {eta}",
                                "iter: {iter}",
                                "{meters}",
                                "lr: {lr:.6f}",
                                "max mem: {memory:.0f}",
                            ]).format(
                                eta=eta_string,
                                iter=iteration,
                                meters=str(meters),
                                lr=listener_optimizer.param_groups[-1]["lr"],
                                memory=torch.cuda.max_memory_allocated() /
                                1024.0 / 1024.0,
                            ))

                if iteration % checkpoint_period == 0:
                    """
                    print('Model before save')
                    print('****************************')
                    print(listener.gnn.conv1.node_model.node_mlp_1[0].weight)
                    print('****************************')
                    """
                    if not cfg.LISTENER.JOINT:
                        listener_checkpointer.save(
                            "model_{:07d}".format(listener_iteration),
                            amp=amp.state_dict())
                    else:
                        speaker_checkpointer.save(
                            "model_speaker{:07d}".format(iteration))
                        listener_checkpointer.save(
                            "model_listenr{:07d}".format(listener_iteration),
                            amp=amp.state_dict())
                if iteration == max_iter:
                    if not cfg.LISTENER.JOINT:
                        listener_checkpointer.save(
                            "model_{:07d}".format(listener_iteration),
                            amp=amp.state_dict())
                    else:
                        speaker_checkpointer.save(
                            "model_{:07d}".format(iteration))
                        listener_checkpointer.save(
                            "model_{:07d}".format(listener_iteration),
                            amp=amp.state_dict())

                val_result = None  # used for scheduler updating
                if cfg.SOLVER.TO_VAL and iteration % cfg.SOLVER.VAL_PERIOD == 0:
                    logger.info("Start validating")
                    val_result = run_val(cfg, model, listener,
                                         val_data_loaders, distributed, logger)
                    (sg_loss, img_loss, sg_acc, img_acc,
                     speaker_val) = val_result

                    if is_main_process():
                        wandb.log({
                            "Validation SG Accuracy": sg_acc,
                            "Validation IMG Accuracy": img_acc,
                            "Validation SG Loss": sg_loss,
                            "Validation IMG Loss": img_loss,
                            "Validation Speaker": speaker_val,
                        })

                    #logger.info("Validation Result: %.4f" % val_result)
        except Exception as err:
            raise (err)
            print('Dataset finished, creating new')
            train_data_loader = make_data_loader(
                cfg,
                mode='train',
                is_distributed=distributed,
                start_iter=arguments["iteration"],
                ret_images=True)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
    return listener
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    writer,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()

        # # Add images every 100 iterations
        if iteration % 100 == 0:
            #     # Display images
            #     image = images.tensors[0].cpu().numpy()
            #     means = np.zeros((image.shape[0], image.shape[1], image.shape[2]))
            #     means[0] = 102.9801
            #     means[1] = 115.9465
            #     means[2] = 122.7717
            #     image = image + means
            #     image = image[[2, 1, 0]].astype(np.uint8)

            #     writer.add_image('input image', image, iteration)

            #     for b in range(len(targets[0].bbox)):
            #         box = targets[0].bbox[b]
            #         x1 = np.around(box[0].cpu().numpy())
            #         y1 = np.around(box[1].cpu().numpy())
            #         x2 = np.around(box[2].cpu().numpy())
            #         y2 = np.around(box[3].cpu().numpy())
            #         rr, cc = rectangle_perimeter(y1, x1, y2-y1, x2-x1)
            #         image[:, rr, cc] = 255

            #     writer.add_image('target boxes', image, iteration)

            #     # Display masks
            #     masks = targets[0].get_field('masks')[0]
            #     masks = masks.get_mask_tensor()
            #     combined_mask = masks[0, :, :]
            #     for i in range(1,8):
            #         combined_mask = combined_mask | masks[i, :, :]
            #     writer.add_image('mask', combined_mask.unsqueeze(0)*255, iteration)
            # writer.add_image('single part 2', masks[1, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 3', masks[2, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 4', masks[3, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 5', masks[4, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 6', masks[5, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 7', masks[6, :, :].unsqueeze(0)*255, iteration)
            # writer.add_image('single part 8', masks[7, :, :].unsqueeze(0)*255, iteration)

            # Display Losses
            writer.add_scalar('loss', meters.loss.median, iteration)
            writer.add_scalar('loss_classifier',
                              loss_dict_reduced['loss_classifier'].item(),
                              iteration)
            writer.add_scalar('loss_box_reg',
                              loss_dict_reduced['loss_box_reg'].item(),
                              iteration)
            writer.add_scalar('loss_objectness',
                              loss_dict_reduced['loss_objectness'].item(),
                              iteration)
            writer.add_scalar('loss_rpn_box_reg',
                              loss_dict_reduced['loss_rpn_box_reg'].item(),
                              iteration)
            writer.add_scalar('loss_mask',
                              loss_dict_reduced['loss_mask'].item(), iteration)
            writer.add_scalar('loss_kpt', loss_dict_reduced['loss_kp'].item(),
                              iteration)
            writer.add_scalar('lr', optimizer.param_groups[0]['lr'], iteration)

        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # The method changes the segmentation mask format in a data loader,
                # so every time a new data loader is created:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
Beispiel #10
0
def train(cfg, local_rank, distributed):
    # Model logging
    print_mlperf(key=mlperf_log.INPUT_BATCH_SIZE, value=cfg.SOLVER.IMS_PER_BATCH)
    print_mlperf(key=mlperf_log.BATCH_SIZE_TEST, value=cfg.TEST.IMS_PER_BATCH)

    print_mlperf(key=mlperf_log.INPUT_MEAN_SUBTRACTION, value = cfg.INPUT.PIXEL_MEAN)
    print_mlperf(key=mlperf_log.INPUT_NORMALIZATION_STD, value=cfg.INPUT.PIXEL_STD)
    print_mlperf(key=mlperf_log.INPUT_RESIZE)
    print_mlperf(key=mlperf_log.INPUT_RESIZE_ASPECT_PRESERVING)
    print_mlperf(key=mlperf_log.MIN_IMAGE_SIZE, value=cfg.INPUT.MIN_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.MAX_IMAGE_SIZE, value=cfg.INPUT.MAX_SIZE_TRAIN)
    print_mlperf(key=mlperf_log.INPUT_RANDOM_FLIP)
    print_mlperf(key=mlperf_log.RANDOM_FLIP_PROBABILITY, value=0.5)
    print_mlperf(key=mlperf_log.FG_IOU_THRESHOLD, value=cfg.MODEL.RPN.FG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.BG_IOU_THRESHOLD, value=cfg.MODEL.RPN.BG_IOU_THRESHOLD)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_PRE_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.PRE_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TRAIN, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TRAIN)
    print_mlperf(key=mlperf_log.RPN_POST_NMS_TOP_N_TEST, value=cfg.MODEL.RPN.FPN_POST_NMS_TOP_N_TEST)
    print_mlperf(key=mlperf_log.ASPECT_RATIOS, value=cfg.MODEL.RPN.ASPECT_RATIOS)
    print_mlperf(key=mlperf_log.BACKBONE, value=cfg.MODEL.BACKBONE.CONV_BODY)
    print_mlperf(key=mlperf_log.NMS_THRESHOLD, value=cfg.MODEL.RPN.NMS_THRESH)
    # /root/ssy/ssynew/maskrcnn-benchmark/maskrcnn_benchmark/modeling/detector/detectors.py
    # building bare mode without doing anthing
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    # Optimizer logging
    print_mlperf(key=mlperf_log.OPT_NAME, value=mlperf_log.SGD_WITH_MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_LR, value=cfg.SOLVER.BASE_LR)
    print_mlperf(key=mlperf_log.OPT_MOMENTUM, value=cfg.SOLVER.MOMENTUM)
    print_mlperf(key=mlperf_log.OPT_WEIGHT_DECAY, value=cfg.SOLVER.WEIGHT_DECAY)


    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR
    print("output_dir "+str(output_dir))

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    # no such SAVE_CHECKPOINTS
    #arguments["save_checkpoints"] = cfg.SAVE_CHECKPOINTS
    arguments["save_checkpoints"] = False

    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"]
    )
    print("SSY iters_per_epoch "+str(iters_per_epoch))
    #print("SSY iters_per_epoch change to 100 ")
    #iters_per_epoch = 100

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    # set the callback function to evaluate and potentially
    # early exit each epoch
    # SSY
    # I already add PER_EPOCH_EVAL and MIN_BBOX_MAP MIN_SEGM_MAP to  ./configs/e2e_mask_rcnn_R_50_FPN_1x.yaml
    # but it still can not find it
    # so I manually set them here
    #if cfg.PER_EPOCH_EVAL:
    #    per_iter_callback_fn = functools.partial(
    #            mlperf_test_early_exit,
    #            iters_per_epoch=iters_per_epoch,
    #            tester=functools.partial(test, cfg=cfg),
    #            model=model,
    #            distributed=distributed,
    #            min_bbox_map=cfg.MLPERF.MIN_BBOX_MAP,
    #            min_segm_map=cfg.MLPERF.MIN_SEGM_MAP)
    #else:
    #    per_iter_callback_fn = None
    per_iter_callback_fn = functools.partial(
            mlperf_test_early_exit,
            iters_per_epoch=iters_per_epoch,
            # /root/ssy/ssynew/maskrcnn-benchmark/maskrcnn_benchmark/engine/tester.py
            tester=functools.partial(test, cfg=cfg),
            model=model,
            distributed=distributed,
            min_bbox_map=0.377,
            min_segm_map=0.339)

    start_train_time = time.time()
    # /root/ssy/ssynew/maskrcnn-benchmark/maskrcnn_benchmark/engine/trainer.py
    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        per_iter_start_callback_fn=functools.partial(mlperf_log_epoch_start, iters_per_epoch=iters_per_epoch),
        per_iter_end_callback_fn=per_iter_callback_fn,
    )

    end_train_time = time.time()
    total_training_time = end_train_time - start_train_time
    print(
            "&&&& MLPERF METRIC THROUGHPUT per GPU={:.4f} iterations / s".format((arguments["iteration"] * 1.0) / total_training_time)
    )

    return model
Beispiel #11
0
def train(cfg, local_rank, distributed, logger):
    debug_print(logger, 'prepare training')
    model = build_detection_model(cfg) 
    debug_print(logger, 'end model construction')

    # modules that should be always set in eval mode
    # their eval() method should be called after model.train() is called
    eval_modules = (model.rpn, model.backbone, model.roi_heads.box,)
 
    fix_eval_modules(eval_modules)

    # NOTE, we slow down the LR of the layers start with the names in slow_heads
    if cfg.MODEL.ROI_RELATION_HEAD.PREDICTOR == "IMPPredictor":
        slow_heads = ["roi_heads.relation.box_feature_extractor",
                      "roi_heads.relation.union_feature_extractor.feature_extractor",]
    else:
        slow_heads = []

    # load pretrain layers to new layers
    load_mapping = {"roi_heads.relation.box_feature_extractor" : "roi_heads.box.feature_extractor",
                    "roi_heads.relation.union_feature_extractor.feature_extractor" : "roi_heads.box.feature_extractor"}
    
    if cfg.MODEL.ATTRIBUTE_ON:
        load_mapping["roi_heads.relation.att_feature_extractor"] = "roi_heads.attribute.feature_extractor"
        load_mapping["roi_heads.relation.union_feature_extractor.att_feature_extractor"] = "roi_heads.attribute.feature_extractor"

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    num_batch = cfg.SOLVER.IMS_PER_BATCH
    optimizer = make_optimizer(cfg, model, logger, slow_heads=slow_heads, slow_ratio=10.0, rl_factor=float(num_batch))
    scheduler = make_lr_scheduler(cfg, optimizer, logger)
    debug_print(logger, 'end optimizer and shcedule')
    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model, optimizer, opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
            find_unused_parameters=True,
        )
    debug_print(logger, 'end distributed')
    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk, custom_scheduler=True
    )
    # if there is certain checkpoint in output_dir, load it, else load pretrained detector
    if checkpointer.has_checkpoint():
        extra_checkpoint_data = checkpointer.load(cfg.MODEL.PRETRAINED_DETECTOR_CKPT, 
                                       update_schedule=cfg.SOLVER.UPDATE_SCHEDULE_DURING_LOAD)
        arguments.update(extra_checkpoint_data)
        if cfg.SOLVER.UPDATE_SCHEDULE_DURING_LOAD:
            checkpointer.scheduler.last_epoch = extra_checkpoint_data["iteration"]
            logger.info("update last epoch of scheduler to iter: {}".format(str(extra_checkpoint_data["iteration"])))
    else:
        # load_mapping is only used when we init current model from detection model.
        checkpointer.load(cfg.MODEL.PRETRAINED_DETECTOR_CKPT, with_optim=False, load_mapping=load_mapping)
    debug_print(logger, 'end load checkpointer')
    train_data_loader = make_data_loader(
        cfg,
        mode='train',
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )
    val_data_loaders = make_data_loader(
        cfg,
        mode='val',
        is_distributed=distributed,
    )
    debug_print(logger, 'end dataloader')
    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if cfg.SOLVER.PRE_VAL:
        logger.info("Validate before training")
        run_val(cfg, model, val_data_loaders, distributed, logger)

    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(train_data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()

    print_first_grad = True
    for iteration, (images, targets, _) in enumerate(train_data_loader, start_iter):
        if any(len(target) < 1 for target in targets):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        model.train()
        fix_eval_modules(eval_modules)

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        
        # add clip_grad_norm from MOTIFS, tracking gradient, used for debug
        verbose = (iteration % cfg.SOLVER.PRINT_GRAD_FREQ) == 0 or print_first_grad # print grad or not
        print_first_grad = False
        clip_grad_norm([(n, p) for n, p in model.named_parameters() if p.requires_grad], max_norm=cfg.SOLVER.GRAD_NORM_CLIP, logger=logger, verbose=verbose, clip=True)

        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 200 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[-1]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

        val_result = None # used for scheduler updating
        if cfg.SOLVER.TO_VAL and iteration % cfg.SOLVER.VAL_PERIOD == 0:
            logger.info("Start validating")
            val_result = run_val(cfg, model, val_data_loaders, distributed, logger)
            logger.info("Validation Result: %.4f" % val_result)
 
        # scheduler should be called after optimizer.step() in pytorch>=1.1.0
        # https://pytorch.org/docs/stable/optim.html#how-to-adjust-learning-rate
        if cfg.SOLVER.SCHEDULE.TYPE == "WarmupReduceLROnPlateau":
            scheduler.step(val_result, epoch=iteration)
            if scheduler.stage_count >= cfg.SOLVER.SCHEDULE.MAX_DECAY_STEP:
                logger.info("Trigger MAX_DECAY_STEP at iteration {}.".format(iteration))
                break
        else:
            scheduler.step()

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
    return model
Beispiel #12
0
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0
    arguments['phase'] = 1
    arguments['plot_median'], arguments['plot_global_avg'] = defaultdict(
        list), defaultdict(list)

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    test_period = cfg.SOLVER.TEST_PERIOD
    if test_period > 0:
        data_loader_val = make_data_loader(cfg,
                                           is_train=False,
                                           is_distributed=distributed,
                                           is_for_period=True)
    else:
        data_loader_val = None

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if arguments['phase'] == 1:
        data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
            phase=1,
        )
        do_train(
            cfg,
            model,
            data_loader,
            data_loader_val,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            test_period,
            arguments,
            training_phase=1,
        )
        arguments["iteration"] = 0
        arguments["phase"] = 2

    data_loader_phase2 = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
        phase=2,
    )

    do_train(
        cfg,
        model,
        data_loader_phase2,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        test_period,
        arguments,
        training_phase=2,
    )

    return model
Beispiel #13
0
def pred_with_weight(args):
    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(backend="nccl",
                                                        init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    for weight in args.weights:
        # skipping evaluations already performed
        out_json = "{}/detections/{}.json".format(
            cfg.OUTPUT_DIR,
            weight.split('/')[-1].split('_')[-1].split('.')[0])
        if os.path.exists(out_json):
            print('skipping', out_json)
            continue
        checkpointer = DetectronCheckpointer(cfg, model)
        _ = checkpointer.load(weight)

        iou_types = ("bbox", )
        if cfg.MODEL.MASK_ON:
            iou_types = iou_types + ("segm", )
        output_folders = [None] * len(cfg.DATASETS.TEST)

        if cfg.OUTPUT_DIR:
            dataset_names = cfg.DATASETS.TEST
            for idx, dataset_name in enumerate(dataset_names):
                output_folder = cfg.OUTPUT_DIR
                mkdir(output_folder)
                output_folders[idx] = output_folder
        data_loaders_val = make_data_loader(cfg,
                                            is_train=False,
                                            is_distributed=distributed)
        for output_folder, data_loader_val in zip(output_folders,
                                                  data_loaders_val):
            inference(
                model,
                data_loader_val,
                iou_types=iou_types,
                box_only=cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                name=weight.split('_')[-1].split('.')[0])
            synchronize()
Beispiel #14
0
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    summary_writer = SummaryWriter(log_dir=output_dir)
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )

    if cfg.MODEL.WEIGHT.upper() == 'CONTINUE':
        model_weight = last_checkpoint(output_dir)
    else:
        model_weight = cfg.MODEL.WEIGHT
    extra_checkpoint_data = checkpointer.load(model_weight)

    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    data_loader_val = make_data_loader(
        cfg,
        is_train=False,
        is_distributed=distributed)[0]

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model=model,
        data_loader=data_loader,
        data_loader_val=data_loader_val,
        optimizer=optimizer,
        scheduler=scheduler,
        checkpointer=checkpointer,
        device=device,
        checkpoint_period=checkpoint_period,
        arguments=arguments,
        summary_writer=summary_writer
    )

    return model
Beispiel #15
0
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)

    results = []
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        result = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            skip_eval=args.skip_eval,
            dllogger=dllogger,
Beispiel #16
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--data-dir",
        default=".",
        metavar="DIR",
        help="data dir for training",
        type=str,
    )
    parser.add_argument(
        "--out-dir",
        default=".",
        metavar="DIR",
        help="output dir for model",
        type=str,
    )
    parser.add_argument(
        "--gpu_ids",
        default="-1",
        help="gpu id",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()
    if args.gpu_ids != '-1':
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.MODEL.WEIGHT = args.data_dir + cfg.MODEL.WEIGHT[1:]
    cfg.DATA_DIR = args.data_dir + cfg.DATA_DIR[1:]
    cfg.OUTPUT_DIR = args.out_dir + cfg.OUTPUT_DIR[1:]
    print("cfg.OUTPUT_DIR: ", cfg.OUTPUT_DIR)
    print("cfg.MODEL.WEIGHT: ", cfg.MODEL.WEIGHT)
    print("cfg.DATA_DIR: ", cfg.DATA_DIR)
    print("cfg.MODEL.ATTRIBUTE_ON: ", cfg.MODEL.ATTRIBUTE_ON)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    # evaluate object detection
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result_obj = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            eval_attributes=False,
        )
        synchronize()

    # evaluate attribute detection
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result_attr = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            eval_attributes=True,
        )
        synchronize()

    # evaluate RPN
    cfg.defrost()
    cfg.MODEL.RPN_ONLY = True
    cfg.freeze()
    logger.info(cfg)
    # pdb.set_trace()
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result_rpn = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            eval_attributes=False,
        )
        synchronize()

    if is_main_process():
        results = {**result_rpn, **result_obj, **result_attr}
        print(results)
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    params = get_model_parameters_number(model)
    print('{:<30}  {:<8}'.format('Number of parameters: ', params))
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[local_rank], output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if cfg.MODEL.DOMAIN_ADAPTATION_ON:
        source_data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_source=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )
        target_data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_source=False,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )

        do_da_train(
            model,
            source_data_loader,
            target_data_loader,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            arguments,
            cfg,
        )
    else:
        data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )
        
        do_train(
            model,
            data_loader,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            arguments,
        )

    return model
Beispiel #18
0
def main():


    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/home/guli/Desktop/VOS_ICCV2019/maskrcnn-benchmark/configs/davis/e2e_mask_rcnn_R_50_FPN_1x_davis.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("DAVIS_MaskRCNN_baseline_test", save_dir, args.local_rank)
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = Checkpointer(model)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    # if cfg.MODEL.MASK_ON:
    #     iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    if cfg.OUTPUT_DIR:
        dataset_names = cfg.DATASETS.TEST
        exp_name = cfg.EXP.NAME
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name + "_" + exp_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, data_loader_val in zip(output_folders, data_loaders_val):
        inference_davis(
            model,
            data_loader_val,
            iou_types=iou_types,
            box_only=False,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            debug=cfg.TEST.DEBUG,
            generate_annotation=cfg.TEST.GENERATE_ANNOTATION,
            overlay_box=cfg.TEST.OVERLAY_BOX,
            matching=cfg.TEST.MATCHING,
            skip_computation_network=cfg.TEST.SKIP_NETWORK,
            select_top_predictions_flag=cfg.TEST.SELECT_TOP_PREDICTIONS,
            cfg=cfg
        )
        synchronize()
Beispiel #19
0
def train(cfg, local_rank, distributed, logger):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg,
                               model,
                               logger,
                               rl_factor=float(cfg.SOLVER.IMS_PER_BATCH))
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(
        cfg.MODEL.WEIGHT,
        update_schedule=cfg.SOLVER.UPDATE_SCHEDULE_DURING_LOAD)
    arguments.update(extra_checkpoint_data)

    train_data_loader = make_data_loader(
        cfg,
        mode='train',
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )
    val_data_loaders = make_data_loader(
        cfg,
        mode='val',
        is_distributed=distributed,
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    if cfg.SOLVER.PRE_VAL:
        logger.info("Validate before training")
        run_val(cfg, model, val_data_loaders, distributed)

    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(train_data_loader)
    start_iter = arguments["iteration"]
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(train_data_loader,
                                                     start_iter):
        model.train()

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 200 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))

        if cfg.SOLVER.TO_VAL and iteration % cfg.SOLVER.VAL_PERIOD == 0:
            logger.info("Start validating")
            run_val(cfg, model, val_data_loaders, distributed)

        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))

    return model
Beispiel #20
0
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

#     if use_amp:
#         # Initialize mixed-precision training
#         use_mixed_precision = cfg.DTYPE == "float16"
#         amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

#         # wrap the optimizer for mixed precision
#         if cfg.SOLVER.ACCUMULATE_GRAD:
#             # also specify number of steps to accumulate over
#             optimizer = amp_handle.wrap_optimizer(optimizer, num_loss=cfg.SOLVER.ACCUMULATE_STEPS)
#         else:
#             optimizer = amp_handle.wrap_optimizer(optimizer)


    model, optimizer = amp.initialize(model, optimizer,opt_level='O1')
    if distributed:
        if use_apex_ddp:
            model = DDP(model, delay_allreduce=True)
        else:
            model = torch.nn.parallel.DistributedDataParallel(
                model, device_ids=[local_rank], output_device=local_rank,
                # this should be removed if we update BatchNorm stats
                broadcast_buffers=False,
            )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, optimizer, scheduler, output_dir, save_to_disk
    )
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader, iters_per_epoch = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    # set the callback function to evaluate and potentially
    # early exit each epoch
    if 1==1:
        
        per_iter_callback_fn = functools.partial(
                mlperf_test_early_exit,
                iters_per_epoch=iters_per_epoch,
                tester=functools.partial(test, cfg=cfg),
                model=model,
                distributed=distributed,
                min_bbox_map=cfg.MIN_BBOX_MAP,
                min_segm_map=cfg.MIN_MASK_MAP)
    else:
        per_iter_callback_fn = None

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
        use_amp,
        cfg,
        per_iter_end_callback_fn=per_iter_callback_fn,
    )

    return model
Beispiel #21
0
def test_maskscoring_rcnn(config_file):

    import sys
    sys.path.append('./detection_model/maskscoring_rcnn')

    import argparse
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'
    import torch
    #from maskrcnn_benchmark.config import cfg
    from maskrcnn_benchmark.data import make_data_loader
    from maskrcnn_benchmark.engine.inference import inference
    from maskrcnn_benchmark.modeling.detector import build_detection_model
    from maskrcnn_benchmark.utils.checkpoint import DetectronCheckpointer
    from maskrcnn_benchmark.utils.collect_env import collect_env_info
    from maskrcnn_benchmark.utils.comm import synchronize, get_rank
    from maskrcnn_benchmark.utils.logger import setup_logger
    from maskrcnn_benchmark.utils.miscellaneous import mkdir

    from yacs.config import CfgNode as CN

    def read_config_file(config_file):
        """
        read config information form yaml file
        """
        f = open(config_file)
        opt = CN.load_cfg(f)
        return opt

    opt = read_config_file(config_file)

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(opt.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(opt)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(opt)
    model.to(opt.MODEL.DEVICE)

    output_dir = opt.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(opt, model, save_dir=output_dir)
    _ = checkpointer.load(opt.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if opt.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(opt.DATASETS.TEST)
    if opt.OUTPUT_DIR:
        dataset_names = opt.DATASETS.TEST
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(opt.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(opt, is_train=False, is_distributed=distributed)
    for output_folder, data_loader_val in zip(output_folders, data_loaders_val):
        inference(
            model,
            data_loader_val,
            iou_types=iou_types,
            box_only=opt.MODEL.RPN_ONLY,
            device=opt.MODEL.DEVICE,
            expected_results=opt.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=opt.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            maskiou_on=opt.MODEL.MASKIOU_ON
        )
        synchronize()
def train(cfg, cfg_origial, local_rank, distributed):
    ## The one with modified number of classes
    model = build_detection_model(cfg)

    # cfg_origial = cfg.clone()
    # cfg_origial.MODEL.ROI_BOX_HEAD.NUM_CLASSES = 81
    # original_model = build_detection_model(cfg_origial)     ## Original model with 81 classes

    # ## Let's load weights for old class!
    # save_dir = cfg.OUTPUT_DIR
    # checkpointer = DetectronCheckpointer(cfg_origial, original_model, save_dir=save_dir)
    # checkpointer.load(cfg_origial.MODEL.WEIGHT)

    # # pretrained_model_pth = "/network/home/bhattdha/.torch/models/_detectron_35861795_12_2017_baselines_e2e_mask_rcnn_R-101-FPN_1x.yaml.02_31_37.KqyEK4tT_output_train_coco_2014_train%3Acoco_2014_valminusminival_generalized_rcnn_model_final.pkl"
    # # These keys are to be removed which forms final layers of the network
    # removal_keys = ['roi_heads.box.predictor.cls_score.weight', 'roi_heads.box.predictor.cls_score.bias', 'roi_heads.box.predictor.bbox_pred.weight', 'roi_heads.box.predictor.bbox_pred.bias', 'roi_heads.mask.predictor.mask_fcn_logits.weight', 'roi_heads.mask.predictor.mask_fcn_logits.bias']

    # model = _transfer_pretrained_weights(new_model, original_model, removal_keys)

    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)
    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # # Initialize mixed-precision training
    # use_mixed_precision = cfg.DTYPE == "float16"
    # amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    # model, optimizer = amp.initialize(model, optimizer, opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0
    output_dir = cfg.OUTPUT_DIR
    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)

    # cfg.MODEL.WEIGHT = '/network/home/bhattdha/exp.pth' ## Model stored through surgery
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        model,
        data_loader,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        arguments,
    )

    return model
Beispiel #23
0
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument("--seq_test", action='store_true')
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = cfg.OUTPUT_DIR
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank(), filename='test_log.txt')
    logger.info("Using {} GPUs".format(num_gpus))
    # logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    # logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    ori_output_dir = cfg.OUTPUT_DIR

    if args.seq_test:
        load_dir = cfg.TEST.LOAD_DIR 
        model_files = glob.glob(load_dir+'/*.pth')
        model_files.sort()
        min_iter = cfg.TEST.MIN_ITER 
        max_iter = cfg.TEST.MAX_ITER 
        # print(model_files)
        model_files = [model_file for model_file in model_files if 'final' not in model_file and int(model_file[-11:-4])>=min_iter and int(model_file[-11:-4])<=max_iter]
    else:
        model_files = [cfg.MODEL.WEIGHT]

    for model_file in model_files:
        cfg.defrost()
        cfg.MODEL.WEIGHT = model_file
        logger.info('testing from {} '.format(model_file))
        cfg.OUTPUT_DIR = os.path.join(ori_output_dir, model_file[-11:-4])
        cfg.freeze()

        output_dir = cfg.OUTPUT_DIR
        checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
        _ = checkpointer.load(cfg.MODEL.WEIGHT)

        iou_types = ("bbox",)
        if cfg.MODEL.MASK_ON:
            iou_types = iou_types + ("segm",)
        if cfg.MODEL.KEYPOINT_ON:
            iou_types = iou_types + ("keypoints",)
        output_folders = [None] * len(cfg.DATASETS.TEST)
        dataset_names = cfg.DATASETS.TEST
        if cfg.OUTPUT_DIR:
            for idx, dataset_name in enumerate(dataset_names):
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
                mkdir(output_folder)
                output_folders[idx] = output_folder
        data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
        for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=False, #False if cfg.MODEL.FCOS_ON or cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                stop_iter=cfg.FEW_SHOT.STOP_ITER
            )
            synchronize()
Beispiel #24
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    parser.add_argument(
        "--build-model",
        default="",
        metavar="FILE",
        help="path to NAS model build file",
        type=str,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    assert len(args.build_model) != 0, 'args.build_model should be provided'
    model_config = json.load(open(args.build_model, 'r'))
    if isinstance(model_config, list):
        assert len(model_config) == 1
        model_config = model_config[0]
    print('Testing single model:', model_config)

    model = build_detection_model(cfg, model_config)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    # output_folders = [None] * len(cfg.DATASETS.TEST)
    # dataset_names = cfg.DATASETS.TEST
    dataset_names = cfg.DATASETS.NAS_VAL if not cfg.NAS.TRAIN_SINGLE_MODEL else cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)

    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed,
                                        test_only=cfg.TEST_ONLY)

    if cfg.NAS.TRAIN_SINGLE_MODEL:
        if get_rank() == 0:
            print('==' * 20, 'Evaluating single model...', '==' * 20)
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                c2d_json_path=cfg.MODEL.SEG_BRANCH.JSON_PATH,
                cfg=cfg,
                test_only=cfg.TEST_ONLY)
            synchronize()
    elif not cfg.NAS.SKIP_NAS_TEST:
        if get_rank() == 0:
            print('==' * 10, 'Start NAS testing', '==' * 10)
        timer = Timer()
        timer.tic()
        searcher = PathPrioritySearch(cfg, './nas_test')
        searcher.generate_fair_test(
        )  # load cache results and generate new model for test
        searcher.search(model, output_folders, dataset_names, distributed)
        searcher.save_topk()
        total_time = timer.toc()
        total_time_str = get_time_str(total_time)
        if get_rank() == 0:
            print('Finish NAS testing, total time:{}'.format(total_time_str))
        return
    else:
        print('Skipping NAS testing...')
Beispiel #25
0
def main():
    args = parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    output_dir = os.path.dirname(cfg.MODEL.WEIGHT)
    cfg.OUTPUT_DIR = output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.MODEL.WEIGHT)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST

    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Beispiel #26
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    ## add for training
    parser.add_argument(
        "--data-dir",
        default="",
        metavar="DIR",
        help="path to data folder",
        type=str,
    )
    parser.add_argument(
        "--pretrained-model",
        default="",
        help="path to pretrained model",
        metavar="FILE",
        type=str,
    )
    parser.add_argument(
        "--nonlocal-cls-num-group",
        default="1",
        help="nonlocal num group cls",
        metavar="1",
        type=int,
    )
    parser.add_argument(
        "--nonlocal-cls-num-stack",
        default="0",
        help="nonlocal num stack cls",
        metavar="1",
        type=int,
    )
    parser.add_argument(
        "--nonlocal-reg-num-group",
        default="1",
        help="nonlocal num group reg",
        metavar="1",
        type=int,
    )
    parser.add_argument(
        "--nonlocal-reg-num-stack",
        default="0",
        help="nonlocal num stack reg",
        metavar="1",
        type=int,
    )
    parser.add_argument(
        "--nonlocal-shared-num-group",
        default="1",
        help="nonlocal num group reg",
        metavar="1",
        type=int,
    )
    parser.add_argument(
        "--nonlocal-shared-num-stack",
        default="0",
        help="nonlocal num stack reg",
        metavar="1",
        type=int,
    )

    parser.add_argument(
        "--nonlocal-out-channels",
        default="2048",
        help="nonlocal out channels for fpn, fpn=2048(like c4)",
        metavar="2048",
        type=int,
    )
    parser.add_argument(
        "--nonlocal-inter-channels",
        default="256",
        help="nonlocal inter channels, c4 < 2048, fpn < 256",
        metavar="256",
        type=int,
    )
    parser.add_argument(
        "--nonlocal-use-shared",
        default="True",
        help="nonlocal use shared non-locael",
        metavar="True",
        type=str,
    )
    parser.add_argument(
        "--nonlocal-use-bn",
        default="True",
        help="nonlocal use bn after attention",
        metavar="True",
        type=str,
    )
    parser.add_argument(
        "--nonlocal-use-softmax",
        default="False",
        help="nonlocal use softmax other than div",
        metavar="False",
        type=str,
    )
    parser.add_argument(
        "--nonlocal-use-attention",
        default="True",
        help="nonlocal use attention before ffconv",
        metavar="True",
        type=str,
    )

    parser.add_argument(
        "--nonlocal-use-ffconv",
        default="True",
        help="nonlocal use ffconv after nonlocal with residual",
        metavar="True",
        type=str,
    )
    parser.add_argument(
        "--nonlocal-use-relu",
        default="True",
        help="nonlocal use relu after bn",
        metavar="True",
        type=str,
    )
    parser.add_argument(
        "--conv-bbox-expand",
        default="1.0",
        help="box expand conv",
        metavar="1.0",
        type=float,
    )
    parser.add_argument(
        "--fc-bbox-expand",
        default="1.0",
        help="box expand fc",
        metavar="1.0",
        type=float,
    )
    parser.add_argument(
        "--backbone-out-channels",
        default="256",
        help="fpn out channels for fpn, fpn=2048(like c4)",
        metavar="256",
        type=int,
    )

    parser.add_argument(
        "--evaluation-flags",
        nargs='*',
        # default=[0, 3],
        default=[],
        help="model code for evaluation flags",
        metavar="1 1 1 1",
        type=int,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    cfg.DATA_DIR = args.data_dir
    cfg.MODEL.WEIGHT = args.pretrained_model
    cfg.MODEL.ROI_BOX_HEAD.NEIGHBOR_CONV_EXPAND = args.conv_bbox_expand
    cfg.MODEL.ROI_BOX_HEAD.NEIGHBOR_FC_EXPAND = args.fc_bbox_expand

    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_CLS_NUM_GROUP = args.nonlocal_cls_num_group
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_CLS_NUM_STACK = args.nonlocal_cls_num_stack
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_REG_NUM_GROUP = args.nonlocal_reg_num_group
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_REG_NUM_STACK = args.nonlocal_reg_num_stack
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_SHARED_NUM_GROUP = args.nonlocal_shared_num_group
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_SHARED_NUM_STACK = args.nonlocal_shared_num_stack
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_INTER_CHANNELS = args.nonlocal_inter_channels
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_OUT_CHANNELS = args.nonlocal_out_channels

    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_USE_SHARED = ast.literal_eval(
        args.nonlocal_use_shared)
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_USE_BN = ast.literal_eval(
        args.nonlocal_use_bn)
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_USE_SOFTMAX = ast.literal_eval(
        args.nonlocal_use_softmax)
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_USE_FFCONV = ast.literal_eval(
        args.nonlocal_use_ffconv)
    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_USE_RELU = ast.literal_eval(
        args.nonlocal_use_relu)

    cfg.MODEL.ROI_BOX_HEAD.NONLOCAL_USE_ATTENTION = ast.literal_eval(
        args.nonlocal_use_attention)

    cfg.MODEL.BACKBONE.OUT_CHANNELS = args.backbone_out_channels

    # double heads
    cfg.TEST.EVALUATION_FLAGS = args.evaluation_flags

    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Beispiel #27
0
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)
    device = torch.device(cfg.MODEL.DEVICE)
    model.to(device)

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, None, None, output_dir,
                                         save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    # arguments.update(extra_checkpoint_data)

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    if cfg.MODEL.META_ARCHITECTURE == 'AdaptionRCNN':
        logger.info('AdaptionRCNN trainer is adapted!')
        cross_do_train(
            cfg,
            model,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            arguments,
            distributed,
        )
    elif cfg.MODEL.META_ARCHITECTURE == 'GeneralizedRCNN':
        logger.info('GeneralizedRCNN trainer is adapted!')
        data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )
        do_train(
            cfg,
            model,
            data_loader,
            optimizer,
            scheduler,
            checkpointer,
            device,
            checkpoint_period,
            arguments,
            distributed,
        )

    return model
Beispiel #28
0
    try:
        cfg.merge_from_file(config_file)
    except KeyError as e:
        print(e)
    cfg.INPUT.PIXEL_MEAN = [0, 0, 0]
    cfg.INPUT.HORIZONTAL_FLIP_PROB_TRAIN = 0.5
    cfg.INPUT.VERTICAL_FLIP_PROB_TRAIN = 0.5
    cfg.INPUT.ROTATE_PROB_TRAIN = 1.0
    cfg.INPUT.ROTATE_DEGREES_TRAIN = (-45, 45)
    cfg.DATALOADER.NUM_WORKERS = 1
    cfg.DATALOADER.SIZE_DIVISIBILITY = 0
    cfg.freeze()

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=False,
        start_iter=0,
    )

    device = 'cpu'

    is_rotated = 1  #cfg.MODEL.ROTATED
    if is_rotated:
        from maskrcnn_benchmark.modeling.rrpn.utils import get_boxlist_rotated_rect_tensor
        from maskrcnn_benchmark.modeling.rrpn.anchor_generator import draw_anchors

    start_iter = 0
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        img_tensors = images.tensors

        for id in range(len(targets)):
Beispiel #29
0
def run_test(cfg, model, distributed, iter, valid=False):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )

    if valid:
        output_folders = [None] * len(cfg.DATASETS.VALID)
        dataset_names = cfg.DATASETS.VALID
        if cfg.OUTPUT_DIR:
            for idx, dataset_name in enumerate(dataset_names):
                output_folder = os.path.join(
                    cfg.OUTPUT_DIR, "validation", dataset_name,
                    '{}_{}'.format(iter, cfg.MODEL.ROI_HEADS.SCORE_THRESH))
                mkdir(output_folder)
                output_folders[idx] = output_folder
        data_loaders_val = make_data_loader(cfg,
                                            dataset='valid',
                                            is_distributed=distributed)
        print(distributed)
        results = []
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            # TODO if multiple valid set, result will be a list
            result = inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
            )
            results.append(result)
        return results
    else:
        output_folders = [None] * len(cfg.DATASETS.TEST)
        dataset_names = cfg.DATASETS.TEST
        if cfg.OUTPUT_DIR:
            for idx, dataset_name in enumerate(dataset_names):
                output_folder = os.path.join(
                    cfg.OUTPUT_DIR, "inference", dataset_name,
                    '{}_{}'.format(iter, cfg.MODEL.ROI_HEADS.SCORE_THRESH))
                mkdir(output_folder)
                output_folders[idx] = output_folder
        data_loaders_test = make_data_loader(cfg,
                                             dataset='test',
                                             is_distributed=distributed)
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_test):
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
            )
            synchronize()
def train(cfg, local_rank, distributed):
    model = build_detection_model(cfg)  # 梦开始的地方
    device = torch.device(cfg.MODEL.DEVICE)  # !!!!!
    model.to(device)

    for name, value in model.backbone.body.network.named_children(
    ):  # 冻结主干网络参数
        if int(name) > 60:
            for param in value.parameters():
                param.requires_grad = False

    optimizer = make_optimizer(cfg, model)
    scheduler = make_lr_scheduler(cfg, optimizer)

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"  # 这里可以改成float16来加速
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, optimizer = amp.initialize(model,
                                      optimizer,
                                      opt_level=amp_opt_level)

    if distributed:
        model = torch.nn.parallel.DistributedDataParallel(
            model,
            device_ids=[local_rank],
            output_device=local_rank,
            # this should be removed if we update BatchNorm stats
            broadcast_buffers=False,
        )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(cfg, model, optimizer, scheduler,
                                         output_dir, save_to_disk)
    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)
    arguments.update(extra_checkpoint_data)

    data_loader = make_data_loader(
        cfg,
        is_train=True,
        is_distributed=distributed,
        start_iter=arguments["iteration"],
    )

    test_period = cfg.SOLVER.TEST_PERIOD
    if test_period > 0:
        data_loader_val = make_data_loader(cfg,
                                           is_train=False,
                                           is_distributed=distributed,
                                           is_for_period=True)
    else:
        data_loader_val = None

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    do_train(
        cfg,
        model,
        data_loader,
        data_loader_val,
        optimizer,
        scheduler,
        checkpointer,
        device,
        checkpoint_period,
        test_period,
        arguments,
    )

    return model
Beispiel #31
0
def do_train(model, data_loader, optimizer, scheduler, checkpointer, device,
             checkpoint_period, arguments, meters):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=False)
    output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                 cfg.DATASETS.TEST[0])
    mkdir(output_folder)
    start_training_time = time.time()
    end = time.time()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        model.train()
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)
        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            # EVALUATION

            # inference(
            #     model,
            #     cfg,
            #     data_loaders_val[0],
            #     dataset_name=cfg.DATASETS.TEST,
            #     device=cfg.MODEL.DEVICE,
            #     meters=meters,
            #     expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            #     output_folder=output_folder,
            # )
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1
    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    init()
    tag = 17
    set_epoch_tag(tag)

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    is_fp16 = (cfg.DTYPE == "float16")
    if is_fp16:
        # convert model to FP16
        model.half()

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    #   evaluator = get_evaluator()

    start_test_time = time.time()
    results = []
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        result = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        results.append(result)
    end_test_time = time.time()
    total_testing_time = end_test_time - start_test_time

    if is_main_process():
        map_results, raw_results = results[0]
        bbox_map = map_results.results["bbox"]['AP']
        segm_map = map_results.results["segm"]['AP']
        print("BBOX_mAP: ", bbox_map, " MASK_mAP: ", segm_map)

    print("Inference time: ", total_testing_time)