Example 1
def test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
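The model.module unwrapping at the top of test() only makes sense if the detector was wrapped for distributed training beforehand. A minimal sketch of that wrapping, assuming the usual maskrcnn_benchmark training script (local_rank and distributed are assumed to come from the surrounding script):

import torch
from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.modeling.detector import build_detection_model

model = build_detection_model(cfg)
model.to(cfg.MODEL.DEVICE)
if distributed:
    # wrap for multi-GPU training; test() later recovers the raw module via model.module
    model = torch.nn.parallel.DistributedDataParallel(
        model,
        device_ids=[local_rank],
        output_device=local_rank,
        broadcast_buffers=False,
    )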
Example 2
def cache_url(url, model_dir=None, progress=True):
    r"""Loads the Torch serialized object at the given URL.
    If the object is already present in `model_dir`, it's deserialized and
    returned. The filename part of the URL should follow the naming convention
    ``filename-<sha256>.ext`` where ``<sha256>`` is the first eight or more
    digits of the SHA256 hash of the contents of the file. The hash is used to
    ensure unique names and to verify the contents of the file.
    The default value of `model_dir` is ``$TORCH_HOME/models`` where
    ``$TORCH_HOME`` defaults to ``~/.torch``. The default directory can be
    overridden with the ``$TORCH_MODEL_ZOO`` environment variable.
    Args:
        url (string): URL of the object to download
        model_dir (string, optional): directory in which to save the object
        progress (bool, optional): whether or not to display a progress bar to stderr
    Example:
        >>> cached_file = maskrcnn_benchmark.utils.model_zoo.cache_url('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth')
    """
    if model_dir is None:
        torch_home = os.path.expanduser(os.getenv('TORCH_HOME', '~/.torch'))
        model_dir = os.getenv('TORCH_MODEL_ZOO', os.path.join(torch_home, 'models'))
    if not os.path.exists(model_dir):
        os.makedirs(model_dir)
    parts = urlparse(url)
    filename = os.path.basename(parts.path)
    if filename == "model_final.pkl":
        # workaround as pre-trained Caffe2 models from Detectron have all the same filename
        # so make the full path the filename by replacing / with _
        filename = parts.path.replace("/", "_")
    cached_file = os.path.join(model_dir, filename)
    if not os.path.exists(cached_file) and is_main_process():
        sys.stderr.write('Downloading: "{}" to {}\n'.format(url, cached_file))
        hash_prefix = HASH_REGEX.search(filename)
        if hash_prefix is not None:
            hash_prefix = hash_prefix.group(1)
            # workaround: Caffe2 models don't have a hash, but follow the R-50 convention,
            # which matches the hash PyTorch uses. So we skip the hash matching
            # if the hash_prefix is less than 6 characters
            if len(hash_prefix) < 6:
                hash_prefix = None
        _download_url_to_file(url, cached_file, hash_prefix, progress=progress)
    synchronize()
    return cached_file
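A minimal usage sketch for cache_url; the custom model_dir below is an assumption (by default the file is cached under $TORCH_HOME/models):

from maskrcnn_benchmark.utils.model_zoo import cache_url

# downloads on the first call, returns the cached path on later calls
weights_path = cache_url(
    "https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth",
    model_dir="/tmp/torch_models",  # hypothetical cache directory
    progress=True,
)
print(weights_path)  # e.g. /tmp/torch_models/resnet18-5c106cde.pth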
Example 3
def do_train(
    cfg,
    model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(
                f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}"
            )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()
        scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e. g. for TensorBoard
                model,
                # inference() changes the segmentation-mask format inside the data
                # loader, so a fresh data loader is created for every evaluation:
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
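amp.scale_loss above comes from NVIDIA Apex, so the model and optimizer are expected to be registered with Apex before do_train is called. A sketch of that setup, assuming Apex is installed (the opt_level value is an assumption):

from apex import amp

# "O1" enables mixed precision; with "O0" (pure FP32) the amp.scale_loss
# context in the training loop effectively becomes a no-op
model, optimizer = amp.initialize(model, optimizer, opt_level="O1")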
Example 4
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument(
        "--json-file",
        default="",
        metavar="FILE",
        help="path to prediction bbox json file",
    )
    # parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    # num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    # distributed = num_gpus > 1

    # if distributed:
    #     torch.cuda.set_device(args.local_rank)
    #     torch.distributed.init_process_group(
    #         backend="nccl", init_method="env://"
    #     )
    #     synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    # logger.info("Using {} GPUs".format(num_gpus))
    # logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    # model = build_detection_model(cfg)
    # model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    # checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    # if cfg.MODEL.MASK_ON:
    #     iou_types = iou_types + ("segm",)
    # if cfg.MODEL.KEYPOINT_ON:
    #     iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=False)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        # inference(
        #     model,
        #     data_loader_val,
        #     dataset_name=dataset_name,
        #     iou_types=iou_types,
        #     box_only=False if cfg.MODEL.FCOS_ON or cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
        #     device=cfg.MODEL.DEVICE,
        #     expected_results=cfg.TEST.EXPECTED_RESULTS,
        #     expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        #     output_folder=output_folder,
        # )

        # extra_args = dict(
        #     box_only=False,
        #     iou_types=iou_types,
        #     expected_results=cfg.TEST.EXPECTED_RESULTS,
        #     expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        # )
        dataset = data_loader_val.dataset

        # evaluate(dataset=dataset,
        #         predictions=predictions,
        #         output_folder=output_folder,
        #         only_human=True,
        #         **extra_args)

        do_coco_json_evaluation(
            dataset=dataset,
            json_file=args.json_file,
            box_only=False,
            output_folder=output_folder,
            iou_types=iou_types,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL)

        synchronize()
Example 5
def main():
    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/caffe2/e2e_mask_rcnn_R_50_FPN_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        split=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        predictions = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            device=cfg.MODEL.DEVICE,
            output_folder=output_folder,
        )

        eval(
            predictions,
            data_loader_val,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        )

        synchronize()
Example 6
def do_train(model, data_loader, data_loader_val, optimizer, scheduler,
             checkpointer, device, checkpoint_period, vis_period, arguments,
             cfg, tb_writer, distributed):
    from tools.train_net import run_test
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()
    vis_num = 0
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(
            v * cfg.SOLVER.LOSS_WEIGHT.MASK_WEIGHT if k == 'loss_mask' else v *
            cfg.SOLVER.LOSS_WEIGHT.BOX_WEIGHT for k, v in loss_dict.items())
        # losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        loss_dict_reduced = {
            k: (v *
                cfg.SOLVER.LOSS_WEIGHT.MASK_WEIGHT if k == 'loss_mask' else v *
                cfg.SOLVER.LOSS_WEIGHT.BOX_WEIGHT)
            for k, v in loss_dict_reduced.items()
        }

        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        if tb_writer:
            tb_writer.add_scalars('train/Losses',
                                  loss_dict_reduced,
                                  global_step=iteration)
            tb_writer.add_scalar('train/Loss',
                                 losses_reduced,
                                 global_step=iteration)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if cfg.SOLVER.VIS_ON and iteration % vis_period == 0:
            # visualize the predicted boxes
            # set model to eval mode
            model.eval()
            vis_image, vis_image_transformed, target = data_loader_val.dataset.get_image(
                vis_num)
            image_list = to_image_list(vis_image_transformed,
                                       cfg.DATALOADER.SIZE_DIVISIBILITY)
            image_list = image_list.to(device)
            cpu_device = torch.device("cpu")
            with torch.no_grad():
                predictions = model(image_list)
                predictions = [o.to(cpu_device) for o in predictions]

            # only one picture
            predictions = predictions[0]
            top_predictions = select_topn_predictions(predictions, 3)

            # visualize
            result = vis_image.copy()
            result = overlay_boxes_cls_names(result, top_predictions, target)

            result = torch.from_numpy(result)
            result = result.permute(2, 0, 1)[None, :, :, :]
            result = make_grid([result])
            if tb_writer:
                tb_writer.add_image('Image_train', result, iteration)
            synchronize()
            model.train()
            vis_num += 1
            vis_num %= len(data_loader_val.dataset)
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)

            # eval
            model.eval()
            results = run_test(cfg,
                               model,
                               distributed,
                               iter=iteration,
                               valid=True)
            if tb_writer:
                for result in results:
                    for k, v in result.items():
                        tb_writer.add_scalar('valid/{}'.format(k),
                                             v,
                                             global_step=iteration)
            synchronize()
            model.train()

        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
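This variant of do_train expects a TensorBoard writer exposing add_scalar, add_scalars and add_image. A sketch of creating one with PyTorch's built-in TensorBoard support (the log directory is an assumption; tensorboardX.SummaryWriter works the same way on older PyTorch):

from torch.utils.tensorboard import SummaryWriter

tb_writer = SummaryWriter(log_dir="runs/maskrcnn_experiment")  # hypothetical log dir
# pass tb_writer into do_train(...) and call tb_writer.close() once training finishes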
Example 7
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    cf.args = args

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    #    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)
Example 8
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.deprecated.init_process_group(
            backend="nccl", init_method="env://"
        )

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
Example 9
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="data/occlusion_net_train.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)

    parser.add_argument(
        "--cometml-tag",
        dest="cometml_tag",
        default="occlusion-net",
    )

    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    if USE_COMETML:
        experiment.add_tag(args.cometml_tag)

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))
 
    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
Example 10
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    parser.add_argument(
        "--build-model",
        default="",
        metavar="FILE",
        help="path to NAS model build file",
        type=str,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    if cfg.NAS.TRAIN_SINGLE_MODEL:
        assert len(
            args.build_model) != 0, 'args.build_model should be provided'
        model_config = json.load(open(args.build_model, 'r'))
        if isinstance(model_config, list):
            assert len(model_config) == 1
            model_config = model_config[0]
        print('Training single model:', model_config)
        model = train(cfg, args.local_rank, args.distributed, model_config)
    else:
        model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
Example 11
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    _ = checkpointer.load(cfg.MODEL.WEIGHT)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    if cfg.TEST.MULTI_SCALE:
        data_loaders_val = []
        for min_size_test, max_size_test in cfg.TEST.MULTI_SIZES:
            cfg.defrost()
            cfg.INPUT.MIN_SIZE_TEST = min_size_test
            cfg.INPUT.MAX_SIZE_TEST = max_size_test
            cfg.freeze()
            data_loaders_val.extend(
                make_data_loader(cfg,
                                 is_train=False,
                                 is_distributed=distributed))
        output_folders = output_folders * len(cfg.TEST.MULTI_SIZES)
        dataset_names = dataset_names * len(cfg.TEST.MULTI_SIZES)
    else:
        data_loaders_val = make_data_loader(cfg,
                                            is_train=False,
                                            is_distributed=distributed)

    predictions = []

    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        prediction = inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
        predictions.append(prediction)

    if cfg.TEST.MULTI_SCALE:

        logger.info("Processing multi-scale bbox voting....")
        voted_predictions = voting(
            predictions,
            args.local_rank)  # box_voting(predictions, args.local_rank)
        torch.save(voted_predictions,
                   os.path.join(output_folders[0], 'predictions.pth'))

        extra_args = dict(
            box_only=cfg.MODEL.RPN_ONLY,
            iou_types=iou_types,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
        )

        evaluate(dataset=data_loaders_val[0].dataset,
                 predictions=voted_predictions,
                 output_folder=output_folders[0],
                 **extra_args)

    else:
        for prediction, output_folder, dataset_name, data_loader_val in zip(
                predictions, output_folders, dataset_names, data_loaders_val):
            extra_args = dict(
                box_only=cfg.MODEL.RPN_ONLY,
                iou_types=iou_types,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            )

            evaluate(dataset=data_loader_val.dataset,
                     predictions=prediction,
                     output_folder=output_folder,
                     **extra_args)
    return 0
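The multi-scale branch relies on TEST.MULTI_SCALE and TEST.MULTI_SIZES, which extend the stock maskrcnn_benchmark config. A hedged sketch of enabling them through the same opts mechanism used elsewhere in these examples (the size values are assumptions):

opts = [
    "TEST.MULTI_SCALE", "True",
    "TEST.MULTI_SIZES", "((800, 1333), (1000, 1666))",
]
cfg.merge_from_file(args.config_file)
cfg.merge_from_list(opts)
cfg.freeze()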
Example 12
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    import random
    import torch.backends.cudnn as cudnn
    import numpy as np
    seed = 1
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed + 1)
    random.seed(seed + 2)
    np.random.seed(seed + 3)
    print('use seed')
    cudnn.deterministic = True

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = os.path.join(
        cfg.OUTPUT_DIR, cfg.SUBDIR,
        'GPU' + str(num_gpus) + '_LR' + str(cfg.SOLVER.BASE_LR))
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
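The seeding block above can be gathered into a single helper; this is only a sketch mirroring the calls in the example (adding cudnn.benchmark = False is an extra assumption for reproducibility):

import random
import numpy as np
import torch
import torch.backends.cudnn as cudnn

def seed_everything(seed=1):
    # same calls as in the example, collected in one place
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed + 1)
    random.seed(seed + 2)
    np.random.seed(seed + 3)
    cudnn.deterministic = True
    cudnn.benchmark = False  # assumption: also disable cuDNN autotuning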
Example 13
def main():
    os.environ['CUDA_VISIBLE_DEVICES'] = '2'
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="/home/asd/Project/AirplaneDetection/Gliding-vertex-Trainer/gliding_vertex-master/configs/glide/dota.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default="/home/asd/Project/AirplaneDetection/Gliding-vertex-Trainer/exp_dota/0909/model_final.pth",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)
    """
    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg, is_train=False, is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
    """

    from maskrcnn_benchmark.data.transforms.build import build_transforms
    from PIL import Image
    import torchvision.transforms.functional as F
    transform = build_transforms( cfg, is_train=False )

    img_dir = "/home/asd/Mission/GaoFen/airplane_detection/data/train/train_val/val/images"
    res_dir = "/home/asd/Mission/GaoFen/airplane_detection/data/train/train_val/val/res"
    model.eval()
    # imgs = os.listdir( img_dir )
    import glob
    imgs = glob.glob(img_dir+"/*.tif")
    for img in imgs:
        img_path = img  # glob.glob already returns the full path
        img_pil = Image.open( img_path )
        # for i in range( 360 ):
        original_img = img_pil
        # original_img = F.rotate( img_pil, 45, expand=True )

        origin_w, origin_h = original_img.size
        img, target = transform( original_img, None )
        print(img.shape)
        img = img.view( (1, img.shape[0], img.shape[1], img.shape[2] ) )
        h, w = img.shape[2:]
        if h % 32 != 0:
            new_h = ( h // 32 + 1 ) * 32
        else:
            new_h = h
        if w % 32 != 0:
            new_w = ( w // 32 + 1 ) * 32
        else:
            new_w = w

        ratio_w = 1. * new_w / w
        ratio_h = 1. * new_h / h

        padded_img = torch.zeros( (1, 3, new_h, new_w)).float()
        padded_img[:, :, :h, :w] = img

        prediction = model( padded_img.cuda() )[0]
        prediction = prediction.resize((origin_w * ratio_w, origin_h * ratio_h))
        hboxes = prediction.bbox.cpu()
        rboxes = prediction.get_field( "rboxes" ).cpu()
        ratios = prediction.get_field( "ratios" ).cpu()
        scores = prediction.get_field( "scores" ).cpu()
        # labels = prediction.get_field( "labels" ).cpu()

        for rbox, ratio, score in zip( rboxes, ratios, scores ):
            print( rbox )
            print( ratio, score )

        h_idx = ratios > 0.8
        # print(hboxes)
        h = hboxes[h_idx]
        hboxes_vtx = torch.stack( [h[:, 0], h[:, 1], h[:, 2], h[:, 1], h[:, 2], h[:, 3], h[:, 0], h[:, 3]] ).permute( 1, 0 )
        rboxes[h_idx] = hboxes_vtx
        # rboxes = rboxes.data.numpy().astype( np.int32 )
        rboxes = rboxes.data.numpy()
        
        keep = poly_nms( np.hstack( [rboxes, scores.cpu().data.numpy()[:, np.newaxis]] ).astype( np.double ), 0.1 )

        rboxes = rboxes[keep].astype( np.int32 )
        scores = scores[keep]
        hboxes = hboxes[keep]

        keep = np.where( scores > 0.6 )
        rboxes = rboxes[keep]
        scores = scores[keep].tolist()
        hboxes = hboxes[keep]

        # rboxes = list( map( minAreaRect, rboxes ) )
        if len( rboxes ) > 0:
            rboxes = np.vstack( rboxes )
        else:
            rboxes = np.array( rboxes )

        # vis( img_info["file_name"], rboxes )

        # img = cv2.imread( original_img )
        img = np.array( original_img.convert( "RGB" ) )[:, :, ::-1].copy()
        cv2.polylines( img, rboxes.reshape(-1, 4, 2).astype( np.int32 ), True, (0, 255, 255), thickness=2, lineType=cv2.LINE_AA )
        filename = img_path.split( "/" )[-1]
        cv2.imwrite( "{}/{}".format( res_dir, filename ), img )
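A quick worked check of the pad-to-multiple-of-32 arithmetic used in the loop above (the 750x1100 input size is hypothetical):

h, w = 750, 1100                                    # hypothetical size after transform
new_h = (h // 32 + 1) * 32 if h % 32 != 0 else h    # 768
new_w = (w // 32 + 1) * 32 if w % 32 != 0 else w    # 1120
assert (new_h, new_w) == (768, 1120)
ratio_h, ratio_w = 1. * new_h / h, 1. * new_w / w   # ~1.024 and ~1.018
# resizing the prediction by (origin_w * ratio_w, origin_h * ratio_h) compensates
# for the zero padding, so the boxes line up with the original, untransformed image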
Example 14
def do_train(
    model,
    data_loader,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    arguments,
    disable_allreduce_for_logging,
    per_iter_start_callback_fn=None,
    per_iter_end_callback_fn=None,
):
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    def prefetcher(load_iterator):
        prefetch_stream = torch.cuda.Stream()
        pad_batches = []

        def _prefetch():
            try:
                # I'm not sure why the trailing _ is necessary but the reference used
                # "for i, (images, targets, _) in enumerate(data_loader):" so I'll keep it.
                images, targets, _ = next(load_iterator)
            except StopIteration:
                return None, None

            with torch.cuda.stream(prefetch_stream):
                # TODO:  I'm not sure if the dataloader knows how to pin the targets' datatype.
                targets = [
                    target.to(device, non_blocking=True) for target in targets
                ]
                images = images.to(device, non_blocking=True)

            return images, targets

        next_images, next_targets = _prefetch()

        while next_images is not None:
            torch.cuda.current_stream().wait_stream(prefetch_stream)
            current_images, current_targets = next_images, next_targets
            next_images, next_targets = _prefetch()
            yield current_images, current_targets

    synchronize()
    optimizer.zero_grad()
    for iteration, (images,
                    targets) in enumerate(prefetcher(iter(data_loader)),
                                          start_iter):

        if per_iter_start_callback_fn is not None:
            per_iter_start_callback_fn(iteration=iteration)

        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict = model(images, targets)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        if not disable_allreduce_for_logging:
            loss_dict_reduced = reduce_loss_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            meters.update(loss=losses_reduced, **loss_dict_reduced)
        else:
            meters.update(loss=losses, **loss_dict)

        # optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        # with optimizer.scale_loss(losses) as scaled_losses:
        with amp.scale_loss(losses, optimizer) as scaled_losses:
            scaled_losses.backward()
        optimizer.step()
        # set_grads_to_none(model)
        optimizer.zero_grad()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0 and arguments["save_checkpoints"]:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if iteration == max_iter and arguments["save_checkpoints"]:
            checkpointer.save("model_final", **arguments)

        # per-epoch work (testing)
        if per_iter_end_callback_fn is not None:
            # Note: iteration has been incremented previously for
            # human-readable checkpoint names (i.e. 60000 instead of 59999)
            # so need to adjust again here
            early_exit = per_iter_end_callback_fn(iteration=iteration - 1)
            if early_exit:
                break

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
    if per_iter_end_callback_fn is not None:
        if early_exit:
            return True
        else:
            return False
    else:
        return None
Example 15
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    # config-file argument
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",  # 用于help 信息输出
        help="path to config file",
        type=str,
    )
    # GPU index used by the current process
    parser.add_argument("--local_rank", type=int, default=0)
    # flag to skip testing the final model
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",  # with action='store_true' (or 'store_false'), passing the flag
        # on the command line sets the value directly; no explicit True/False argument is needed
    )
    # override options from the config file on the command line
    # e.g. opts=['SOLVER.IMS_PER_BATCH', '2', 'SOLVER.BASE_LR', '0.0025']
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    # multi-GPU training
    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    # merge the config file into cfg
    cfg.merge_from_file(args.config_file)
    # merge the opts list into cfg
    cfg.merge_from_list(args.opts)
    # make cfg and all of its child nodes immutable
    cfg.freeze()

    # directory where training outputs are stored
    # _C.OUTPUT_DIR = "."
    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    # log basic run information
    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    # path where the merged config is saved
    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
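For reference, the opts list shown in the comments above corresponds to trailing key/value pairs on the command line; a hedged example (the config path is hypothetical):

# python tools/train_net.py --config-file configs/e2e_mask_rcnn_R_50_FPN_1x.yaml \
#     SOLVER.IMS_PER_BATCH 2 SOLVER.BASE_LR 0.0025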
Example 16
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = "."
    logger = setup_logger("maskrcnn_benchmark",
                          save_dir,
                          get_rank(),
                          filename='test_all_BDD_ckpts_log.txt')
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    # initialize model
    model = build_detection_model(cfg, save_features=False)
    model.to(cfg.MODEL.DEVICE)
    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    # initialize test type, output folders and dataloader
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    # Testing on multiple checkpoints if the weight is a directory instead of a .pth file
    if cfg.MODEL.WEIGHT.endswith('.pth') or cfg.MODEL.WEIGHT.endswith('.pkl'):
        all_ckpy_names = [cfg.MODEL.WEIGHT]
    else:
        all_ckpy_names = sorted(
            glob.glob(os.path.join(cfg.MODEL.WEIGHT, '*.pth')))
    logger.info("Testing on checkpoints:", all_ckpy_names)
    for ckpt_name in all_ckpy_names:
        logger.info("Testing {}".format(ckpt_name))
        _ = checkpointer.load(ckpt_name)  # instead of cfg.MODEL.WEIGHT
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            # if the inference is done, only do the evaluation
            # if os.path.isfile(os.path.join(output_folder, "predictions.pth")):
            #     logger.info("Inference was done, only do evaluation!")
            #     predictions = torch.load(os.path.join(output_folder, "predictions.pth"))

            #     extra_args = dict(
            #                     box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            #                     iou_types=iou_types,
            #                     expected_results=cfg.TEST.EXPECTED_RESULTS,
            #                     expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            #                     )
            #     evaluate(dataset=data_loader_val.dataset,
            #             predictions=predictions,
            #             output_folder=output_folder,
            #             **extra_args)
            # else:
            # logger.info("No inference was done, run inference first")
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                convert_pred_coco2cityscapes=cfg.DATASETS.CONVERT,
            )
            synchronize()
Example 17
def main():
    # setting default
    class_num = 21   # number of classes
    batch_size = 2   # training batch size
    save_period = 50   # save a checkpoint and run a test every save_period iterations
    max_iteration = 400000  # total number of training iterations
    lr_reduce_step = (300000, 340000)  # reduce the learning rate at iterations 300000 and 340000
    save_path = 'checkpoints/test'  # where to save the model (e.g. change checkpoints/XXXX)
    train_mode = 'kd'  # training mode (teacher/student/kd)

    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--teacher-config-file",
        default="../configs/centermask/centermask_V_19_eSE_FPN_ms_3x.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument(
        "--student-config-file",
        default="../configs/centermask/centermask_V_19_eSE_FPN_lite_res600_ms_bs16_4x.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "--opts",
        help="Modify config options using the command-line",
        default=['MODEL.FCOS.NUM_CLASSES', class_num, 'SOLVER.CHECKPOINT_PERIOD', save_period, 'SOLVER.TEST_PERIOD',
                 save_period, 'SOLVER.IMS_PER_BATCH', batch_size, 'SOLVER.MAX_ITER', max_iteration, 'SOLVER.STEPS',
                 lr_reduce_step, 'OUTPUT_DIR', save_path],
        nargs=argparse.REMAINDER,
    )

    # setting kd loss
    if train_mode == 'kd':
        parser.add_argument('--loss_head', default=True)
        parser.add_argument('--loss_correlation', default=True)
        parser.add_argument('--loss_featuremap', default=False)
    else:  # always False
        parser.add_argument('--loss_head', default=False)
        parser.add_argument('--loss_correlation', default=False)
        parser.add_argument('--loss_featuremap', default=False)

    global args
    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()
    t_cfg = copy.deepcopy(cfg)

    cfg.merge_from_file(args.student_config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    t_cfg.merge_from_file(args.teacher_config_file)
    t_cfg.merge_from_list(args.opts)
    t_cfg.freeze()
    if train_mode == 'teacher':
        total_cfg = [t_cfg]
    elif train_mode == 'student':
        total_cfg = [cfg]
    else:
        total_cfg = [cfg, t_cfg]

    output_dir = total_cfg[0].OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    if train_mode == 'teacher':
        logger.info("Loaded configuration file {}".format(args.teacher_config_file))
    else:
        logger.info("Loaded configuration file {}".format(args.student_config_file))
    with open(args.student_config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(total_cfg[0]))

    model = train(total_cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(total_cfg[0], model, args.distributed)
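The --opts default above is later consumed by the yacs-style merge_from_list, which reads the list as alternating KEY, value pairs; a minimal illustration of that pairing (values are the ones hard-coded above):

# Sketch of how an opts list maps onto config overrides.
opts = ['SOLVER.IMS_PER_BATCH', 2, 'SOLVER.MAX_ITER', 400000, 'OUTPUT_DIR', 'checkpoints/test']
overrides = dict(zip(opts[0::2], opts[1::2]))
print(overrides)
# {'SOLVER.IMS_PER_BATCH': 2, 'SOLVER.MAX_ITER': 400000, 'OUTPUT_DIR': 'checkpoints/test'}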
Ejemplo n.º 18
0
    def train(self, local_rank):
        if local_rank == 0:
            self.logger.info(
                'population_num = {} select_num = {} mutation_num = {} '
                'crossover_num = {} random_num = {} max_epochs = {}'.format(
                    search_config.population_num, search_config.select_num,
                    search_config.mutation_num, search_config.crossover_num,
                    search_config.population_num - search_config.mutation_num -
                    search_config.crossover_num, search_config.max_epochs))

            if not self.load_checkpoint():
                self.candidates = self.random_can(search_config.population_num)
                self.save_checkpoint()

        while self.epoch < search_config.max_epochs:
            self.logger.info('epoch = {}'.format(self.epoch))

            if isinstance(self.candidates, list):
                self.candidates = torch.Tensor(self.candidates).long().cuda()

            if self.distributed:
                dist.broadcast(self.candidates, 0)

            self.candidates = [
                tuple(cand.tolist()) for cand in self.candidates
            ]

            loss_scale_hists = []
            results_scales = []
            for cand in self.candidates:
                synchronize()
                cfg.AUTOAUG.LIST = cand
                loss_scale_hist, results_scale = self.evaluate_single_aug(
                    cand, local_rank)
                loss_scale_hists.append(loss_scale_hist)
                results_scales.append(results_scale)

            self.epoch += 1
            if local_rank > 0:
                continue
            self.logger.info('Evaluation finish')

            for i, cand in enumerate(self.candidates):
                loss_hist = copy.deepcopy(loss_scale_hists[i])
                loss_hist /= loss_hist.sum()
                err = loss_hist.std()
                for j, result_s in enumerate(self.results_scale_baseline):
                    if results_scales[i][j] < result_s:
                        self.logger.info(
                            'Punishment for sacrificing other scales: %s (baseline: %s) in the %d-th scale of %s.'
                            % (str(copy.deepcopy(results_scales[i])),
                               str(self.results_scale_baseline), j, str(cand)))
                        err *= (result_s / results_scales[i][j])

                # A regularization term to keep the probabilities from decaying to zero.
                l_prob = (9 - np.array(cand)[self.prob_idx].mean()) * 1e-2
                err += l_prob
                self.vis_dict[cand]['err'] = err
                self.vis_dict[cand]['loss_hist'] = str(loss_scale_hists[i])

            self.memory.append([])
            for cand in self.candidates:
                self.memory[-1].append(cand)
                self.vis_dict[cand]['visited'] = True

            self.update_top_k(self.candidates,
                              k=search_config.select_num,
                              key=lambda x: self.vis_dict[x]['err'])
            self.update_top_k(self.candidates,
                              k=50,
                              key=lambda x: self.vis_dict[x]['err'])

            self.logger.info('epoch = {} : top {} result'.format(
                self.epoch - 1, len(self.keep_top_k[50])))
            for i, cand in enumerate(self.keep_top_k[50]):
                self.logger.info(
                    'No.{} {} Top-1 err = {} loss hist = {}'.format(
                        i + 1, cand, self.vis_dict[cand]['err'],
                        self.vis_dict[cand]['loss_hist']))
                ops = [search_config.blocks_keys[i] for i in cand]
                self.logger.info(ops)

            mutation = self.get_mutation(search_config.select_num,
                                         search_config.mutation_num,
                                         search_config.m_prob)
            crossover = self.get_crossover(search_config.select_num,
                                           search_config.crossover_num)
            rand = self.random_can(search_config.population_num -
                                   len(mutation) - len(crossover))

            self.candidates = mutation + crossover + rand

            self.save_checkpoint()

        synchronize()
        self.logger.info(self.keep_top_k[search_config.select_num])
        self.logger.info('finish!')
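For readers following the search loop, here is a minimal self-contained sketch of the top-k bookkeeping it relies on (update_top_k below is a stand-in, not the class's actual method): keep the k candidates with the lowest err.

def update_top_k(candidates, keep_top_k, k, key):
    # merge new candidates with the current pool and keep the k best
    pool = keep_top_k.get(k, []) + list(candidates)
    pool.sort(key=key)
    keep_top_k[k] = pool[:k]

vis_dict = {('a',): {'err': 0.3}, ('b',): {'err': 0.1}, ('c',): {'err': 0.2}}
keep_top_k = {}
update_top_k(vis_dict, keep_top_k, k=2, key=lambda cand: vis_dict[cand]['err'])
print(keep_top_k[2])  # [('b',), ('c',)]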
Ejemplo n.º 19
0
def do_train(
    cfg,
    total_model,
    data_loader,
    data_loader_val,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    args,
):
    if len(total_model) > 1:
        model = total_model[1]
        t_model = total_model[0]
    else:
        model = total_model[0]
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    model.train()

    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox", )
    if cfg[0].MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg[0].MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    dataset_names = cfg[0].DATASETS.TEST

    pytorch_1_1_0_or_later = is_pytorch_1_1_0_or_later()
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        # in pytorch >= 1.1.0, scheduler.step() should be run after optimizer.step()
        if not pytorch_1_1_0_or_later:
            scheduler.step()

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        loss_dict, features_dict = model(images, targets)
        if len(total_model) > 1:
            with torch.no_grad():
                t_loss_dict, t_features_dict = t_model(images, targets)
            # with torch.no_grad():
            #     # teacher_model = t_model
            #     t_weight = torch.load('./weights/centermask-V-19-eSE-FPN-ms-3x.pth')
            #     t_weight = t_weight['model']
            #     new_tweight = OrderedDict()
            #     for k, v in t_weight.items():
            #         name = k[7:]  # remove `module.`
            #         new_tweight[name] = v
            #     t_model.load_state_dict(new_tweight)
            #     t_loss_dict, t_features_dict = t_model(images, targets)

        if args.loss_head:

            loss_regression = new_box_loss(t_loss_dict['loss_reg'],
                                           loss_dict['loss_reg'])
            loss_center = new_center_loss(t_loss_dict['loss_centerness'],
                                          loss_dict['loss_centerness'])
            mode = 'KL'  # mode = 'KL' or 'cross-entropy'
            loss_pixel_wise = pixel_wise_loss(features_dict['box_cls'],
                                              t_features_dict['box_cls'], mode)
            loss_head = (loss_regression + loss_center + loss_pixel_wise)
            loss_dict.setdefault('loss_head', loss_head)
            del loss_dict['loss_reg']
            del loss_dict['loss_centerness']

        if iteration > cfg[0].SOLVER.WARMUP_ITERS:
            if args.loss_correlation:
                correlation = True
                loss_corr = get_feature(t_model, model, images, targets,
                                        correlation)
                loss_dict.setdefault('loss_corr', loss_corr)
            if args.loss_featuremap:
                correlation = False
                loss_featuremap = get_feature(t_model, model, images, targets,
                                              correlation)
                loss_dict.setdefault('loss_featuremap', loss_featuremap)

        losses = sum(loss for loss in loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        loss_dict_reduced = reduce_loss_dict(loss_dict)
        losses_reduced = sum(loss for loss in loss_dict_reduced.values())
        meters.update(loss=losses_reduced, **loss_dict_reduced)

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        if pytorch_1_1_0_or_later:
            scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join([
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0 and iteration != 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The inference call changes the segmentation mask format in the data loader,
                # so a new data loader is created every time:
                make_data_loader(cfg[0],
                                 is_train=False,
                                 is_distributed=(get_world_size() > 1),
                                 is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg[0].MODEL.MASK_ON else cfg[0].MODEL.RPN_ONLY,
                device=cfg[0].MODEL.DEVICE,
                expected_results=cfg[0].TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg[0].TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=None,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val,
                                    _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    if len(loss_dict) > 1:
                        # the model may return (loss_dict, features_dict); keep only the losses
                        loss_dict = loss_dict[0]
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join([
                    "[Validation]: ",
                    "eta: {eta}",
                    "iter: {iter}",
                    "{meters}",
                    "lr: {lr:.6f}",
                    "max mem: {memory:.0f}",
                ]).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                ))
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info("Total training time: {} ({:.4f} s / it)".format(
        total_time_str, total_training_time / (max_iter)))
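The pixel_wise_loss call above is used with mode='KL', but its implementation is not shown here; the following is only a hedged sketch of what a KL-based pixel-wise distillation term commonly looks like (the function name, signature, and temperature parameter are assumptions, not this codebase's exact code).

import torch
import torch.nn.functional as F

def pixel_wise_kl(student_logits, teacher_logits, temperature=1.0):
    # logits: (N, C, H, W); soften both class maps and compute
    # KL(teacher || student) over the class dimension, averaged per batch.
    s = F.log_softmax(student_logits / temperature, dim=1)
    t = F.softmax(teacher_logits / temperature, dim=1)
    return F.kl_div(s, t, reduction="batchmean") * temperature ** 2

student = torch.randn(2, 80, 32, 32)
teacher = torch.randn(2, 80, 32, 32)
print(pixel_wise_kl(student, teacher).item())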
Ejemplo n.º 20
0
def run_test(cfg, model, distributed):
    if distributed:
        model = model.module
    torch.cuda.empty_cache()  # TODO check if it helps
    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    # output_folders = [None] * len(cfg.DATASETS.TEST)
    # dataset_names = cfg.DATASETS.TEST
    dataset_names = cfg.DATASETS.NAS_VAL if not cfg.NAS.TRAIN_SINGLE_MODEL else cfg.DATASETS.TEST
    output_folders = [None] * len(dataset_names)

    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    if cfg.NAS.TRAIN_SINGLE_MODEL:
        if get_rank() == 0:
            print('==' * 20, 'Evaluating single model...', '==' * 20)
        for output_folder, dataset_name, data_loader_val in zip(
                output_folders, dataset_names, data_loaders_val):
            inference(
                model,
                data_loader_val,
                dataset_name=dataset_name,
                iou_types=iou_types,
                box_only=False
                if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                bbox_aug=cfg.TEST.BBOX_AUG.ENABLED,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
                c2d_json_path=cfg.MODEL.SEG_BRANCH.JSON_PATH,
                cfg=cfg,
            )
            synchronize()
        if get_rank() == 0:
            if 'coco' in cfg.DATASETS.NAME.lower():
                print('Evaluating panoptic results on COCO...')
                os.system(
                    'sh panoptic_scripts/bash_coco_val_evaluate.sh {} | tee pq_results'
                    .format(cfg.OUTPUT_DIR))
    elif not cfg.NAS.SKIP_NAS_TEST:
        if get_rank() == 0:
            print('==' * 10, 'Start NAS testing', '==' * 10)
        timer = Timer()
        timer.tic()
        searcher = PathPrioritySearch(cfg, base_dir='./nas_test')
        searcher.generate_fair_test()  # load cached results and generate new models to test
        searcher.search(model, output_folders, dataset_names, distributed)
        searcher.save_topk()
        total_time = timer.toc()
        total_time_str = get_time_str(total_time)
        if get_rank() == 0:
            print('Finish NAS testing, total time:{}'.format(total_time_str))
        os._exit(0)
    else:
        print('Skipping NAS testing...')
Ejemplo n.º 21
0
def main():
    mlperf_log.ROOT_DIR_MASKRCNN = os.path.dirname(os.path.abspath(__file__))

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if is_main_process():
        # Setting logging file parameters for compliance logging
        os.environ["COMPLIANCE_FILE"] = './MASKRCNN_complVv0.5.0_' + str(
            datetime.datetime.now())
        mlperf_log.LOG_FILE = os.getenv("COMPLIANCE_FILE")
        mlperf_log._FILE_HANDLER = logging.FileHandler(mlperf_log.LOG_FILE)
        mlperf_log._FILE_HANDLER.setLevel(logging.DEBUG)
        mlperf_log.LOGGER.addHandler(mlperf_log._FILE_HANDLER)

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

        print_mlperf(key=mlperf_log.RUN_START)

        # setting seeds - needs to be timed, so after RUN_START
        if is_main_process():
            master_seed = random.SystemRandom().randint(0, 2**32 - 1)
            seed_tensor = torch.tensor(master_seed,
                                       dtype=torch.float32,
                                       device=torch.device("cuda"))
        else:
            seed_tensor = torch.tensor(0,
                                       dtype=torch.float32,
                                       device=torch.device("cuda"))

        torch.distributed.broadcast(seed_tensor, 0)
        master_seed = int(seed_tensor.item())
    else:
        print_mlperf(key=mlperf_log.RUN_START)
        # random master seed, random.SystemRandom() uses /dev/urandom on Unix
        master_seed = random.SystemRandom().randint(0, 2**32 - 1)

    # actually use the random seed
    args.seed = master_seed
    # random number generator with seed set to master_seed
    random_number_generator = random.Random(master_seed)
    print_mlperf(key=mlperf_log.RUN_SET_RANDOM_SEED, value=master_seed)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    # generate worker seeds, one seed for every distributed worker
    worker_seeds = generate_seeds(
        random_number_generator,
        torch.distributed.get_world_size()
        if torch.distributed.is_initialized() else 1)

    # todo sharath what if CPU
    # broadcast seeds from rank=0 to other workers
    worker_seeds = broadcast_seeds(worker_seeds, device='cuda')

    # Setting worker seeds
    logger.info("Worker {}: Setting seed {}".format(
        args.local_rank, worker_seeds[args.local_rank]))
    torch.manual_seed(worker_seeds[args.local_rank])

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    print_mlperf(key=mlperf_log.RUN_FINAL)
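A small self-contained sketch of the per-worker seeding idea used above (generate_seeds itself is not shown; the helper below is illustrative): one master seed drives an RNG that emits one seed per distributed worker, so all workers get distinct but reproducible seeds.

import random

def generate_worker_seeds(master_seed, world_size):
    rng = random.Random(master_seed)
    return [rng.randint(0, 2**32 - 1) for _ in range(world_size)]

print(generate_worker_seeds(1234, 4))  # deterministic given the master seed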
Ejemplo n.º 22
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default=
        "/private/home/fmassa/github/detectron.pytorch_v2/configs/e2e_faster_rcnn_R_50_C4_1x_caffe2.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()
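checkpointer.load(ckpt, use_latest=args.ckpt is None) above falls back to the most recently saved checkpoint when no explicit path is given; below is a hedged sketch of that lookup (DetectronCheckpointer records the last saved path in a small marker file, though the exact file name and format may differ).

import os

def latest_checkpoint(save_dir):
    # the checkpointer keeps the path of the last saved model in a marker file
    marker = os.path.join(save_dir, "last_checkpoint")
    if not os.path.exists(marker):
        return None
    with open(marker) as f:
        return f.read().strip()

print(latest_checkpoint("checkpoints/run_01"))  # illustrative directory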
Ejemplo n.º 23
0
def main():
    #     apply_prior   prior_mask
    # 0        -             -
    # 1        Y             -
    # 2        -             Y
    # 3        Y             Y
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help=
        "The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--num_iteration',
                        dest='num_iteration',
                        help='Specify which weight to load',
                        default=-1,
                        type=int)
    parser.add_argument('--object_thres',
                        dest='object_thres',
                        help='Object threshold',
                        default=0.4,
                        type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres',
                        dest='human_thres',
                        help='Human threshold',
                        default=0.6,
                        type=float)
    parser.add_argument('--prior_flag',
                        dest='prior_flag',
                        help='whether use prior_flag',
                        default=1,
                        type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1 and torch.cuda.is_available()

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    print('prior flag: {}'.format(args.prior_flag))

    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("DRG.inference", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)

    if args.num_iteration != -1:
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_sp",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_sp",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    opt = {}
    opt['word_dim'] = 300
    opt['use_thres_dic'] = 1
    for output_folder, dataset_name in zip(output_folders, dataset_names):
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        test_detection = pickle.load(open(data_args['test_detection_file'],
                                          "rb"),
                                     encoding='latin1')
        word_embeddings = pickle.load(open(data_args['word_embedding_file'],
                                           "rb"),
                                      encoding='latin1')
        opt['thres_dic'] = pickle.load(open(data_args['threshold_dic'], "rb"),
                                       encoding='latin1')
        output_file = os.path.join(output_folder, 'detection.pkl')
        # hico_folder = os.path.join(output_folder, 'HICO')
        output_map_folder = os.path.join(output_folder, 'map')

        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset.".format(dataset_name))

        run_test(model,
                 dataset_name=dataset_name,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 output_file=output_file,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 device=device,
                 cfg=cfg,
                 opt=opt)

        # Generate_HICO_detection(output_file, hico_folder)
        compute_hico_map(output_map_folder, output_file, 'test')
Ejemplo n.º 24
0
def main():
    args = parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.merge_from_list(["MODEL.WEIGHT", args.weight])

    output_dir = os.path.dirname(cfg.MODEL.WEIGHT)
    cfg.OUTPUT_DIR = output_dir
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    cfg.freeze()

    save_dir = ""
    logger = setup_logger("maskrcnn_benchmark", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    checkpointer = DetectronCheckpointer(cfg, model, save_dir=cfg.MODEL.WEIGHT)
    _ = checkpointer.load(cfg.MODEL.WEIGHT, cfg.TRAIN.IGNORE_LIST)

    iou_types = ("bbox", )
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm", )
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST

    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=distributed)

    # default `log_dir` is "runs" - we'll be more specific here
    # tb_writer = SummaryWriter('runs/6dvnet_test_3d_1')

    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        # dataiter = iter(data_loader_val)
        # images, bbox, labels = dataiter.next()

        # create grid of images
        # img_grid = make_grid(images.tensors)

        # show images
        # matplotlib_imshow(img_grid, one_channel=False)

        # write to tensorboard
        # tb_writer.add_image('6dvnet_test_3d_1', img_grid)
        #
        # tb_writer.add_graph(model, images.tensors)
        # tb_writer.close()

        inference(
            model,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=iou_types,
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
            cfg=cfg,
        )
        synchronize()
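The commented-out block above sketches TensorBoard image logging; here is a hedged, self-contained version of that idea, assuming torchvision and tensorboard are installed (the run directory and the dummy batch are illustrative).

import torch
from torch.utils.tensorboard import SummaryWriter
from torchvision.utils import make_grid

writer = SummaryWriter("runs/6dvnet_test_3d_1")  # same run name as the comment above
images = torch.rand(4, 3, 64, 64)                # stands in for a batch from the data loader
writer.add_image("val_batch", make_grid(images))
writer.close()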
Ejemplo n.º 25
0
def main():

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank",
                        type=int,
                        default=os.getenv('LOCAL_RANK', 0))
    parser.add_argument("--max_steps",
                        type=int,
                        default=0,
                        help="Override number of training steps in the config")
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument("--fp16",
                        help="Mixed precision training",
                        action="store_true")
    parser.add_argument("--amp",
                        help="Mixed precision training",
                        action="store_true")
    parser.add_argument('--skip_checkpoint',
                        default=False,
                        action='store_true',
                        help="Whether to save checkpoints")
    parser.add_argument(
        "--json-summary",
        help="Out file for DLLogger",
        default="dllogger.out",
        type=str,
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )
    args = parser.parse_args()
    args.fp16 = args.fp16 or args.amp

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)

    # Redundant option - Override config parameter with command line input
    if args.max_steps > 0:
        cfg.SOLVER.MAX_ITER = args.max_steps

    if args.skip_checkpoint:
        cfg.SAVE_CHECKPOINT = False

    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    if is_main_process():
        dllogger.init(backends=[
            dllogger.JSONStreamBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                       filename=args.json_summary),
            dllogger.StdOutBackend(verbosity=dllogger.Verbosity.VERBOSE,
                                   step_format=format_step)
        ])
    else:
        dllogger.init(backends=[])

    dllogger.log(step="PARAMETER", data={"gpu_count": num_gpus})
    # dllogger.log(step="PARAMETER", data={"environment_info": collect_env_info()})
    dllogger.log(step="PARAMETER", data={"config_file": args.config_file})

    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()

    dllogger.log(step="PARAMETER", data={"config": cfg})

    fp16 = args.fp16

    model, iters_per_epoch = train(cfg, args.local_rank, args.distributed,
                                   fp16, dllogger)

    if not args.skip_test:
        if not cfg.PER_EPOCH_EVAL:
            test_model(cfg, model, args.distributed, iters_per_epoch, dllogger)
Ejemplo n.º 26
0
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1
    size = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    # required environment variables: MASTER_ADDR, MASTER_PORT, RANK, WORLD_SIZE
    if args.distributed:
        dist_url = "tcp://" + os.environ["MASTER_ADDR"] + ":" + os.environ["MASTER_PORT"]
        rank = int(os.environ["RANK"])
        print("dist_url: ", dist_url)
        print("rank: ", rank)
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method=dist_url, rank=rank, world_size=size
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
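This variant reads MASTER_ADDR, MASTER_PORT, RANK and WORLD_SIZE itself instead of relying on init_method="env://"; for a single-process dry run those variables can be set by hand (values below are illustrative), while a launcher such as torch.distributed.launch sets them for real multi-GPU jobs.

import os

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
os.environ.setdefault("RANK", "0")
os.environ.setdefault("WORLD_SIZE", "1")
# with these in place, the distributed branch above can build
# dist_url = "tcp://127.0.0.1:29500" and initialize a one-process group.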
Ejemplo n.º 27
0
def inference(
        model,
        data_loader,
        dataset_name,
        iou_types=("bbox", ),
        box_only=False,
        bbox_aug=False,
        device="cuda",
        expected_results=(),
        expected_results_sigma_tol=4,
        output_folder=None,
):
    # convert to a torch.device for efficiency
    device = torch.device(device)
    num_devices = get_world_size()
    logger = logging.getLogger("maskrcnn_benchmark.inference")
    dataset = data_loader.dataset
    logger.info("Start evaluation on {} dataset({} images).".format(
        dataset_name, len(dataset)))
    total_timer = Timer()
    inference_timer = Timer()
    total_timer.tic()
    predictions = compute_on_dataset(model, data_loader, device, bbox_aug,
                                     inference_timer)
    # wait for all processes to complete before measuring the time
    synchronize()
    print('>>>>>>==============results_dict_cpu.keys()=',
          len(predictions.keys()), predictions.keys())

    total_time = total_timer.toc()
    total_time_str = get_time_str(total_time)
    logger.info(
        "Total run time: {} ({} s / img per device, on {} devices)".format(
            total_time_str, total_time * num_devices / len(dataset),
            num_devices))
    total_infer_time = get_time_str(inference_timer.total_time)
    logger.info(
        "Model inference time: {} ({} s / img per device, on {} devices)".
        format(
            total_infer_time,
            inference_timer.total_time * num_devices / len(dataset),
            num_devices,
        ))

    predictions = _accumulate_predictions_from_multiple_gpus(predictions)
    print('>>>>>><<<<<<<<<<<==============results_dict_cpu.keys()=',
          len(predictions))
    print(predictions[0])

    if not is_main_process():
        return

    if output_folder:
        torch.save(predictions, os.path.join(output_folder, "predictions.pth"))

    extra_args = dict(
        box_only=box_only,
        iou_types=iou_types,
        expected_results=expected_results,
        expected_results_sigma_tol=expected_results_sigma_tol,
    )

    return evaluate(dataset=dataset,
                    predictions=predictions,
                    output_folder=output_folder,
                    **extra_args)
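_accumulate_predictions_from_multiple_gpus is called above but not shown; here is a hedged, self-contained sketch of the idea (simplified: the real helper also gathers the per-rank dicts across processes first): merge dicts keyed by image id and flatten them into one ordered list.

def accumulate_predictions(per_rank_predictions):
    # per_rank_predictions: list of {image_id: prediction} dicts, one per GPU
    merged = {}
    for rank_dict in per_rank_predictions:
        merged.update(rank_dict)
    return [merged[image_id] for image_id in sorted(merged)]

print(accumulate_predictions([{0: "pred_a", 2: "pred_c"}, {1: "pred_b"}]))
# ['pred_a', 'pred_b', 'pred_c']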
Ejemplo n.º 28
0
def main():
    torch.cuda.set_device(7)

    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default=
        "/home/SelfDriving/maskrcnn/maskrcnn-benchmark/configs/e2e_faster_rcnn_R_50_C4_1x.yaml",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
    )
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    output_config_path = os.path.join(cfg.OUTPUT_DIR, 'config.yml')
    logger.info("Saving config into: {}".format(output_config_path))
    # save overloaded model config in the output directory
    save_config(cfg, output_config_path)

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
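save_config above writes the merged configuration next to the outputs; a hedged sketch of what such a helper typically does for a yacs CfgNode (the repository's version may differ):

def save_config(cfg, path):
    # yacs CfgNode objects serialize themselves to YAML via .dump()
    with open(path, "w") as f:
        f.write(cfg.dump())
# e.g. save_config(cfg, os.path.join(cfg.OUTPUT_DIR, 'config.yml'))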
Ejemplo n.º 29
0
def train(cfg, local_rank, distributed, d_path=None):

    MaskDnet = MaskDiscriminator(nc=256)
    BBoxDnet = BoxDiscriminator(nc=256, ndf=64)
    Dnet = CombinedDiscriminator(MaskDnet, BBoxDnet)
    model = Mask_RCNN(cfg)
    g_rcnn = GAN_RCNN(model, Dnet)

    device = torch.device(cfg.MODEL.DEVICE)
    g_rcnn.to(device)

    g_optimizer = make_optimizer(cfg, model)
    d_optimizer = make_D_optimizer(cfg, Dnet)

    g_scheduler = make_lr_scheduler(cfg, g_optimizer)
    d_scheduler = make_lr_scheduler(cfg, d_optimizer)
    # model.BoxDnet = BBoxDnet

    # Initialize mixed-precision training
    use_mixed_precision = cfg.DTYPE == "float16"
    amp_opt_level = 'O1' if use_mixed_precision else 'O0'
    model, g_optimizer = amp.initialize(model, g_optimizer, opt_level=amp_opt_level)
    Dnet, d_optimizer = amp.initialize(Dnet, d_optimizer, opt_level=amp_opt_level)

    if distributed:
        g_rcnn = torch.nn.parallel.DistributedDataParallel(
                    g_rcnn, device_ids=[local_rank], output_device=local_rank,
                    # this should be removed if we update BatchNorm stats
                    broadcast_buffers=False,
                )

    arguments = {}
    arguments["iteration"] = 0

    output_dir = cfg.OUTPUT_DIR

    save_to_disk = get_rank() == 0
    checkpointer = DetectronCheckpointer(
        cfg, model, g_optimizer, g_scheduler, output_dir, save_to_disk
    )

    extra_checkpoint_data = checkpointer.load(cfg.MODEL.WEIGHT)

    arguments.update(extra_checkpoint_data)

    d_checkpointer = DetectronCheckpointer(
        cfg, Dnet, d_optimizer, d_scheduler, output_dir, save_to_disk
    )

    if d_path:
        d_checkpointer.load(d_path, use_latest=False)

    data_loader = make_data_loader(
            cfg,
            is_train=True,
            is_distributed=distributed,
            start_iter=arguments["iteration"],
        )

    test_period = cfg.SOLVER.TEST_PERIOD
    data_loader_val = make_data_loader(cfg, is_train=False, is_distributed=distributed, is_for_period=True)

    checkpoint_period = cfg.SOLVER.CHECKPOINT_PERIOD

    ## START TRAINING
    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")

    meters = TensorboardLogger(
            log_dir=cfg.OUTPUT_DIR + "/tensorboardX",
            start_iter=arguments['iteration'],
            delimiter="  ")

    max_iter = len(data_loader)
    start_iter = arguments["iteration"]
    g_rcnn.train()
    start_training_time = time.time()
    end = time.time()

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)

    dataset_names = cfg.DATASETS.TEST

    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):

        if any(len(target) < 1 for target in targets):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training {_} || targets Length={[len(target) for target in targets]}" )
            continue
        data_time = time.time() - end
        iteration = iteration + 1
        arguments["iteration"] = iteration

        images = images.to(device)
        targets = [target.to(device) for target in targets]

        g_loss_dict, d_loss_dict = g_rcnn(images, targets)

        g_losses = sum(loss for loss in g_loss_dict.values())
        d_losses = sum(loss for loss in d_loss_dict.values())

        # reduce losses over all GPUs for logging purposes
        g_loss_dict_reduced = reduce_loss_dict(g_loss_dict)
        g_losses_reduced = sum(loss for loss in g_loss_dict_reduced.values())
        
        d_loss_dict_reduced = reduce_loss_dict(d_loss_dict)
        d_losses_reduced = sum(loss for loss in d_loss_dict_reduced.values())
        
        meters.update(total_g_loss=g_losses_reduced, **g_loss_dict_reduced)
        meters.update(total_d_loss=d_losses_reduced, **d_loss_dict_reduced)

        g_optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(g_losses, g_optimizer) as g_scaled_losses:
            g_scaled_losses.backward()
        g_optimizer.step()
        g_scheduler.step()
        
        
        d_optimizer.zero_grad()
        # Note: If mixed precision is not used, this ends up doing nothing
        # Otherwise apply loss scaling for mixed-precision recipe
        with amp.scale_loss(d_losses, d_optimizer) as d_scaled_losses:
            d_scaled_losses.backward()
        d_optimizer.step()
        d_scheduler.step()

        batch_time = time.time() - end
        end = time.time()
        meters.update(time=batch_time, data=data_time)

        eta_seconds = meters.time.global_avg * (max_iter - iteration)
        eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))

        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=g_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
            d_checkpointer.save("dnet_{:07d}".format(iteration), **arguments)
            
        if data_loader_val is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")
            synchronize()
            _ = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                # The inference call changes the segmentation mask format in the data loader,
                # so a new data loader is created every time:
                make_data_loader(cfg, is_train=False, is_distributed=False, is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=cfg.OUTPUT_DIR,
            )
            synchronize()
            model.train()
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(loss for loss in loss_dict_reduced.values())
                    meters_val.update(loss=losses_reduced, **loss_dict_reduced)
            synchronize()
            logger.info(
                meters_val.delimiter.join(
                    [
                        "[Validation]: ",
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters_val),
                    lr=g_optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
        
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
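A worked example of the ETA estimate that the logging block above prints (numbers are illustrative): the smoothed seconds-per-iteration times the remaining iterations, rendered as a timedelta.

import datetime

avg_iter_time = 0.85            # seconds per iteration (illustrative global average)
max_iter, iteration = 90000, 12000
eta_seconds = avg_iter_time * (max_iter - iteration)
print(str(datetime.timedelta(seconds=int(eta_seconds))))  # 18:25:00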
Ejemplo n.º 30
0
def do_train(
    cfg,
    model,
    data_loader_support,
    data_loader_query,
    data_loader_val_support,
    data_loader_val_test,
    optimizer,
    scheduler,
    checkpointer,
    device,
    checkpoint_period,
    test_period,
    arguments,
    meters,
    meters_val,
):

    logger = logging.getLogger("maskrcnn_benchmark.trainer")
    logger.info("Start training")
    # meters = MetricLogger(delimiter="  ")
    max_iter = len(data_loader_support)
    start_iter = arguments["iteration"]
    model.train()
    start_training_time = time.time()
    end = time.time()

    batch_cls_json_file = cfg.MODEL.FEW_SHOT.SUP_INDICE_CLS
    with open(batch_cls_json_file, 'r') as f:
        batch_cls_sup = json.load(f)

    if cfg.MODEL.QRY_BALANCE:
        qry_cls_json_file = cfg.MODEL.QRY_INDICE_CLS
        with open(qry_cls_json_file, 'r') as f:
            batch_cls_qry = json.load(f)

    iou_types = ("bbox",)
    if cfg.MODEL.MASK_ON:
        iou_types = iou_types + ("segm",)
    if cfg.MODEL.KEYPOINT_ON:
        iou_types = iou_types + ("keypoints",)
    rank = dist.get_rank()
    # if is_main_process():
    #     import pdb
    #     pdb.set_trace()
    # else:
    #     return
    # for name, param in model. named_parameters():
    #     print(name, param, True if param.grad is not None else False)

    query_iterator = iter(data_loader_query)
    # print('len(data_loader_query):', len(data_loader_query))
    # import pdb; pdb.set_trace()
    weights_novel_all = []
    iteration_qry = 0
    for iteration, (images_sup, targets_sup, idx) in enumerate(data_loader_support, start_iter):
        if any(len(target) < 1 for target in targets_sup):
            logger.error(f"Iteration={iteration + 1} || Image Ids used for training support {idx} || targets Length={[len(target) for target in targets_sup]}")
            continue
        data_time = time.time() - end
        batch_id = batch_cls_sup[rank][iteration]

        iteration = iteration + 1
        arguments["iteration"] = iteration
        scheduler.step()
        images_sup = images_sup.to(device)
        targets_sup = [target.to(device) for target in targets_sup]
        # update weight:
        # print(targets_sup)
        # if is_main_process():
        #     import pdb
        #     pdb.set_trace()
        # else:
        #     return
        # print(iteration, idx, batch_id, targets_sup[0].extra_fields)

        weight_novel = model(images_sup, targets_sup,
                             is_support=True, batch_id=batch_id)
        # weights_novel[rank] = weight_novel
        # print('batch_id', batch_id, weight_novel[:10])
        # weight_novel = {batch_id:weight_novel}
        torch.cuda.empty_cache()

        # synchronize()
        weights_novel = [torch.empty_like(weight_novel)
                         for i in range(dist.get_world_size())]
        weights_novel = torch.cat(
            diffdist.functional.all_gather(weights_novel, weight_novel))
        # print(weights_novel[:,:10])
        # if is_main_process():
        #     import pdb
        #     pdb.set_trace()
        # else:
        #     return
        weights_novel_all.append(weights_novel)
        # # print(weights_novel_all)
        # print(torch.cat(weights_novel_all).size())
        # print(torch.cat(weights_novel_all)[:,:10])
        # (torch.cat(gather_list) * torch.cat(gather_list)).mean().backward()
        # print(weights_novel)
        if iteration % iter_size == 0:
            optimizer.zero_grad()
            losses_reduced = 0
            loss_dict_all = {}
            for i in range(iter_size_qry):
                images_qry, targets_qry, idx = next(query_iterator)
                images_qry = images_qry.to(device)
                targets_qry = [target.to(device) for target in targets_qry]
                if cfg.MODEL.QRY_BALANCE:
                    batch_id_qry = batch_cls_qry[rank][iteration_qry]
                    iteration_qry += 1
                    loss_dict = model(images_qry, targets_qry,
                                      is_query=True, batch_id=batch_id_qry, weights_novel=torch.cat(weights_novel_all))
                else:
                    loss_dict = model(images_qry, targets_qry,
                                      is_query=True, weights_novel=torch.cat(weights_novel_all))
                # if is_main_process():
                #     print('loss_dict', loss_dict)
                losses = sum(loss for loss in loss_dict.values()
                             ) / iter_size_qry
                # losses.backward(retain_graph=True)
                with amp.scale_loss(losses, optimizer) as scaled_losses:
                    scaled_losses.backward(retain_graph=True)
                torch.cuda.empty_cache()
                loss_dict_all = add_dict(loss_dict_all, loss_dict)
            loss_dict_all = avg_dict(loss_dict_all)
            # if is_main_process():
            #     print('loss_dict_all', loss_dict_all)
            # reduce losses over all GPUs for logging purposes
            loss_dict_reduced = reduce_loss_dict(loss_dict_all)
            # if is_main_process():
            #     print('loss_dict_reduced', loss_dict_reduced)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            # losses_dict_reduced = add_dict(losses_dict_reduced, loss_dict_reduced)

            meters.update(iteration / iter_size_qry, loss=losses_reduced,
                          lr=optimizer.param_groups[0]["lr"], **loss_dict_reduced)

            weights_novel_all = []

            # (weights_novel * weights_novel).mean().backward()
            # for name, param in model.named_parameters():
            # if 'backbone' not in name:
            # print(name, True if param.grad is not None else False)
            optimizer.step()
            batch_time = time.time() - end
            end = time.time()
            meters.update(iteration, time=batch_time, data=data_time)
            eta_seconds = meters.time.global_avg * (max_iter - iteration)
            eta_string = str(datetime.timedelta(seconds=int(eta_seconds)))
            torch.cuda.empty_cache()
        if iteration % 20 == 0 or iteration == max_iter:
            logger.info(
                meters.delimiter.join(
                    [
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration,
                    meters=str(meters),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
        if iteration % checkpoint_period == 0:
            checkpointer.save("model_{:07d}".format(iteration), **arguments)
        if data_loader_val_support is not None and test_period > 0 and iteration % test_period == 0:
            meters_val = MetricLogger(delimiter="  ")  # needed below for validation logging
            synchronize()
            # """
            model.train()
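            # Periodic validation: rebuild the novel-class classifier weights from the
            # validation support loader, concatenate them with the base classifier, then run inference.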
            with torch.no_grad():
                weights_novel_val_sup_all = []
                current_classifier_novel = torch.zeros(
                    [iter_size * nGPU, 1024]).to(device)
                # print(current_classifier_novel)
                avg_steps = 0
                for iteration_val_sup, (images_val_sup, targets_val_sup, idx_val_sup) in enumerate(tqdm(data_loader_val_support)):
                    if any(len(target) < 1 for target in targets_val_sup):
                        logger.error(f"Iteration={iteration + 1} || Image Ids used for training support {idx_val_sup} || targets Length={[len(target) for target in targets_val_sup]}")
                        continue
                    batch_id_val_sup = batch_cls_sup[rank][int(
                        iteration_val_sup)]
                    # print(iteration_val_sup)

                    images_val_sup = images_val_sup.to(device)
                    targets_val_sup = [target.to(device)
                                       for target in targets_val_sup]
                    weight_novel_val_sup = model(images_val_sup, targets_val_sup,
                                                 is_support=True, batch_id=batch_id_val_sup)
                    # weights_novel[rank] = weight_novel_val_sup
                    # print(weight_novel_val_sup.size())
                    # print('before', weight_novel_val_sup)
                    # print('batch_id', batch_id, weight_novel_val_sup[:10])
                    # weight_novel_val_sup = {batch_id:weight_novel_val_sup}
                    torch.cuda.empty_cache()

                    # synchronize()
                    weights_novel_val_sup = [torch.empty_like(weight_novel_val_sup)
                                             for i in range(dist.get_world_size())]
                    dist.all_gather(weights_novel_val_sup,
                                    weight_novel_val_sup)
                    # weights_novel_val_sup = torch.cat(
                    #     all_gather(weight_novel_val_sup))
                    # print('after', weights_novel_val_sup)
                    # print(idx, weights_novel_val_sup)
                    # print(weights_novel_val_sup[:,:10])
                    # if is_main_process():
                    #     import pdb
                    #     pdb.set_trace()
                    # else:
                    #     return
                    weights_novel_val_sup_all.append(
                        torch.cat(weights_novel_val_sup))
                    # print('length', len(weights_novel_val_sup_all))

                    if (iteration_val_sup + 1) % iter_size_qry == 0:
                        # print(torch.cat(weights_novel_val_sup_all).size())
                        # weights_novel_val_sup_all = []
                        avg_steps += 1
                        # print('current_classifier_novel', current_classifier_novel)
                        # print('weights_novel_val_sup_all', weights_novel_val_sup_all)
                        current_classifier_novel = current_classifier_novel + \
                            torch.cat(weights_novel_val_sup_all)
                        weights_novel_val_sup_all = []

                # if is_main_process():
                #     import pdb
                #     pdb.set_trace()
                # else:
                #     return
                # print(iteration_val_sup)
                current_classifier_novel_avg = current_classifier_novel / avg_steps
                model.module.roi_heads.box.cls_weights = torch.cat([model.module.roi_heads.box.predictor.cls_score.weight,
                                                                    current_classifier_novel_avg])
                # """
            output_folder = os.path.join(cfg.OUTPUT_DIR, "Validation")
            mkdir(output_folder)
            np.save(os.path.join(output_folder, 'cls_weights_'+str(iteration / iter_size_qry)), np.array(model.module.roi_heads.box.cls_weights.cpu().data))

            res_infer = inference(  # The result can be used for additional logging, e.g. for TensorBoard
                model,
                iteration / iter_size,
                # Inference changes the segmentation mask format inside the data loader,
                # so a fresh data loader is created every time:
                make_data_loader(cfg, is_train=False, is_distributed=(
                    get_world_size() > 1), is_for_period=True),
                dataset_name="[Validation]",
                iou_types=iou_types,
                box_only=False if cfg.MODEL.RETINANET_ON else cfg.MODEL.RPN_ONLY,
                device=cfg.MODEL.DEVICE,
                expected_results=cfg.TEST.EXPECTED_RESULTS,
                expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
                output_folder=output_folder,
            )
            # import pdb; pdb.set_trace()
            if res_infer:
                meters_val.update(iteration / iter_size, **res_infer)

            synchronize()
            # print('eval')
            model.train()

            """
            with torch.no_grad():
                # Should be one image for each GPU:
                for iteration_val, (images_val, targets_val, _) in enumerate(tqdm(data_loader_val_test)):
                    images_val = images_val.to(device)
                    targets_val = [target.to(device) for target in targets_val]
                    loss_dict = model(images_val, targets_val)
                    losses = sum(loss for loss in loss_dict.values())
                    loss_dict_reduced = reduce_loss_dict(loss_dict)
                    losses_reduced = sum(
                        loss for loss in loss_dict_reduced.values())
                    meters_val.update(
                        iteration / iter_size, loss=losses_reduced, **loss_dict_reduced)
            """
            synchronize()
            logger.info(
                meters_val.delimiter.join(
                    [
                        "[Validation]: ",
                        "eta: {eta}",
                        "iter: {iter}",
                        "{meters}",
                        "lr: {lr:.6f}",
                        "max mem: {memory:.0f}",
                    ]
                ).format(
                    eta=eta_string,
                    iter=iteration / iter_size,
                    meters=str(meters_val),
                    lr=optimizer.param_groups[0]["lr"],
                    memory=torch.cuda.max_memory_allocated() / 1024.0 / 1024.0,
                )
            )
#             """
        if iteration == max_iter:
            checkpointer.save("model_final", **arguments)
            # import json
            # json.dump(model.module.roi_heads.box.cls_weights, open(os.path.join(output_folder, 'cls_weights.json'), 'w'))

    total_training_time = time.time() - start_training_time
    total_time_str = str(datetime.timedelta(seconds=total_training_time))
    logger.info(
        "Total training time: {} ({:.4f} s / it)".format(
            total_time_str, total_training_time / (max_iter)
        )
    )
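The loop above accumulates gradients over several support/query batches before each optimizer step. Below is a minimal sketch of that accumulation pattern, stripped of the distributed gathering and amp scaling; names such as `data_iter` are placeholders, not from the repository.

def accumulate_and_step(model, optimizer, data_iter, iter_size, device="cuda"):
    # One optimizer step built from `iter_size` smaller batches, so the effective
    # batch size grows without extra GPU memory.
    optimizer.zero_grad()
    total_loss = 0.0
    for _ in range(iter_size):
        images, targets = next(data_iter)            # assumed (images, targets) pairs
        images = images.to(device)
        targets = [t.to(device) for t in targets]
        loss_dict = model(images, targets)           # dict of loss tensors, as in the loop above
        loss = sum(loss_dict.values()) / iter_size   # scale so gradients match one large batch
        loss.backward()
        total_loss += loss.item()
    optimizer.step()
    return total_loss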
Ejemplo n.º 31
0
def main():
    parser = argparse.ArgumentParser(
        description="PyTorch Object Detection Inference")
    parser.add_argument(
        "--config-file",
        default="configs/e2e_faster_rcnn_R_50_FPN_1x.yaml",
        metavar="FILE",
        help="path to config file",
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--ckpt",
        help="The path to the checkpoint for test, default is the latest checkpoint.",
        default=None,
    )
    parser.add_argument('--num_iteration',
                        dest='num_iteration',
                        help='Specify which weight to load',
                        default=-1,
                        type=int)
    parser.add_argument('--object_thres',
                        dest='object_thres',
                        help='Object threshold',
                        default=0.1,
                        type=float)  # used to be 0.4 or 0.05
    parser.add_argument('--human_thres',
                        dest='human_thres',
                        help='Human threshold',
                        default=0.8,
                        type=float)
    parser.add_argument('--prior_flag',
                        dest='prior_flag',
                        help='whether to use the prior mask flag',
                        default=1,
                        type=int)
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(
        os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    distributed = num_gpus > 1 and torch.cuda.is_available()

    if distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(backend="nccl",
                                             init_method="env://")
        synchronize()

    ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
    # DATA_DIR = os.path.abspath(os.path.join(ROOT_DIR, 'Data'))
    args.config_file = os.path.join(ROOT_DIR, args.config_file)

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    cfg.freeze()

    save_dir = ""
    logger = setup_logger("DRG", save_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(cfg)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    model = build_detection_model(cfg)
    # model.to(cfg.MODEL.DEVICE)
    device = torch.device(
        "cuda") if torch.cuda.is_available() else torch.device("cpu")
    model.to(device)

    # Initialize mixed-precision if necessary
    use_mixed_precision = cfg.DTYPE == 'float16'
    amp_handle = amp.init(enabled=use_mixed_precision, verbose=cfg.AMP_VERBOSE)

    output_dir = cfg.OUTPUT_DIR
    checkpointer = DetectronCheckpointer(cfg, model, save_dir=output_dir)

    if args.num_iteration != -1:
        args.ckpt = os.path.join(cfg.OUTPUT_DIR,
                                 'model_%07d.pth' % args.num_iteration)
    ckpt = cfg.MODEL.WEIGHT if args.ckpt is None else args.ckpt
    logger.info("Testing checkpoint {}".format(ckpt))
    _ = checkpointer.load(ckpt, use_latest=args.ckpt is None)

    # iou_types = ("bbox",)
    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            if args.num_iteration != -1:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name,
                                             "model_%07d" % args.num_iteration)
            else:
                output_folder = os.path.join(cfg.OUTPUT_DIR, "inference_ho",
                                             dataset_name)
            mkdir(output_folder)
            output_folders[idx] = output_folder

    opt = {}
    opt['word_dim'] = 300
    for output_folder, dataset_name in zip(output_folders, dataset_names):
        data = DatasetCatalog.get(dataset_name)
        data_args = data["args"]
        im_dir = data_args['im_dir']
        test_detection = pickle.load(open(data_args['test_detection_file'],
                                          "rb"),
                                     encoding='latin1')
        prior_mask = pickle.load(open(data_args['prior_mask'], "rb"),
                                 encoding='latin1')
        action_dic = json.load(open(data_args['action_index']))
        action_dic_inv = {y: x for x, y in action_dic.items()}
        vcoco_test_ids = open(data_args['vcoco_test_ids_file'], 'r')
        test_image_id_list = [int(line.rstrip()) for line in vcoco_test_ids]
        vcocoeval = VCOCOeval(data_args['vcoco_test_file'],
                              data_args['ann_file'],
                              data_args['vcoco_test_ids_file'])
        word_embeddings = pickle.load(open(data_args['word_embedding_file'],
                                           "rb"),
                                      encoding='latin1')
        output_file = os.path.join(output_folder, 'detection.pkl')
        output_dict_file = os.path.join(
            output_folder, 'detection_app_{}_new.pkl'.format(dataset_name))

        logger.info("Output will be saved in {}".format(output_file))
        logger.info("Start evaluation on {} dataset({} images).".format(
            dataset_name, len(test_image_id_list)))

        run_test(model,
                 dataset_name=dataset_name,
                 im_dir=im_dir,
                 test_detection=test_detection,
                 word_embeddings=word_embeddings,
                 test_image_id_list=test_image_id_list,
                 prior_mask=prior_mask,
                 action_dic_inv=action_dic_inv,
                 output_file=output_file,
                 output_dict_file=output_dict_file,
                 object_thres=args.object_thres,
                 human_thres=args.human_thres,
                 prior_flag=args.prior_flag,
                 device=device,
                 cfg=cfg)

        synchronize()

        vcocoeval._do_eval(output_file, ovr_thresh=0.5)
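The evaluation script above repeatedly loads Detectron-era pickle files with `encoding='latin1'`. A small helper along those lines (the path argument is illustrative): Python 2 pickles containing numpy arrays fail to load under Python 3 with the default encoding.

import pickle


def load_py2_pickle(path):
    # Files pickled under Python 2 (e.g. the test_detection and word_embedding files
    # above) need encoding='latin1' so byte strings and numpy arrays deserialize
    # correctly under Python 3.
    with open(path, "rb") as f:
        return pickle.load(f, encoding="latin1")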
Ejemplo n.º 32
0
def main():
    parser = argparse.ArgumentParser(description="PyTorch Object Detection Training")
    parser.add_argument(
        "--config-file",
        default="",
        metavar="FILE",
        help="path to config file",
        type=str,
    )
    parser.add_argument("--local_rank", type=int, default=0)
    parser.add_argument(
        "--skip-test",
        dest="skip_test",
        help="Do not test the final model",
        action="store_true",
        default=True  # add by hui
    )
    # ################################################ add by hui #################################################
    parser.add_argument(
        "--temp",
        help="whether to write output to the temporary directory",
        default=False,
        # argparse's type=bool treats any non-empty string (even "False") as True,
        # so parse the value explicitly
        type=lambda s: str(s).lower() in ("true", "1", "yes")
    )
    # #################################################################################################
    parser.add_argument(
        "opts",
        help="Modify config options using the command-line",
        default=None,
        nargs=argparse.REMAINDER,
    )

    args = parser.parse_args()

    num_gpus = int(os.environ["WORLD_SIZE"]) if "WORLD_SIZE" in os.environ else 1
    args.distributed = num_gpus > 1

    if args.distributed:
        torch.cuda.set_device(args.local_rank)
        torch.distributed.init_process_group(
            backend="nccl", init_method="env://"
        )
        synchronize()

    cfg.merge_from_file(args.config_file)
    cfg.merge_from_list(args.opts)
    # ################### change by hui #################################################
    if args.temp:
        if os.path.exists("./outputs/temp"):
            shutil.rmtree("./outputs/temp")
        adaptive_config_change("OUTPUT_DIR", cfg.OUTPUT_DIR, './outputs/temp')
    cfg.freeze()

    some_pre_deal()
    ##################################################################################################

    output_dir = cfg.OUTPUT_DIR
    if output_dir:
        mkdir(output_dir)

    logger = setup_logger("maskrcnn_benchmark", output_dir, get_rank())
    logger.info("Using {} GPUs".format(num_gpus))
    logger.info(args)

    logger.info("Collecting env info (might take some time)")
    logger.info("\n" + collect_env_info())

    logger.info("Loaded configuration file {}".format(args.config_file))
    with open(args.config_file, "r") as cf:
        config_str = "\n" + cf.read()
        logger.info(config_str)
    logger.info("Running with config:\n{}".format(cfg))

    model = train(cfg, args.local_rank, args.distributed)

    if not args.skip_test:
        run_test(cfg, model, args.distributed)
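Both entry points above use the same multi-GPU bootstrap: `torch.distributed.launch` (or `torchrun`) sets `WORLD_SIZE` and passes `--local_rank`, each process binds to its GPU, and the NCCL process group is initialised. A minimal, framework-free sketch of that pattern, with `dist.barrier()` standing in for the `synchronize()` helper used above:

import os

import torch
import torch.distributed as dist


def init_distributed(local_rank):
    # Returns True when running under a multi-GPU launcher, False for single-GPU runs.
    num_gpus = int(os.environ.get("WORLD_SIZE", 1))
    if num_gpus <= 1 or not torch.cuda.is_available():
        return False
    torch.cuda.set_device(local_rank)
    dist.init_process_group(backend="nccl", init_method="env://")
    dist.barrier()  # wait until every rank has finished initialisation
    return True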
Ejemplo n.º 33
0
def main(step, cfg):
    model = build_detection_model(cfg)
    model.to(cfg.MODEL.DEVICE)

    model2 = rating_model(cfg)
    model2.to(cfg.MODEL.DEVICE)
    print(model2)

    model3 = predicate_model(cfg)
    model3.to(cfg.MODEL.DEVICE)
    print(model3)

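    # Load the three pretrained weight files (detector backbone, rating head, predicate
    # head); removekey() strips any keys listed in its second argument (none here)
    # before load_state_dict() copies the remaining parameters into each model.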
    backbone_parameters = torch.load(os.getcwd() + cfg.CONFIG.backbone_weight,
                                     map_location=torch.device("cpu"))
    newdict = {}
    newdict['model'] = removekey(backbone_parameters['model'], [])
    load_state_dict(model, newdict.pop("model"))

    rating_parameters = torch.load(os.getcwd() + cfg.CONFIG.rating_weight,
                                   map_location=torch.device("cpu"))
    newdict = {}
    newdict['model'] = removekey(rating_parameters['model'], [])
    load_state_dict(model2, newdict.pop("model"))

    predicate_parameters = torch.load(
        os.getcwd() + "/outputs/output_predicate_recognition_stage/model3_" +
        step + ".pth",
        map_location=torch.device("cpu"))
    newdict = {}
    newdict['model'] = removekey(predicate_parameters['model'], [])
    load_state_dict(model3, newdict.pop("model"))

    output_folders = [None] * len(cfg.DATASETS.TEST)
    dataset_names = cfg.DATASETS.TEST
    if cfg.OUTPUT_DIR:
        for idx, dataset_name in enumerate(dataset_names):
            output_folder = os.path.join(cfg.OUTPUT_DIR, "inference",
                                         dataset_name, 'step', step)
            mkdir(output_folder)
            output_folders[idx] = output_folder
    data_loaders_val = make_data_loader(cfg,
                                        is_train=False,
                                        is_distributed=False)
    for output_folder, dataset_name, data_loader_val in zip(
            output_folders, dataset_names, data_loaders_val):
        entire_test(
            model,
            model2,
            model3,
            data_loader_val,
            dataset_name=dataset_name,
            iou_types=("bbox", ),
            box_only=cfg.MODEL.RPN_ONLY,
            device=cfg.MODEL.DEVICE,
            expected_results=cfg.TEST.EXPECTED_RESULTS,
            expected_results_sigma_tol=cfg.TEST.EXPECTED_RESULTS_SIGMA_TOL,
            output_folder=output_folder,
        )
        synchronize()

    # Convert the saved predictions into MATLAB .mat files
    output_folder2 = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name,
                                  'step', step, 'predictions2.pth')
    predictions2 = torch.load(output_folder2)
    save_dir2 = os.path.join(cfg.OUTPUT_DIR, "inference", dataset_name,
                             'extraction/predicate_eval', step)
    if not os.path.exists(save_dir2):
        os.makedirs(save_dir2)
    for i in range(len(predictions2)):
        output = predictions2[i]
        output = output.numpy()
        dataNew = save_dir2 + '/' + str(i) + '.mat'
        scio.savemat(dataNew, {'data': output})
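The final loop converts each prediction tensor to a numpy array and writes it as `<index>.mat` under the key 'data'. A self-contained version of that export step, with placeholder paths:

import os

import scipy.io as scio
import torch


def export_predictions_to_mat(predictions, save_dir):
    # Write one .mat file per prediction; MATLAB code can then read each array
    # back via load('<index>.mat') and its 'data' field.
    os.makedirs(save_dir, exist_ok=True)
    for i, pred in enumerate(predictions):
        arr = pred.cpu().numpy() if isinstance(pred, torch.Tensor) else pred
        scio.savemat(os.path.join(save_dir, "{}.mat".format(i)), {"data": arr})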