Example 1
def launch(
        main_func,
        num_processes_per_machine,
        num_machines=1,
        machine_rank=0,
        dist_url=None,
        backend="NCCL",
        always_spawn=False,
        args=(),
):
    logger.info(
        f"Launch with num_processes_per_machine: {num_processes_per_machine},"
        f" num_machines: {num_machines}, machine_rank: {machine_rank},"
        f" dist_url: {dist_url}, backend: {backend}.")

    if get_launch_environment() == "local" and not torch.cuda.is_available():
        assert len(args) > 0, args
        cfg = args[0]
        assert isinstance(cfg, CfgNode)
        if cfg.MODEL.DEVICE == "cuda":
            logger.warning(
                "Detected that CUDA is not available on this machine, set MODEL.DEVICE"
                " to cpu and backend to GLOO")
            with temp_defrost(cfg):
                cfg.MODEL.DEVICE = "cpu"
            backend = "GLOO"

    if backend == "NCCL":
        assert (
            num_processes_per_machine <= torch.cuda.device_count()
        ), "num_processes_per_machine is greater than device count: {} vs {}".format(
            num_processes_per_machine, torch.cuda.device_count())

    world_size = num_machines * num_processes_per_machine
    if world_size > 1 or always_spawn:
        # https://github.com/pytorch/pytorch/pull/14391
        # TODO prctl in spawned processes
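        # The return_file path is passed to the spawned workers (via _distributed_worker,
        # not shown here) so the result of main_func can be read back on machine rank 0 below.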
        prefix = f"detectron2go_{main_func.__module__}.{main_func.__name__}_return"
        with tempfile.NamedTemporaryFile(prefix=prefix, suffix=".pth") as f:
            return_file = f.name
            mp.spawn(
                _distributed_worker,
                nprocs=num_processes_per_machine,
                args=(
                    main_func,
                    world_size,
                    num_processes_per_machine,
                    machine_rank,
                    dist_url,
                    backend,
                    return_file,
                    args,
                ),
                daemon=False,
            )
            if machine_rank == 0:
                return torch.load(return_file)
    else:
        return main_func(*args)
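A minimal sketch of how launch might be invoked for single-machine training. The import path d2go.distributed is an assumption (the snippet does not show its module), and the trivial _train function stands in for a real main_func that would build and train a model from the config.

import torch
from detectron2.config import get_cfg

# Assumed import path; adjust to wherever launch() lives in your tree.
from d2go.distributed import launch


def _train(cfg):
    # Placeholder main_func: a real trainer would build a model from cfg and train it.
    # Whatever it returns is what launch() returns on machine rank 0.
    return {"status": "ok", "device": cfg.MODEL.DEVICE}


if __name__ == "__main__":
    cfg = get_cfg()
    launch(
        _train,
        num_processes_per_machine=max(torch.cuda.device_count(), 1),
        num_machines=1,
        machine_rank=0,
        backend="NCCL" if torch.cuda.is_available() else "GLOO",
        args=(cfg,),
    )

With a single process and always_spawn=False, launch simply calls main_func(*args) in-process; multi-process runs go through mp.spawn and return the rank-0 result loaded from the temporary file.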
Example 2
def setup_loggers(output_dir, color=None):
    if color is None:
        color = get_launch_environment() == "local"

    d2_logger = setup_logger(
        output_dir,
        distributed_rank=comm.get_rank(),
        color=color,
        name="detectron2",
        abbrev_name="d2",
    )
    fvcore_logger = setup_logger(
        output_dir,
        distributed_rank=comm.get_rank(),
        color=color,
        name="fvcore",
    )
    d2go_logger = setup_logger(
        output_dir,
        distributed_rank=comm.get_rank(),
        color=color,
        name="d2go",
        abbrev_name="d2go",
    )
    mobile_cv_logger = setup_logger(
        output_dir,
        distributed_rank=comm.get_rank(),
        color=color,
        name="mobile_cv",
        abbrev_name="mobile_cv",
    )

    # NOTE: all of the above loggers have a FileHandler pointing to the same file as
    # d2_logger. The file is opened once per logger at creation time, which is fine
    # since it is opened in append ('a') mode.

    # NOTE: the root logger might have been configured by other applications; since
    # these loggers are already sub-top-level, don't propagate to the root logger.
    d2_logger.propagate = False
    fvcore_logger.propagate = False
    d2go_logger.propagate = False
    mobile_cv_logger.propagate = False
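Once setup_loggers has been called (e.g. setup_loggers("./output"), where the path is a placeholder), modules under these namespaces log through the configured handlers via the standard logging API; the child logger name below is only for illustration:

import logging

# Child loggers such as "d2go.runner" delegate to the handlers attached to the
# "d2go" logger above, so records reach the log file and the console,
# but do not propagate further to the root logger.
logger = logging.getLogger("d2go.runner")
logger.info("training started")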
Example 3
def setup_logger(
    module_name: str,
    output_dir: str,
    abbrev_name: Optional[str] = None,
    color: Optional[bool] = None,
) -> logging.Logger:
    if color is None:
        color = get_launch_environment() == "local"
    if not abbrev_name:
        abbrev_name = module_name

    logger = _setup_logger(
        output_dir,
        distributed_rank=comm.get_rank(),
        color=color,
        name=module_name,
        abbrev_name=abbrev_name,
    )

    # NOTE: the root logger might have been configured by other applications; since
    # this logger is already sub-top-level, don't propagate to the root logger.
    logger.propagate = False

    return logger
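A brief usage sketch of this helper; the module name, abbreviation, and output directory are placeholders:

# Configure a dedicated logger for a custom module; messages go to the console
# and to the log file under ./output, and are not propagated to the root logger.
logger = setup_logger("my_project", "./output", abbrev_name="mp")
logger.info("logger configured")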