Example 1

# Imports assumed by this snippet (the example page omits them). _Config is a
# bare stand-in namespace; in the original project, assert_config comes from
# py_config_runner.config_utils — a reconstruction is given after the test.
from numbers import Number

import pytest


class _Config:
    pass


def test_assert_config():

    config = _Config()
    config.a = "a"
    config.b = "b"
    config.c = 1234

    with pytest.raises(TypeError,
                       match=r"Argument required_fields should be a Sequence of"):
        assert_config(config, 1234)

    with pytest.raises(ValueError,
                       match=r"Entries of required_fields should be"):
        assert_config(config, (1, 2, 3))

    required_fields = (
        ("a", str),
        ("b", str),
        ("c", Number),
        ("d", float)
    )

    with pytest.raises(ValueError,
                       match=r"Config should have attribute:"):
        assert_config(config, required_fields)

    config.d = "123"

    with pytest.raises(TypeError,
                       match=r"should be of type"):
        assert_config(config, required_fields)

    config.d = 12.34
    assert_config(config, required_fields)
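The helper under test is not shown on this page; to keep the snippet self-contained, here is a minimal reconstruction inferred purely from the errors the test expects. The real assert_config in py_config_runner.config_utils may differ in its details.

def assert_config(config, required_fields):
    # required_fields must be a sequence of (attribute_name, expected_type) pairs
    if not isinstance(required_fields, (list, tuple)):
        raise TypeError("Argument required_fields should be a Sequence of (name, type) pairs")
    for entry in required_fields:
        if not (isinstance(entry, (list, tuple)) and len(entry) == 2 and isinstance(entry[0], str)):
            raise ValueError("Entries of required_fields should be (name, type) pairs")
        name, expected_type = entry
        if not hasattr(config, name):
            raise ValueError("Config should have attribute: {}".format(name))
        if not isinstance(getattr(config, name), expected_type):
            raise TypeError("config.{} should be of type {}, but given {}".format(
                name, expected_type, type(getattr(config, name))))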
Example 2
def run(config, logger=None, local_rank=0, **kwargs):

    assert torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    dist.init_process_group("nccl", init_method="env://")

    # Since the config was passed with the --manual_config_load option,
    # it has to be set up manually here.
    assert hasattr(config, "setup"), "The configuration must be set up manually; " \
                                     "please pass --manual_config_load to py_config_runner"

    config = config.setup()

    assert_config(config, INFERENCE_CONFIG)

    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(
        config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(
        config.script_filepath, Path)

    output_path = mlflow.get_artifact_uri()
    config.output_path = Path(output_path)

    if dist.get_rank() == 0:

        # dump python files to reproduce the run
        mlflow.log_artifact(config.config_filepath.as_posix())
        mlflow.log_artifact(config.script_filepath.as_posix())

        mlflow.log_params({
            "pytorch version": torch.__version__,
            "ignite version": ignite.__version__,
        })
        mlflow.log_params(get_params(config, INFERENCE_CONFIG))
        mlflow.log_params({'mean': config.mean, 'std': config.std})

    try:
        import os

        # the iteration-level progress bar can be disabled via an environment variable
        with_pbar_on_iters = "DISABLE_PBAR_ON_ITERS" not in os.environ

        inference(config,
                  local_rank=local_rank,
                  with_pbar_on_iters=with_pbar_on_iters)
    except KeyboardInterrupt:
        pass
    except Exception:
        if dist.get_rank() == 0:
            mlflow.log_param("Run Status", "FAILED")
        dist.destroy_process_group()
        raise

    if dist.get_rank() == 0:
        mlflow.log_param("Run Status", "OK")
    dist.destroy_process_group()
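For context, scripts like this one are typically launched under torch.distributed together with the py_config_runner CLI. The command below is only illustrative — the script/config paths and process count are placeholders, and the exact flags may vary across torch and py_config_runner versions:

# Illustrative launch command (shell, shown here as comments):
#   python -u -m torch.distributed.launch --nproc_per_node=2 --use_env \
#       --module py_config_runner scripts/inference.py configs/inference/baseline.py \
#       --manual_config_load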
Example 3
def run(config, logger=None, local_rank=0, **kwargs):

    assert torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    dist.init_process_group("nccl", init_method="env://")

    # Since the config was passed with the --manual_config_load option,
    # it has to be set up manually here.
    assert hasattr(config, "setup"), (
        "The configuration must be set up manually; please pass --manual_config_load "
        "to py_config_runner")

    config = config.setup()

    assert_config(config, TRAINVAL_CONFIG)
    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(
        config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(
        config.script_filepath, Path)

    # dump python files to reproduce the run
    mlflow.log_artifact(config.config_filepath.as_posix())
    mlflow.log_artifact(config.script_filepath.as_posix())

    output_path = mlflow.get_artifact_uri()
    config.output_path = Path(output_path)

    if dist.get_rank() == 0:
        mlflow.log_params({
            "pytorch version": torch.__version__,
            "ignite version": ignite.__version__,
        })
        mlflow.log_params(get_params(config, TRAINVAL_CONFIG))

    try:
        training(
            config,
            local_rank=local_rank,
            with_mlflow_logging=True,
            with_plx_logging=False,
        )
    except KeyboardInterrupt:
        logger.info("Catched KeyboardInterrupt -> exit")
    except Exception as e:  # noqa
        logger.exception("")
        mlflow.log_param("Run Status", "FAILED")
        dist.destroy_process_group()
        raise e

    mlflow.log_param("Run Status", "OK")
    dist.destroy_process_group()
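A note on the hasattr(config, "setup") pattern above: with py_config_runner's --manual_config_load option, run() receives a lazily-loaded configuration object and must trigger loading itself via setup(). A rough sketch of that idea, with purely illustrative names (not py_config_runner's actual class):

# Illustrative lazy-config object: setup() executes the config file and
# returns the populated configuration.
import runpy

class LazyConfig:
    def __init__(self, filepath):
        self.filepath = filepath

    def setup(self):
        cfg = type("Config", (), {})()
        for name, value in runpy.run_path(self.filepath).items():
            if not name.startswith("_"):
                setattr(cfg, name, value)
        return cfg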
Example 4
def run(config, **kwargs):
    """This is the main method to run the training. As this training script is launched with `py_config_runner`
    it should obligatory contain `run(config, **kwargs)` method.

    """

    assert torch.cuda.is_available(), "This script requires a CUDA-enabled GPU"
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    with idist.Parallel(backend="nccl") as parallel:

        logger = setup_logger(name="Pascal-VOC12 Training",
                              distributed_rank=idist.get_rank())

        assert_config(config, TRAINVAL_CONFIG)
        # The following attributes are automatically added by py_config_runner
        assert hasattr(config, "config_filepath") and isinstance(
            config.config_filepath, Path)
        assert hasattr(config, "script_filepath") and isinstance(
            config.script_filepath, Path)

        if idist.get_rank() == 0 and exp_tracking.has_clearml:
            try:
                from clearml import Task
            except ImportError:
                # Backwards-compatibility for legacy Trains SDK
                from trains import Task

            task = Task.init("Pascal-VOC12 Training",
                             config.config_filepath.stem)
            task.connect_configuration(config.config_filepath.as_posix())

        log_basic_info(logger, config)

        config.output_path = Path(exp_tracking.get_output_path())
        # dump python files to reproduce the run
        exp_tracking.log_artifact(config.config_filepath.as_posix())
        exp_tracking.log_artifact(config.script_filepath.as_posix())
        exp_tracking.log_params(get_params(config, TRAINVAL_CONFIG))

        try:
            parallel.run(training, config, logger=logger)
        except KeyboardInterrupt:
            logger.info("Catched KeyboardInterrupt -> exit")
        except Exception as e:  # noqa
            logger.exception("")
            raise e
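Unlike the previous examples, this one does not call dist.init_process_group / dist.destroy_process_group by hand: ignite's idist.Parallel context manager owns the process-group lifecycle, and parallel.run injects the local rank into the training function. The pattern in isolation, as a minimal sketch rather than the full script:

# Minimal form of the idist.Parallel pattern used above: the context manager
# sets up the "nccl" process group on entry and tears it down on exit;
# parallel.run calls training(local_rank, config) once per process.
import ignite.distributed as idist

def training(local_rank, config, logger=None):
    ...  # the actual per-process training work

config = {"num_epochs": 1}  # placeholder configuration

with idist.Parallel(backend="nccl") as parallel:
    parallel.run(training, config)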
Example 5
def run(config, logger=None, local_rank=0, **kwargs):

    assert torch.cuda.is_available(), "This script requires a CUDA-enabled GPU"
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    dist.init_process_group("nccl", init_method="env://")

    # Since the config was passed with the --manual_config_load option,
    # it has to be set up manually here.
    assert hasattr(config, "setup"), (
        "The configuration must be set up manually; please pass --manual_config_load "
        "to py_config_runner")

    config = config.setup()

    assert_config(config, TRAINVAL_CONFIG)
    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(
        config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(
        config.script_filepath, Path)

    config.output_path = Path(get_outputs_path())

    if dist.get_rank() == 0:
        plx_exp = Experiment()
        plx_exp.log_params(
            **{
                "pytorch version": torch.__version__,
                "ignite version": ignite.__version__,
            })
        plx_exp.log_params(**get_params(config, TRAINVAL_CONFIG))

    try:
        training(config,
                 local_rank=local_rank,
                 with_mlflow_logging=False,
                 with_plx_logging=True)
    except KeyboardInterrupt:
        logger.info("Catched KeyboardInterrupt -> exit")
    except Exception as e:  # noqa
        logger.exception("")
        dist.destroy_process_group()
        raise e

    dist.destroy_process_group()
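Experiment and get_outputs_path are not imported anywhere in this snippet; they belong to the Polyaxon tracking client. The import below is a reasonable guess for older polyaxon-client versions — the exact module path depends on the client version in use:

# Presumed imports for this snippet (version-dependent):
from polyaxon_client.tracking import Experiment, get_outputs_path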
Example 6
def run(config, logger=None, local_rank=0, **kwargs):

    assert torch.cuda.is_available()
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    task = Task.init(
        "ignite", "DeeplabV3_ResNet101 pascal_voc2012 segmentation example")

    dist.init_process_group("nccl", init_method="env://")

    # Since the config was passed with the --manual_config_load option,
    # it has to be set up manually here.
    assert hasattr(config, "setup"), (
        "The configuration must be set up manually; please pass --manual_config_load "
        "to py_config_runner")

    config = config.setup()

    assert_config(config, TRAINVAL_CONFIG)
    # The following attributes are automatically added by py_config_runner
    assert hasattr(config, "config_filepath") and isinstance(
        config.config_filepath, Path)
    assert hasattr(config, "script_filepath") and isinstance(
        config.script_filepath, Path)

    # dump python files to reproduce the run
    task.connect_configuration(config.config_filepath.as_posix())
    task.upload_artifact("script", config.script_filepath)

    config.output_path = Path("./artifacts")

    # log the configuration, if we are the master node
    if dist.get_rank() == 0:
        task.connect(get_params(config, TRAINVAL_CONFIG))

    try:
        training(config, local_rank=local_rank, with_trains_logging=True)
    except KeyboardInterrupt:
        logger.info("Caught KeyboardInterrupt -> exit")
    except Exception:
        logger.exception("")
        dist.destroy_process_group()
        raise

    dist.destroy_process_group()
Example 7
def run(config, **kwargs):
    """This is the main method to run the training. Since this training script is launched
    with `py_config_runner`, it must define a `run(config, **kwargs)` method.

    """

    assert torch.cuda.is_available(), "This script requires a CUDA-enabled GPU"
    assert torch.backends.cudnn.enabled, "Nvidia/Amp requires cudnn backend to be enabled."

    with idist.Parallel(backend="nccl") as parallel:

        logger = setup_logger(name="Satellite segmentation Training",
                              distributed_rank=idist.get_rank())

        assert_config(config, TRAINVAL_CONFIG)
        # The following attributes are automatically added by py_config_runner
        assert hasattr(config, "config_filepath") and isinstance(
            config.config_filepath, Path)
        assert hasattr(config, "script_filepath") and isinstance(
            config.script_filepath, Path)

        log_basic_info(logger, config)

        config.output_path = Path(tracking.get_output_path())
        # dump python files to reproduce the run
        tracking.log_artifact(config.config_filepath.as_posix())
        tracking.log_artifact(config.script_filepath.as_posix())
        tracking.log_params(get_params(config, TRAINVAL_CONFIG))

        try:
            parallel.run(training, config, logger=logger)
        except KeyboardInterrupt:
            logger.info("Catched KeyboardInterrupt -> exit")
        except Exception as e:  # noqa
            logger.exception("")
            raise e
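The tracking module used here (like exp_tracking in the earlier example) is a local helper from the examples repository, not a published package. A plausible minimal shim over mlflow, matching only the calls made above (the real helper may also dispatch to ClearML):

# Hypothetical tracking shim; function names mirror the calls above.
import mlflow

try:
    import clearml  # noqa: F401
    has_clearml = True
except ImportError:
    has_clearml = False

def get_output_path():
    # directory/URI where artifacts for the current run are stored
    return mlflow.get_artifact_uri()

def log_artifact(path):
    mlflow.log_artifact(path)

def log_params(params):
    mlflow.log_params(params)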