Example #1
def test_build_cv_mode_cross_val_cache(
    tmpdir,
    should_save_model: bool,
    cv_mode_1: str,
    cv_mode_2: str,
    runner: CliRunner,
    machine: Machine,
):
    """
    Checks that cv_scores uses the cache when run after a full build: the same
    model is loaded, and its cv_scores can be printed.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    machine.evaluation = cv_mode_1  # type: ignore
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):
        runner.invoke(cli.gordo, ["build"])

    machine.evaluation = cv_mode_2  # type: ignore
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):
        runner.invoke(cli.gordo, ["build"])

    if should_save_model:
        assert len(os.listdir(tmpdir)) > 0
    else:
        assert len(os.listdir(tmpdir)) == 0
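The temp_env_vars helper is not shown in these snippets; judging from its usage, it temporarily sets environment variables for the duration of a with block and restores the previous values afterwards. Below is a minimal sketch of such a helper, with the name and behaviour assumed from the calls above rather than taken from the gordo sources.

import contextlib
import os


@contextlib.contextmanager
def temp_env_vars(**kwargs):
    """Temporarily set environment variables, restoring the previous values on exit."""
    original = {key: os.environ.get(key) for key in kwargs}
    os.environ.update(kwargs)
    try:
        yield
    finally:
        for key, value in original.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value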
Example #2
def test_build_cv_mode(tmpdir, runner: CliRunner, should_save_model: bool,
                       cv_mode: str, machine: Machine):
    """
    Tests build with cv_mode set to full and cross_val_only. Checks that cv_scores are
    printed and that the model is only saved when using the default (full) value.
    """
    machine.model = MODEL_CONFIG_WITH_PREDICT
    machine.evaluation = cv_mode  # type: ignore

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    tmp_model_dir = os.path.join(tmpdir, "tmp")
    os.makedirs(tmp_model_dir, exist_ok=True)

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=tmp_model_dir):
        result = runner.invoke(cli.gordo, ["build", "--print-cv-scores"])
        assert result.exit_code == 0
        # Check whether the output directory is empty, depending on the cv_mode.
        if should_save_model:
            assert len(os.listdir(tmp_model_dir)) != 0
        else:
            assert len(os.listdir(tmp_model_dir)) == 0

        # Check that the printed cv_scores contain the expected metric names
        assert "r2-score" in result.output
        assert "mean-squared-error" in result.output
        assert "mean-absolute-error" in result.output
        assert "explained-variance-score" in result.output
Example #3
def test_build_cv_mode_build_only(tmpdir, runner: CliRunner, machine: Machine):
    """
    Tests build with cv_mode set to build_only. Checks that a model is saved to
    OUTPUT_DIR, and that the metadata contains cv_duration_sec=None and
    scores={}.
    """

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")
    machine.evaluation = {"cv_mode": "build_only"}

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):

        metadata_file = os.path.join(tmpdir, "metadata.json")
        runner.invoke(cli.gordo, ["build"])

        # A model has been saved
        assert len(os.listdir(tmpdir)) != 0
        with open(metadata_file) as f:
            metadata_json = json.load(f)
            assert (metadata_json["metadata"]["build_metadata"]["model"]
                    ["cross_validation"]["cv_duration_sec"] is None)
            assert (metadata_json["metadata"]["build_metadata"]["model"]
                    ["cross_validation"]["scores"] == {})