Example #1
def test_run_report_error_build(tmp_path, classification_model, test_dataset,
                                monkeypatch):
    # Missing report page: jupyter-book build job warns but succeeds
    from presc.report import runner

    mock_report_source = tmp_path / "mock_report_source"
    shutil.copytree(runner.REPORT_SOURCE_PATH, mock_report_source)
    with open(mock_report_source / runner.JB_TOC_FILENAME, "a") as f:
        f.write("  - file: missing_notebook\n")
    monkeypatch.setattr(runner, "REPORT_SOURCE_PATH", mock_report_source)

    out_path_run = tmp_path / "test_run"
    rr = ReportRunner(output_path=out_path_run,
                      config_filepath=TEST_REPORT_CONFIG_PATH)
    rr.run(
        model=classification_model,
        test_dataset=test_dataset,
    )

    # Build succeeded
    assert rr.report_main_page.exists()
    assert rr._jb_build_result.returncode == 0

    # Build log includes warning about missing notebook
    with open(rr.jb_build_log) as f:
        build_log = f.read()
    assert build_log.startswith("Running Jupyter-Book")
    assert re.search("warning:.*missing_notebook", build_log.lower())
Example #2
def test_run_report_tmp_exec_dir(tmp_path, classification_model, test_dataset):
    out_path_run = tmp_path / "test_run"
    rr = ReportRunner(output_path=out_path_run,
                      config_filepath=TEST_REPORT_CONFIG_PATH)
    # Run using the default temp execution dir.
    rr.run(
        model=classification_model,
        test_dataset=test_dataset,
        # Exclude evaluation notebooks for efficiency.
        # Only run the landing page notebook.
        settings={"report.evaluations_exclude": "*"},
    )

    # Just check that it worked.
    assert rr.report_main_page.exists()
    assert rr._jb_build_result.returncode == 0
Example #3
def test_report_runner(tmp_path):
    # Check paths are initialized correctly by the runner
    os.chdir(tmp_path)
    rr = ReportRunner()
    assert str(rr.output_path.parent.resolve()) == os.getcwd()
    assert rr.output_path.exists()
    assert rr.execution_path is None
    assert rr.config.dump() == global_config.dump()

    out_path = tmp_path / "abc" / "out"
    exec_path = tmp_path / "exec"
    conf_path = tmp_path / "conf.yaml"
    with open(conf_path, "w") as f:
        f.write(REPORT_CONFIG_YAML)

    rr = ReportRunner(output_path=out_path,
                      execution_path=exec_path,
                      config_filepath=conf_path)
    assert rr.output_path.parent == out_path
    assert rr.output_path.exists()
    assert rr.execution_path.parent == exec_path
    assert rr.config["report"]["title"].get() == "abc"
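
# A hedged sketch of what the REPORT_CONFIG_YAML test fixture used above might
# contain, inferred from the assertions in this and later examples (report
# title "abc", an author line rendered as "By xyz", and the conditional_metric
# page excluded from the built report). The author and evaluations_exclude
# entries are assumptions; the real fixture may differ.
REPORT_CONFIG_YAML = """
report:
  title: abc
  author: xyz
  evaluations_exclude: conditional_metric
"""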
Example #4
def test_run_report_error_notebook(tmp_path, pipeline_classifier,
                                   test_dataset):
    # Error encountered while running the notebooks
    out_path_run = tmp_path / "test_run"
    exec_path_run = tmp_path / "test_exec"
    rr = ReportRunner(
        output_path=out_path_run,
        execution_path=exec_path_run,
        config_filepath=TEST_REPORT_CONFIG_PATH,
    )
    # pipeline_classifier is not a valid ClassificationModel instance
    rr.run(
        model=pipeline_classifier,
        test_dataset=test_dataset,
    )

    # jupyter-book build job succeeded even though notebooks didn't
    assert rr._jb_build_result.returncode == 0
    with open(rr.jb_build_log) as f:
        build_log = f.read()
    assert build_log.startswith("Running Jupyter-Book")
    assert "Finished generating HTML" in build_log
    # Log file mentions failure
    assert "failed" in build_log.lower()

    # Report was created
    assert rr.report_main_page.exists()
    # Execution dir and output dir contain relevant files.
    assert len(list(rr.execution_path.glob("_context_store*"))) > 0
    with open(rr.execution_path / "_toc.yml") as f:
        toc = [x.strip() for x in f.readlines()]
        notebooks = [x[8:] for x in toc if x.startswith("- file: ")]
    for nb in notebooks:
        assert (rr.execution_path / f"{nb}.ipynb").exists()
        assert (rr.report_main_page.parent / f"{nb}.html").exists()
    # Error message shows in notebooks
    with open(rr.report_main_page.parent / "landing.html") as f:
        landing_html = f.read()
    assert "AttributeError" in landing_html
Example #5
def test_run_report_error_build(tmp_path, classification_model, test_dataset,
                                monkeypatch):
    # Missing report page: jupyter-book build job fails
    from presc.report import runner

    mock_report_source = tmp_path / "mock_report_source"
    shutil.copytree(runner.REPORT_SOURCE_PATH, mock_report_source)
    with open(mock_report_source / runner.JB_TOC_FILENAME, "a") as f:
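        # Unlike Example #1, the TOC entry is appended at the top level
        # (unindented), which this test expects to make the jupyter-book build
        # fail outright rather than just warn.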
        f.write("- file: missing_notebook\n")
    monkeypatch.setattr(runner, "REPORT_SOURCE_PATH", mock_report_source)

    out_path_run = tmp_path / "test_run"
    rr = ReportRunner(output_path=out_path_run,
                      config_filepath=TEST_REPORT_CONFIG_PATH)
    # run() function generates warnings
    with pytest.warns(UserWarning) as warning_records:
        rr.run(
            model=classification_model,
            test_dataset=test_dataset,
        )
    assert len(warning_records) == 2
    # Warning from build function
    first_warning = warning_records[0].message.args[0]
    assert "jupyter-book build" in first_warning
    assert "did not succeed" in first_warning
    assert str(rr.jb_build_log) in first_warning
    # Warning from run function
    second_warning = warning_records[1].message.args[0]
    assert "expected report main page" in second_warning
    assert "error generating" in second_warning
    assert str(rr.jb_build_log) in second_warning

    # Build artifacts mention error
    assert rr._jb_build_result.returncode > 0
    with open(rr.jb_build_log) as f:
        build_log = f.read()
    assert build_log.startswith("Running Jupyter-Book")
    assert "Finished generating HTML" not in build_log
    assert "error in building" in build_log.lower()
    assert "RuntimeError" in build_log

    # Report was not produced
    assert not rr.report_main_page.exists()
    assert not rr._linked_main_page.exists()
    with pytest.raises(AttributeError):
        rr.report_html
    with pytest.raises(AttributeError):
        rr.open()
Example #6
def test_run_report_override_config(tmp_path, classification_model,
                                    test_dataset, monkeypatch):
    # Patch the JupyterBook config to disable computation for efficiency
    from presc.report import runner

    mock_report_source = tmp_path / "mock_report_source"
    shutil.copytree(runner.REPORT_SOURCE_PATH, mock_report_source)
    with open(mock_report_source / runner.JB_CONFIG_FILENAME) as f:
        jb_config = yaml.load(f, Loader=yaml.FullLoader)
    jb_config["execute"]["execute_notebooks"] = "off"
    with open(mock_report_source / runner.JB_CONFIG_FILENAME, "w") as f:
        yaml.dump(jb_config, f)
    monkeypatch.setattr(runner, "REPORT_SOURCE_PATH", mock_report_source)

    out_path_run = tmp_path / "test_run"
    exec_path_run = tmp_path / "test_exec"
    config_path = tmp_path / "custom_config.yaml"
    with open(config_path, "w") as f:
        f.write(REPORT_CONFIG_YAML)

    rr = ReportRunner(
        output_path=out_path_run,
        execution_path=exec_path_run,
        config_filepath=config_path,
    )
    rr.run(
        model=classification_model,
        test_dataset=test_dataset,
    )

    # Report ran successfully.
    assert rr._jb_build_result.returncode == 0
    with open(rr.jb_build_log) as f:
        build_log = f.read()
    assert build_log.startswith("Running Jupyter-Book")
    assert "Finished generating HTML" in build_log
    assert "error" not in build_log.lower()
    assert "failed" not in build_log.lower()

    # Excluded page was not rendered.
    assert rr.report_main_page.exists()
    output_files = os.listdir(rr.report_main_page.parent)
    assert "landing.html" in output_files
    assert "conditional_metric.html" not in output_files
    assert "conditional_distribution.html" in output_files

    # Overridden attributes got picked up in the report pages.
    with open(rr.report_main_page.with_name("landing.html")) as f:
        landing_html = f.read()
    assert "abc</title>" in landing_html
    assert "By xyz" in landing_html

    # Rerun with further override.
    rr = ReportRunner(
        output_path=out_path_run,
        execution_path=exec_path_run,
        config_filepath=config_path,
    )
    rr.run(
        model=classification_model,
        test_dataset=test_dataset,
        settings={"report.title": "pqr"},
    )

    # Report ran successfully.
    assert rr._jb_build_result.returncode == 0
    with open(rr.jb_build_log) as f:
        build_log = f.read()
    assert build_log.startswith("Running Jupyter-Book")
    assert "Finished generating HTML" in build_log
    assert "error" not in build_log.lower()
    assert "failed" not in build_log.lower()

    # Excluded page was not rendered.
    assert rr.report_main_page.exists()
    output_files = os.listdir(rr.report_main_page.parent)
    assert "landing.html" in output_files
    assert "conditional_metric.html" not in output_files
    assert "conditional_distribution.html" in output_files

    # Overridden attributes got picked up in the report pages.
    with open(rr.report_main_page.with_name("landing.html")) as f:
        landing_html = f.read()
    assert "pqr</title>" in landing_html
    assert "By xyz" in landing_html
Example #7
def test_run_report(
    tmp_path,
    classification_model,
    test_dataset,
    train_dataset,
    webbrowser_patched,
    capsys,
):
    out_path_run = tmp_path / "test_run"
    exec_path_run = tmp_path / "test_exec"
    rr = ReportRunner(
        output_path=out_path_run,
        execution_path=exec_path_run,
        config_filepath=TEST_REPORT_CONFIG_PATH,
    )
    # Run a report on the test data. This will take ~10 seconds.
    # Use a custom config that reduces computation and is more appropriate for
    # the small test dataset.
    rr.run(
        model=classification_model,
        test_dataset=test_dataset,
        train_dataset=train_dataset,
    )

    # Check top-level output files exist and paths resolve
    assert isinstance(rr._jb_build_result, CompletedProcess)
    assert rr._jb_build_result.returncode == 0
    with open(rr.jb_build_log) as f:
        build_log = f.read()
    assert build_log.startswith("Running Jupyter-Book")
    assert "Finished generating HTML" in build_log
    assert "error" not in build_log.lower()
    assert "failed" not in build_log.lower()

    assert isinstance(rr._jb_clean_result, CompletedProcess)
    assert rr._jb_clean_result.returncode == 0
    # File should be empty as the output folder did not contain
    # a previous `_build` dir.
    assert rr.jb_clean_log.exists()

    assert rr.report_main_page.exists()
    # Link may not exist as it is platform-dependent.
    if rr._linked_main_page.exists():
        assert rr._linked_main_page.resolve() == rr.report_main_page.resolve()
        report_path = Path(rr.report_html)
        assert rr._linked_main_page.parent.resolve() == report_path.parent
        assert rr._linked_main_page.name == report_path.name
    else:
        assert rr.report_html == str(rr.report_main_page.resolve())

    # Opening in the browser: check the URL that is passed.
    capsys.readouterr()
    rr.open()
    url = capsys.readouterr().out.strip()
    assert url.startswith("file://")
    assert url.endswith(rr.report_html)

    # Check execution dir and output dir contain relevant files.
    assert len(list(rr.execution_path.glob("_context_store*"))) > 0
    assert (rr.execution_path / "_config.yml").exists()
    with open(rr.execution_path / "_toc.yml") as f:
        toc = [x.strip() for x in f.readlines()]
        notebooks = [x[8:] for x in toc if x.startswith("- file: ")]
    for nb in notebooks:
        assert (rr.execution_path / f"{nb}.ipynb").exists()
        assert (rr.report_main_page.parent / f"{nb}.html").exists()

    # Test cleaning on existing report output.
    rr.clean()
    assert not rr.jb_build_log.exists()
    assert not rr._linked_main_page.exists()
    assert not rr.report_main_page.parent.exists()
    assert isinstance(rr._jb_clean_result, CompletedProcess)
    assert rr._jb_clean_result.returncode == 0
    with open(rr.jb_clean_log) as f:
        clean_log = f.read()
    assert "Your _build dir" in clean_log
    assert "error" not in clean_log.lower()
    assert "failed" not in clean_log.lower()

    # Test rerunning report with the same runner.
    # Since the execution dir already exists, this tests that it gets cleaned
    # successfully prior to running.
    # Only need to execute the landing page.
    rr.run(
        model=classification_model,
        test_dataset=test_dataset,
        train_dataset=train_dataset,
        settings={"report.evaluations_exclude": "*"},
        clean=False,
    )
    assert rr.report_main_page.parent.exists()
    assert rr._jb_build_result.returncode == 0
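
# Hedged sketch of the webbrowser_patched fixture assumed above: it presumably
# monkeypatches webbrowser.open to print the URL instead of launching a
# browser, so the test can capture it via capsys. The actual conftest fixture
# may be implemented differently.
import webbrowser

import pytest


@pytest.fixture
def webbrowser_patched(monkeypatch):
    monkeypatch.setattr(webbrowser, "open", lambda url, *args, **kwargs: print(url))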
Example #8
from pathlib import Path

import pandas as pd
from sklearn.model_selection import ShuffleSplit
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# PRESC imports (module paths assumed from the package layout used elsewhere
# in these examples).
from presc.dataset import Dataset
from presc.model import ClassificationModel
from presc.report.runner import ReportRunner

THIS_DIR = Path(__file__).parent
DATASET_DIR = THIS_DIR / ".." / ".." / "datasets"

# Load the dataset.

df = pd.read_csv(DATASET_DIR / "winequality.csv")
df = df.drop(columns=["quality"])
dataset = Dataset(df, label_col="recommend")

# Split into train and test subsets (30% held out for testing).
splitter = ShuffleSplit(n_splits=1, test_size=0.3, random_state=543)
train_ind, test_ind = next(splitter.split(dataset.features))
train_dataset = dataset.subset(train_ind, by_position=True)
test_dataset = dataset.subset(test_ind, by_position=True)

# Set up the model

model = Pipeline([("scaler", StandardScaler()),
                  ("clf", SVC(class_weight="balanced"))])
cm = ClassificationModel(model)
cm.train(train_dataset)

presc_report = ReportRunner()
presc_report.run(model=cm,
                 test_dataset=test_dataset,
                 train_dataset=train_dataset)

print(f"The report is available at {presc_report.report_html}")
presc_report.open()