Esempio n. 1
0
def test_help(capsys):
    """Check that ``--help`` prints usage to stdout and exits with code 0."""
    with pytest.raises(SystemExit) as exc_info:
        run_create_mining_cache(["--help"])

    captured = capsys.readouterr()

    # A successful help request exits cleanly and writes nothing to stderr.
    assert exc_info.value.code == 0
    assert captured.out.startswith("usage:")
    assert captured.err == ""
Esempio n. 2
0
def test_missing_sqlite_db():
    """Check that a sqlite ``--db-url`` that is not a file is rejected.

    The entrypoint is expected to raise ``FileNotFoundError`` with a message
    starting with "No database found".
    """
    argv = [
        "--data-and-models-dir",
        "/some/path",
        "--db-type",
        "sqlite",
        "--db-url",
        "fake$?#",
    ]

    with pytest.raises(FileNotFoundError, match="^No database found"):
        run_create_mining_cache(argv)
Esempio n. 3
0
def test_send_through(
    monkeypatch,
    tmpdir,
    db_type,
    db_url,
    target_table_name,
    n_processes_per_model,
    restrict_to_models,
):
    """Check that CLI arguments are forwarded to ``CreateMiningCache``.

    ``sqlalchemy``, ``load_ee_models_library``, ``CreateMiningCache`` and
    ``getpass.getpass`` are replaced by fakes, the entrypoint is run with the
    given arguments, and the keyword arguments received by the fake
    ``CreateMiningCache`` are inspected.

    Parameters
    ----------
    monkeypatch
        Pytest fixture used to install the fakes.
    tmpdir
        Pytest fixture; hosts the empty sqlite file when ``db_type`` is
        ``"sqlite"``.
    db_type, db_url, target_table_name, n_processes_per_model
        Values passed on the command line (presumably supplied through
        parametrization that is not visible here). For sqlite, ``db_url`` is
        overridden with a path inside ``tmpdir``.
    restrict_to_models
        Comma-separated model IDs passed via ``--restrict-to-models``.
    """
    # Fake model library returned by the patched load_ee_models_library
    data_dir = "/data_dir"
    df_model_library = pd.DataFrame(
        columns=["entity_type", "entity_type_name", "model_id", "model_path"],
        data=[
            [
                "CELL_COMPARTMENT",
                "CELLULAR_COMPONENT",
                "path/to/model_1",
                f"{data_dir}/path/to/model_1",
            ],
            [
                "CELL_TYPE",
                "CELL_TYPE",
                "path/to/model_2",
                f"{data_dir}/path/to/model_2",
            ],
            [
                "CHEMICAL",
                "CHEBI",
                "path/to/model_3",
                f"{data_dir}/path/to/model_3",
            ],
        ],
    )

    # Monkey-patching
    fake_load_ee_models_library = Mock(return_value=df_model_library)
    fake_sqlalchemy = Mock()
    fake_create_mining_cache = Mock()
    monkeypatch.setattr(
        "bluesearch.entrypoint.create_mining_cache.sqlalchemy",
        fake_sqlalchemy,
    )
    monkeypatch.setattr(
        "bluesearch.entrypoint.create_mining_cache.load_ee_models_library",
        fake_load_ee_models_library,
    )
    monkeypatch.setattr(
        "bluesearch.database.CreateMiningCache",
        fake_create_mining_cache,
    )
    # Avoid an interactive password prompt
    monkeypatch.setattr(
        "bluesearch.entrypoint.create_mining_cache.getpass.getpass",
        lambda _: "fake_password",
    )

    # Create temporary sqlite database
    if db_type == "sqlite":
        db_url = pathlib.Path(tmpdir) / "my.db"
        db_url.touch()

    # Construct arguments
    argv = [
        "--data-and-models-dir=/some/fake/path",
        f"--db-type={db_type}",
        f"--db-url={db_url}",
        f"--target-table-name={target_table_name}",
        f"--n-processes-per-model={n_processes_per_model}",
        f"--restrict-to-models={restrict_to_models}",
    ]

    # Call entrypoint method
    run_create_mining_cache(argv)

    # Check that CreateMiningCache(...) was called once and get its arguments
    fake_create_mining_cache.assert_called_once()
    _, kwargs = fake_create_mining_cache.call_args

    # Construct the restricted model library data frame
    selected_models = restrict_to_models.split(",")
    is_selected = df_model_library["model_id"].isin(selected_models)
    df_model_library_selected = df_model_library[is_selected]

    # Check the kwargs. ``return_value`` is the object every call to the mock
    # returns; reading it does not record an extra call on the mock.
    assert kwargs["database_engine"] == fake_sqlalchemy.create_engine.return_value
    assert isinstance(kwargs["ee_models_library"], pd.DataFrame)
    # Guard against a vacuous comparison of two empty frames
    assert len(df_model_library_selected) > 0
    assert kwargs["ee_models_library"].equals(df_model_library_selected)
    assert kwargs["target_table_name"] == target_table_name
    assert kwargs["workers_per_model"] == n_processes_per_model

    # Check that CreateMiningCache.construct() was called
    fake_create_mining_cache.return_value.construct.assert_called_once()
Esempio n. 4
0
def test_send_through(
    monkeypatch,
    tmpdir,
    db_type,
    db_url,
    target_table_name,
    n_processes_per_model,
    restrict_to_etypes,
    entity_types,
    spacy_model_path,
):
    """Check that CLI arguments are forwarded to ``CreateMiningCache``.

    ``sqlalchemy``, ``CreateMiningCache`` and ``getpass.getpass`` are replaced
    by fakes, the entrypoint is run with the given arguments, and the keyword
    arguments received by the fake ``CreateMiningCache`` are inspected.

    Parameters
    ----------
    monkeypatch
        Pytest fixture used to install the fakes.
    tmpdir
        Pytest fixture; hosts the empty sqlite file when ``db_type`` is
        ``"sqlite"``.
    db_type, db_url, target_table_name, n_processes_per_model
        Values passed on the command line (presumably supplied through
        parametrization that is not visible here). For sqlite, ``db_url`` is
        overridden with a path inside ``tmpdir``.
    restrict_to_etypes
        Comma-separated entity types passed via ``--restrict-to-etypes``.
    entity_types
        Iterable of entity types; only requested types that also appear here
        are expected in ``ee_models_paths`` (presumably a fixture listing the
        types for which models exist -- confirm against conftest).
    spacy_model_path
        Path passed as ``--data-and-models-dir``.
    """
    # Monkey-patching
    fake_sqlalchemy = Mock()
    fake_create_mining_cache = Mock()
    monkeypatch.setattr(
        "bluesearch.entrypoint.create_mining_cache.sqlalchemy",
        fake_sqlalchemy,
    )
    monkeypatch.setattr(
        "bluesearch.database.CreateMiningCache",
        fake_create_mining_cache,
    )
    # Avoid an interactive password prompt
    monkeypatch.setattr(
        "bluesearch.entrypoint.create_mining_cache.getpass.getpass",
        lambda _: "fake_password",
    )

    # Create temporary sqlite database
    if db_type == "sqlite":
        db_url = pathlib.Path(tmpdir) / "my.db"
        db_url.touch()

    # Construct arguments
    argv = [
        f"--data-and-models-dir={spacy_model_path}",
        f"--db-type={db_type}",
        f"--db-url={db_url}",
        f"--target-table-name={target_table_name}",
        f"--n-processes-per-model={n_processes_per_model}",
        f"--restrict-to-etypes={restrict_to_etypes}",
    ]

    # Call entrypoint method
    run_create_mining_cache(argv)

    # Check that CreateMiningCache(...) was called once and get its arguments
    fake_create_mining_cache.assert_called_once()
    _, kwargs = fake_create_mining_cache.call_args

    # Requested entity types that are also present in ``entity_types``
    expected_etypes = set(restrict_to_etypes.split(",")) & set(entity_types)

    # Check the kwargs. ``return_value`` is the object every call to the mock
    # returns; reading it does not record an extra call on the mock.
    assert kwargs["database_engine"] == fake_sqlalchemy.create_engine.return_value
    assert isinstance(kwargs["ee_models_paths"], dict)
    assert len(kwargs["ee_models_paths"]) == len(expected_etypes)
    assert kwargs["target_table_name"] == target_table_name
    assert kwargs["workers_per_model"] == n_processes_per_model

    # Check that CreateMiningCache.construct() was called
    fake_create_mining_cache.return_value.construct.assert_called_once()