def test_mining_database(monkeypatch, capsys, fake_sqlalchemy_engine, mining_schema_df):
    """Mine a database, first without and then with an article saver."""
    schema_df = mining_schema_df.drop_duplicates(ignore_index=True)

    # Fake the two mining-server endpoints the widget talks to.
    responses.add_callback(
        responses.POST,
        "http://test/database",
        callback=request_callback,
        content_type="text/csv",
    )
    responses.add_callback(
        responses.POST,
        "http://test/help",
        callback=request_callback_help,
        content_type="application/json",
    )

    schema = MiningSchema()
    schema.add_from_df(schema_df)

    # Without an article saver there is nothing to mine yet.
    widget = MiningWidget(
        mining_server_url="http://test",
        mining_schema=schema,
    )
    assert widget.get_extracted_table().equals(pd.DataFrame())

    widget_bot = MiningWidgetBot(widget, capsys, monkeypatch)
    widget_bot.set_value("input_text", "HELLO")
    widget_bot.click("mine_articles")

    # Only the /help endpoint was hit; the widget bailed out with a message.
    assert len(responses.calls) == 1
    assert "No article saver was provided. Nothing to mine." in widget_bot.stdout_cached

    # With saved articles, mining actually reaches the /database endpoint.
    saver = ArticleSaver(fake_sqlalchemy_engine)
    for article_id in (0, 1):
        saver.add_article(article_id=article_id)

    widget = MiningWidget(
        mining_server_url="http://test",
        mining_schema=schema,
        article_saver=saver,
    )
    widget_bot = MiningWidgetBot(widget, capsys, monkeypatch)
    widget_bot.set_value("input_text", "HELLO")
    widget_bot.click("mine_articles")

    assert len(responses.calls) == 3
    assert "Collecting saved items..." in widget_bot.stdout_cached
    assert isinstance(widget.get_extracted_table(), pd.DataFrame)

    # Displayed objects: the schema, an HTML warning, the extraction table.
    display_objs = widget_bot.display_cached
    assert len(display_objs) == 3  # 1 schema + 1 warning + 1 table_extractions
    schema_disp, warning_disp, table_disp = display_objs
    assert isinstance(schema_disp, pd.DataFrame)
    assert schema_disp.equals(schema_df)
    assert isinstance(warning_disp, HTML)
    assert isinstance(table_disp, pd.DataFrame)
    assert table_disp.equals(table_extractions)
def test_mining_text(monkeypatch, capsys, mining_schema_df):
    """Mine free text entered in the widget's text box."""
    schema_df = mining_schema_df.drop_duplicates(ignore_index=True)

    # Fake the two mining-server endpoints the widget talks to.
    responses.add_callback(
        responses.POST,
        "http://test/text",
        callback=request_callback,
        content_type="application/json",
    )
    responses.add_callback(
        responses.POST,
        "http://test/help",
        callback=request_callback_help,
        content_type="application/json",
    )

    schema = MiningSchema()
    schema.add_from_df(schema_df)
    widget = MiningWidget(
        mining_server_url="http://test",
        mining_schema=schema,
    )

    widget_bot = MiningWidgetBot(widget, capsys, monkeypatch)
    widget_bot.set_value("input_text", "HELLO")
    widget_bot.click("mine_text")

    # Both /help and /text were called.
    assert len(responses.calls) == 2

    # Displayed objects: the schema, an HTML warning, the extraction table.
    display_objs = widget_bot.display_cached
    assert len(display_objs) == 3  # 1 schema + 1 warning + 1 table_extractions
    schema_disp, warning_disp, table_disp = display_objs
    assert isinstance(schema_disp, pd.DataFrame)
    assert schema_disp.equals(schema_df)
    assert isinstance(warning_disp, HTML)
    assert table_disp.equals(table_extractions)
def test_add_entity():
    """Entities can be added one at a time; duplicates only warn."""
    schema = MiningSchema()

    # One fully-specified entity and one bare entity type.
    schema.add_entity(
        "CHEMICAL",
        property_name="isChiral",
        property_type="ATTRIBUTE",
        property_value_type="BOOLEAN",
        ontology_source="NCIT",
    )
    schema.add_entity("DRUG")
    assert len(schema.schema_df) == 2

    # Re-adding an existing entity warns instead of duplicating it.
    with pytest.warns(UserWarning, match=r"already exists"):
        schema.add_entity("DRUG")
def test_df(mining_schema_df):
    """Test populating a ``MiningSchema`` from a dataframe.

    Covers the happy path, a missing mandatory ``entity_type`` column,
    and unknown columns that should be ignored with a warning.
    """
    # We won't be testing for duplicates in this test
    mining_schema_df = mining_schema_df.drop_duplicates(ignore_index=True)

    # Test adding from a dataframe
    mining_schema = MiningSchema()
    mining_schema.add_from_df(mining_schema_df)
    # Make sure a copy is returned
    assert mining_schema.df is not mining_schema.schema_df
    # Check that all data was added
    assert mining_schema.df.equals(mining_schema_df)

    # Test missing entity_type
    wrong_schema_df = mining_schema_df.drop("entity_type", axis=1)
    mining_schema = MiningSchema()
    with pytest.raises(ValueError, match=r"entity_type.* not found"):
        mining_schema.add_from_df(wrong_schema_df)

    # Test ignoring unknown columns
    schema_df_new = mining_schema_df.drop_duplicates().copy()
    # list(range(...)) instead of a redundant identity comprehension.
    schema_df_new["unknown_column"] = list(range(len(schema_df_new)))
    mining_schema = MiningSchema()
    with pytest.warns(UserWarning, match=r"column.* unknown_column"):
        mining_schema.add_from_df(schema_df_new)
    # Check that all data was added and the unknown columns was ignored
    assert mining_schema.df.equals(mining_schema_df)
def test_save_load_checkpoint(monkeypatch, capsys, mining_schema_df, tmpdir):
    """Test saving and loading mining-result checkpoints via the widget.

    Exercises the error paths (save with no results, load with no
    checkpoint file), a successful save, and loads with both a matching
    and a mismatching database name (the latter should warn).
    """
    mining_schema_df = mining_schema_df.drop_duplicates(ignore_index=True)
    # Fake the two mining-server endpoints the widget talks to.
    responses.add_callback(
        responses.POST,
        "http://test/text",
        callback=request_callback,
        content_type="application/json",
    )

    responses.add_callback(
        responses.POST,
        "http://test/help",
        callback=request_callback_help,
        content_type="application/json",
    )

    mining_schema = MiningSchema()
    mining_schema.add_from_df(mining_schema_df)
    # NOTE(review): tmpdir is a directory; the widget apparently derives a
    # checkpoint file path from it (see the .open("r") below) — confirm.
    mining_widget = MiningWidget(
        mining_server_url="http://test",
        mining_schema=mining_schema,
        checkpoint_path=tmpdir,
    )

    bot = MiningWidgetBot(mining_widget, capsys, monkeypatch)
    bot.set_value("input_text", "HELLO")

    # Try saving data, but no results to save
    bot.click("save_button")
    last_displayed = bot.display_cached[-1].data
    assert "ERROR!" in last_displayed
    assert "No mining results available." in last_displayed

    # Click on "investigate"
    bot.click("mine_text")

    # Try loading data, but no checkpoint was saved there
    bot.click("load_button")
    last_displayed = bot.display_cached[-1].data
    assert "ERROR!" in last_displayed
    assert "No checkpoint file found to load." in last_displayed

    # Now there are some results, so we can save a checkpoint
    bot.click("save_button")
    displayed = bot.display_cached
    # The checkpoint is a JSON file holding the extractions plus metadata.
    with bot.mining_widget.checkpoint_path.open("r") as f:
        data = json.load(f)
    assert np.array_equal(
        pd.DataFrame(data["mining_widget_extractions"]).values,
        bot.mining_widget.table_extractions.values,
    )
    assert data["database_name"] == bot.mining_widget.database_name
    assert data["mining_server_version"] == bot.mining_widget.mining_server_version
    assert "DONE" in displayed[-1].data
    assert "Saving mining results to disk..." in displayed[-2].data

    # Now there is a checkpoint, so we can load it
    # Note: if the database name or the server name is different, data is loaded
    # but we raise a warning.
    for db_name in ("test_database", "test_database_2"):
        bot.mining_widget.database_name = db_name
        # Drop cached results so the load has to repopulate them.
        del bot.mining_widget.table_extractions
        bot.click("load_button")
        assert np.array_equal(
            pd.DataFrame(data["mining_widget_extractions"]).values,
            bot.mining_widget.table_extractions.values,
        )

        displayed = bot.display_cached
        if db_name != "test_database":
            # Mismatching database name: an extra WARNING is displayed
            # between the status messages and the loaded table.
            assert isinstance(displayed[-1], pd.DataFrame)
            assert "WARNING" in displayed[-2].data
            assert "DONE" in displayed[-3].data
            assert "Loading mining results from disk..." in displayed[-4].data
        else:
            assert isinstance(displayed[-1], pd.DataFrame)
            assert "DONE" in displayed[-2].data
            assert "Loading mining results from disk..." in displayed[-3].data