def test_mining_database(monkeypatch, capsys, fake_sqlalchemy_engine, mining_schema_df):
    """Mining the database through the widget, with and without an article saver.

    Without an ``article_saver`` the widget refuses to mine; with one, it
    collects the saved articles, calls the mining server, and displays the
    schema, a warning, and the extracted table.
    """
    mining_schema_df = mining_schema_df.drop_duplicates(ignore_index=True)

    # Register fake mining-server endpoints (the `responses` library
    # intercepts the widget's HTTP POSTs).
    responses.add_callback(
        responses.POST,
        "http://test/database",
        callback=request_callback,
        content_type="text/csv",
    )
    responses.add_callback(
        responses.POST,
        "http://test/help",
        callback=request_callback_help,
        content_type="application/json",
    )

    mining_schema = MiningSchema()
    mining_schema.add_from_df(mining_schema_df)
    mining_widget = MiningWidget(
        mining_server_url="http://test",
        mining_schema=mining_schema,
    )

    # Before any mining happened the extracted table is empty.
    empty_dataframe = pd.DataFrame()
    assert empty_dataframe.equals(mining_widget.get_extracted_table())

    # No article saver was passed: clicking "mine articles" only hits the
    # /help endpoint (1 call) and prints an explanatory message.
    bot = MiningWidgetBot(mining_widget, capsys, monkeypatch)
    bot.set_value("input_text", "HELLO")
    bot.click("mine_articles")
    assert len(responses.calls) == 1
    assert "No article saver was provided. Nothing to mine." in bot.stdout_cached

    # Now provide a saver with two saved articles and mine for real.
    article_saver = ArticleSaver(fake_sqlalchemy_engine)
    for i in range(2):
        article_saver.add_article(article_id=i)
    mining_widget = MiningWidget(
        mining_server_url="http://test",
        mining_schema=mining_schema,
        article_saver=article_saver,
    )
    bot = MiningWidgetBot(mining_widget, capsys, monkeypatch)
    bot.set_value("input_text", "HELLO")
    bot.click("mine_articles")
    # Two more calls on top of the previous one (responses.calls is global
    # across the test body).
    assert len(responses.calls) == 3
    assert "Collecting saved items..." in bot.stdout_cached
    assert isinstance(mining_widget.get_extracted_table(), pd.DataFrame)

    display_objs = bot.display_cached
    assert len(display_objs) == 3  # 1 schema + 1 warning + 1 table_extractions
    assert isinstance(display_objs[0], pd.DataFrame)
    assert isinstance(display_objs[2], pd.DataFrame)
    assert display_objs[0].equals(mining_schema_df)
    assert isinstance(display_objs[1], HTML)
    assert display_objs[2].equals(table_extractions)
def test_mining_text(monkeypatch, capsys, mining_schema_df):
    """Mining free text displays the schema, a warning and the extractions."""
    mining_schema_df = mining_schema_df.drop_duplicates(ignore_index=True)

    # Register the fake mining-server endpoints the widget will POST to.
    for url, handler, ctype in (
        ("http://test/text", request_callback, "application/json"),
        ("http://test/help", request_callback_help, "application/json"),
    ):
        responses.add_callback(
            responses.POST,
            url,
            callback=handler,
            content_type=ctype,
        )

    schema = MiningSchema()
    schema.add_from_df(mining_schema_df)
    widget = MiningWidget(
        mining_server_url="http://test",
        mining_schema=schema,
    )

    # Drive the widget: type some text and click "mine text".
    ui_bot = MiningWidgetBot(widget, capsys, monkeypatch)
    ui_bot.set_value("input_text", "HELLO")
    ui_bot.click("mine_text")

    # One call to /help plus one to /text.
    assert len(responses.calls) == 2

    shown = ui_bot.display_cached
    assert len(shown) == 3  # 1 schema + 1 warning + 1 table_extractions
    assert isinstance(shown[0], pd.DataFrame)
    assert shown[0].equals(mining_schema_df)
    assert isinstance(shown[1], HTML)
    assert shown[2].equals(table_extractions)
def test_add_entity():
    """Entities can be added one at a time; duplicates raise a warning."""
    schema = MiningSchema()

    # One fully-specified entity and one bare entity.
    schema.add_entity(
        "CHEMICAL",
        property_name="isChiral",
        property_type="ATTRIBUTE",
        property_value_type="BOOLEAN",
        ontology_source="NCIT",
    )
    schema.add_entity("DRUG")
    assert len(schema.schema_df) == 2

    # Re-adding an existing entity warns instead of duplicating the row.
    with pytest.warns(UserWarning, match=r"already exists"):
        schema.add_entity("DRUG")
def test_df(mining_schema_df):
    """Building a schema from a dataframe via ``add_from_df``.

    Covers the happy path, a missing mandatory ``entity_type`` column,
    and that unknown columns are ignored with a warning.
    """
    # We won't be testing for duplicates in this test
    mining_schema_df = mining_schema_df.drop_duplicates(ignore_index=True)

    # Test adding from a dataframe
    mining_schema = MiningSchema()
    mining_schema.add_from_df(mining_schema_df)
    # Make sure a copy is returned
    assert mining_schema.df is not mining_schema.schema_df
    # Check that all data was added
    assert mining_schema.df.equals(mining_schema_df)

    # Test missing entity_type
    wrong_schema_df = mining_schema_df.drop("entity_type", axis=1)
    mining_schema = MiningSchema()
    with pytest.raises(ValueError, match=r"entity_type.* not found"):
        mining_schema.add_from_df(wrong_schema_df)

    # Test ignoring unknown columns
    schema_df_new = mining_schema_df.drop_duplicates().copy()
    # list(range(n)) is the idiomatic spelling of the original
    # [i for i in range(n)] identity comprehension.
    schema_df_new["unknown_column"] = list(range(len(schema_df_new)))
    mining_schema = MiningSchema()
    with pytest.warns(UserWarning, match=r"column.* unknown_column"):
        mining_schema.add_from_df(schema_df_new)
    # Check that all data was added and the unknown columns was ignored
    assert mining_schema.df.equals(mining_schema_df)
def test_save_load_checkpoint(monkeypatch, capsys, mining_schema_df, tmpdir):
    """Saving and loading mining-result checkpoints through the widget buttons.

    Checks the error paths (nothing to save / nothing to load), that a saved
    checkpoint round-trips through JSON, and that loading under a different
    database name still succeeds but emits a warning.
    """
    mining_schema_df = mining_schema_df.drop_duplicates(ignore_index=True)

    # Fake mining-server endpoints intercepted by `responses`.
    responses.add_callback(
        responses.POST,
        "http://test/text",
        callback=request_callback,
        content_type="application/json",
    )
    responses.add_callback(
        responses.POST,
        "http://test/help",
        callback=request_callback_help,
        content_type="application/json",
    )

    mining_schema = MiningSchema()
    mining_schema.add_from_df(mining_schema_df)
    mining_widget = MiningWidget(
        mining_server_url="http://test",
        mining_schema=mining_schema,
        # tmpdir is a directory; the widget presumably derives the checkpoint
        # file path from it (see checkpoint_path.open below) — TODO confirm.
        checkpoint_path=tmpdir,
    )

    bot = MiningWidgetBot(mining_widget, capsys, monkeypatch)
    bot.set_value("input_text", "HELLO")

    # Try saving data, but no results to save
    bot.click("save_button")
    last_displayed = bot.display_cached[-1].data
    assert "ERROR!" in last_displayed
    assert "No mining results available." in last_displayed

    # Click on "investigate"
    bot.click("mine_text")

    # Try loading data, but no checkpoint was saved there
    bot.click("load_button")
    last_displayed = bot.display_cached[-1].data
    assert "ERROR!" in last_displayed
    assert "No checkpoint file found to load." in last_displayed

    # Now there are some results, so we can save a checkpoint
    bot.click("save_button")
    displayed = bot.display_cached
    with bot.mining_widget.checkpoint_path.open("r") as f:
        data = json.load(f)
    # The checkpoint stores the extractions table plus provenance metadata.
    assert np.array_equal(
        pd.DataFrame(data["mining_widget_extractions"]).values,
        bot.mining_widget.table_extractions.values,
    )
    assert data["database_name"] == bot.mining_widget.database_name
    assert data["mining_server_version"] == bot.mining_widget.mining_server_version
    assert "DONE" in displayed[-1].data
    assert "Saving mining results to disk..." in displayed[-2].data

    # Now there is a checkpoint, so we can load it
    # Note: if the database name or the server name is different, data is loaded
    # but we raise a warning.
    for db_name in ("test_database", "test_database_2"):
        bot.mining_widget.database_name = db_name
        # Drop the in-memory results so the load actually restores them.
        del bot.mining_widget.table_extractions
        bot.click("load_button")
        assert np.array_equal(
            pd.DataFrame(data["mining_widget_extractions"]).values,
            bot.mining_widget.table_extractions.values,
        )
        displayed = bot.display_cached
        if db_name != "test_database":
            # Mismatched database name: an extra WARNING is displayed before
            # the loaded table.
            assert isinstance(displayed[-1], pd.DataFrame)
            assert "WARNING" in displayed[-2].data
            assert "DONE" in displayed[-3].data
            assert "Loading mining results from disk..." in displayed[-4].data
        else:
            assert isinstance(displayed[-1], pd.DataFrame)
            assert "DONE" in displayed[-2].data
            assert "Loading mining results from disk..." in displayed[-3].data