def test_db_reader(
    make_test_database: Callable[[str], DatabaseConnector],
    write_to_split: Callable[[str], DatabaseConnector],
) -> None:
    """
    Test that we can read from the database.

    Specifically this tests that:
    1. We can make a query
    2. An empty db has not been populated
    3. A populated db has table entries
    4. We can extract the id for a given set of values which exist
    5. No id is returned if a given set of values is not found in the
       database
    6. We can get the latest row id which has been written to

    Parameters
    ----------
    make_test_database : function
        Function which returns the database connection
    write_to_split : function
        Function returning the database connection where `split` has
        been populated
    """
    # --- Database without any schema ---
    blank_reader = DatabaseReader(make_test_database("empty_read_test"))

    # A plain query must work even though no tables exist
    query_result = blank_reader.query("SELECT 1+1 AS col")
    assert query_result.loc[0, "col"] == 2  # pylint: disable=no-member

    # No tables should have been created in the empty database
    assert not blank_reader.check_tables_created()

    # --- Database where `split` has been written to ---
    populated_reader = DatabaseReader(write_to_split("read_test"))
    assert populated_reader.check_tables_created()

    # write_to_split has written to the split table, so the first record
    # of that table holds the written values
    split_rows = populated_reader.query("SELECT * FROM split")
    split_entry = split_rows.to_dict(orient="records")[0]

    # The id is what we are looking up, so it cannot be a search value
    del split_entry["id"]
    assert populated_reader.get_entry_id("split", split_entry) == 1

    # Alter one of the values so that the entry is no longer in the table
    first_column = next(iter(split_entry))
    split_entry[first_column] += 1
    assert populated_reader.get_entry_id("split", split_entry) is None

    # Only one row has been written, hence the latest row id is 1
    assert populated_reader.get_latest_row_id() == 1
def test_db_creator(
    make_test_database: Callable[[str], DatabaseConnector],
    make_test_schema: Callable[
        [str], Tuple[DatabaseConnector, Dict[str, Dict[str, str]]]
    ],
) -> None:
    """
    Test that we can create the database schemas.

    Specifically this tests that:
    1. The database is empty on creation
    2. The tables are created
    3. It is not possible to create the schema more than once
    4. All expected tables have been created

    Parameters
    ----------
    make_test_database : function
        Function returning the database connection
    make_test_schema : function
        Function returning the database connection and the final
        parameters as sql types
    """
    # A database where the schema was never created must have no tables
    bare_connector = make_test_database("test_creation_without_schema")
    bare_reader = DatabaseReader(bare_connector)
    assert not bare_reader.check_tables_created()

    # A database where the fixture has created the schema
    schema_connector, final_parameters_as_sql_types = make_test_schema(
        "test_creation_with_schema"
    )
    schema_reader = DatabaseReader(schema_connector)
    schema_creator = DatabaseCreator(schema_connector)
    assert schema_reader.check_tables_created()

    # Creating the same tables a second time is expected to fail
    with pytest.raises(sqlite3.OperationalError):
        schema_creator.create_all_schema_tables(final_parameters_as_sql_types)

    # Build the set of expected table names: the fixed tables plus one
    # table per parameter section (where ':' is replaced by '_')
    expected_tables = {
        "system_info",
        "split",
        "file_modification",
        "parameters",
        "run",
    }
    expected_tables |= {
        section.replace(":", "_") for section in final_parameters_as_sql_types
    }

    master_table = schema_reader.query(
        'SELECT name FROM sqlite_master WHERE type="table"'
    )
    # pylint: disable=no-member
    found_tables = set(master_table.loc[:, "name"].values)
    assert expected_tables == found_tables
def __init__(
    self,
    db_connector: Optional[DatabaseConnector] = None,
    project_path: Optional[Union[Path, str]] = None,
) -> None:
    """
    Store the connector, create a reader for it and store the project path.

    Notes
    -----
    The StatusChecker instance only checks the project belonging to the
    same database schema grouped together by the `db_connector`

    Parameters
    ----------
    db_connector : DatabaseConnector or None
        Connection to the database
        If None: A DatabaseConnector created without arguments is used
    project_path : Path or str or None
        Path to the project (the root directory which usually contains
        the makefile and the executable)
        If None: Path() is used
    """
    # Fall back to the defaults for everything the caller left out
    if db_connector is None:
        db_connector = DatabaseConnector()
    if project_path is None:
        project_path = Path()

    self.__db_connector = db_connector
    self.__db_reader = DatabaseReader(db_connector)
    # Path(...) accepts both str and Path, so the attribute is always a Path
    self.__project_path = Path(project_path)
def test_status_checker_until_complete_infinite(
    get_test_data_path: Path,
    get_test_db_copy: Callable[[str], DatabaseConnector],
    copy_test_case_log_file: Callable[[str], None],
) -> None:
    """
    Test the loop in StatusChecker.check_and_update_until_complete.

    Parameters
    ----------
    get_test_data_path : Path
        Path to the test data
    get_test_db_copy : function
        Function which returns a DatabaseConnector connected to a copy of
        test.db
    copy_test_case_log_file : function
        Function for copying the test case log files
    """
    test_case = "infinite_log_file_pid_started_ended_no_mock_pid_complete"

    db_connector = get_test_db_copy(test_case)
    copy_test_case_log_file(test_case)

    # The row with status running will always keep that status, so it is
    # removed before entering the loop
    db_connector.execute_statement("DELETE FROM run WHERE name = 'testdata_5'")

    db_reader = DatabaseReader(db_connector)
    status_checker = StatusChecker(db_connector, get_test_data_path)
    status_checker.check_and_update_until_complete()

    # Once the loop has returned, the query for non-errored runs should
    # give no rows
    non_errored_query = status_checker.get_query_string_for_non_errored_runs()
    non_errored_runs = db_reader.query(non_errored_query)
    assert len(non_errored_runs.index) == 0
def test_db_writer(
    make_test_schema: Callable[[str], Tuple[DatabaseConnector, str]]
) -> None:
    """
    Test that we can write to the database schemas.

    Specifically this tests that:
    1. We can write to the `split` table
    2. Only one record is made
    3. The type is correct
    4. The values are correct
    5. It's possible to update the values

    Parameters
    ----------
    make_test_schema : function
        Function returning the database connection with the schema
        created
    """
    db_connector, _ = make_test_schema("write_test")
    db_reader = DatabaseReader(db_connector)
    db_writer = DatabaseWriter(db_connector)

    table_name = "split"
    dummy_split_dict = {
        "number_of_processors": 41,
        "number_of_nodes": 42,
        "processors_per_node": 43,
    }
    db_writer.create_entry(table_name, dummy_split_dict)

    # NOTE: Protected against SQL injection as table_name is hard-coded above
    select_all = f"SELECT * FROM {table_name}"  # nosec
    written = db_reader.query(select_all)

    # One record, with the three written columns plus the id column
    assert written.shape == (1, 4)

    # Check all the elements are the same
    # https://www.quora.com/How-do-you-check-if-all-elements-in-a-NumPy-array-are-the-same-in-Python-pandas
    column_types = written.dtypes.values
    assert (column_types == np.dtype("int64")).all()

    for column, written_value in dummy_split_dict.items():
        assert written.loc[0, column] == written_value  # pylint: disable=no-member

    # Update two of the fields of the record found through the third field
    update_fields = ("number_of_processors", "number_of_nodes")
    search_condition = (
        f'processors_per_node = {dummy_split_dict["processors_per_node"]}'
    )
    new_values = tuple(dummy_split_dict[field] - 10 for field in update_fields)
    update_str = db_writer.create_update_string(
        update_fields, table_name, search_condition
    )
    db_writer.update(update_str, new_values)

    updated = db_reader.query(select_all)
    for field, new_value in zip(update_fields, new_values):
        # pylint: disable=no-member
        assert updated.loc[:, field].values[0] == new_value
def _get_metadata_updater_and_db_reader(
    name: str,
) -> Tuple[MetadataUpdater, DatabaseReader]:
    """
    Return a MetadataUpdater and a DatabaseReader on the same connection.

    Parameters
    ----------
    name : str
        Name of the temporary database

    Returns
    -------
    metadata_updater : MetadataUpdater
        Object to update the database with
    db_reader : DatabaseReader
        The corresponding database reader
    """
    # Both objects are built from one connector obtained from
    # get_test_db_copy
    connector = get_test_db_copy(name)
    reader = DatabaseReader(connector)
    updater = MetadataUpdater(connector, 1)
    return updater, reader
def assert_first_run(
    bout_paths: BoutPaths, db_connector: DatabaseConnector
) -> DatabaseReader:
    """
    Assert that the first run went well.

    The dump files must exist in `bout_paths.bout_inp_dst_dir` and the
    tables must have been created in the database.

    Parameters
    ----------
    bout_paths : BoutPaths
        The object containing the paths
    db_connector : DatabaseConnector
        The database connection

    Returns
    -------
    reader : DatabaseReader
        The database reader object made from `db_connector`
    """
    reader = DatabaseReader(db_connector)

    run_directory = bout_paths.bout_inp_dst_dir
    assert_dump_files_exist(run_directory)

    tables_created = reader.check_tables_created()
    assert tables_created

    return reader
def __init__(
    self,
    db_connector: Optional[DatabaseConnector] = None,
    drop_id: Optional[str] = "keep_run_id",
) -> None:
    """
    Set the database to use and collect information about its tables.

    Parameters
    ----------
    db_connector : DatabaseConnector or None
        The connection to the database
        If None: Default database connector will be used
    drop_id : None or str
        Specifies what id columns should be dropped when obtaining the
        metadata
        - None : No columns will be dropped
        - 'parameters' : All columns containing parameters ids will be
          dropped
        - 'keep_run_id' : Only the run.id of the id columns will be kept
        - 'all_id' : All id columns will be removed
    """
    self.drop_id = drop_id

    if db_connector is None:
        db_connector = DatabaseConnector()
    self.__db_reader = DatabaseReader(db_connector)

    # NOTE: These are filled in this order, as in the original
    #       implementation
    self.__table_names = self.__get_all_table_names()
    self.__table_column_dict = self.__get_table_column_dict()
    self.__table_connections = self.__get_table_connections()
    self.__sorted_columns = self.__get_sorted_columns()

    # The parameters table together with the tables it is connected to
    parameters_tables = ("parameters", *self.__table_connections["parameters"])

    # Keep the sorted columns whose prefix (the text before the first
    # '.') is one of the parameters tables
    self.__parameters_columns = tuple(
        str(column)
        for column in self.__sorted_columns
        if column.split(".", 1)[0] in parameters_tables
    )
def __init__(
    self,
    db_connector: DatabaseConnector,
    bout_paths: BoutPaths,
    final_parameters: FinalParameters,
) -> None:
    """
    Set the database to use, the paths and the final parameters.

    Parameters
    ----------
    db_connector : DatabaseConnector
        The database connector
    bout_paths : BoutPaths
        Object containing the paths
    final_parameters : FinalParameters
        Object containing the final parameters
    """
    # Objects passed by the caller
    self.__bout_paths = bout_paths
    self.__final_parameters = final_parameters

    # Objects created from the input: a writer and a reader sharing the
    # same connector, and a Make object for the project path
    self.__db_writer = DatabaseWriter(db_connector)
    self.__db_reader = DatabaseReader(db_connector)
    self.__make = Make(self.__bout_paths.project_path)
def large_graph_tester(
    submitter_type: Type[AbstractSubmitter],
    make_project: Path,
    yield_number_of_rows_for_all_tables: Callable[[DatabaseReader], Dict[str, int]],
    file_state_restorer: FileStateRestorer,
) -> None:
    """
    Test that the large graph works as expected.

    The node setup can be found in node_functions.py

    Parameters
    ----------
    submitter_type : type
        Used to assert that the correct submitter is used
    make_project : Path
        The path to the conduction example
    yield_number_of_rows_for_all_tables : function
        Function which returns the number of rows for all tables in a
        schema
    file_state_restorer : FileStateRestorer
        Object for restoring files to original state
    """
    name = f"test_large_graph_{submitter_type.__name__}"
    node_adder = LargeGraphNodeAdder(
        name, make_project, submitter_type, file_state_restorer
    )

    # Build the graph one RunGroup at a time
    # RunGroup belonging to node 2
    node_adder.add_and_assert_node_group_2()
    # RunGroup belonging to node 3 and 4
    node_adder.add_and_assert_node_group_3_and_4()
    # RunGroup belonging to node 6
    node_8 = node_adder.add_and_assert_node_group_6()
    # RunGroup belonging to node 9
    node_adder.add_and_assert_node_node_9(node_8)

    # Run the project
    runner = BoutRunner(node_adder.run_graph)
    runner.run()
    runner.wait_until_completed()

    # Check that all the nodes have changed status: running once more is
    # expected to raise
    with pytest.raises(RuntimeError):
        runner.run()
        runner.wait_until_completed()

    project_path = node_adder.paths["project_path"]
    pre_and_post_directory = node_adder.paths["pre_and_post_directory"]

    # Check that the pre and post files are present
    for node in (0, 1, 5, 7, 8, 10):
        node_file = pre_and_post_directory.joinpath(f"{node}.txt")
        assert node_file.is_file()

    # Check that all the dump files are present (either as .nc or as .h5)
    for restart_str in ("", "_restart_0", "_restart_1", "_restart_2"):
        run_directory = f"{name}{restart_str}"
        assert any(
            project_path.joinpath(run_directory, dump_name).is_file()
            for dump_name in ("BOUT.dmp.0.nc", "BOUT.dmp.0.h5")
        )

    # NOTE: We will only have 4 runs as node 4 is a duplicate of node 2 and will
    #       therefore be skipped
    number_of_runs = 4
    db_reader = DatabaseReader(node_adder.run_groups["run_group_2"].db_connector)
    assert_tables_have_expected_len(
        db_reader,
        yield_number_of_rows_for_all_tables,
        expected_run_number=number_of_runs,
        restarted=True,
    )

    log_path = project_path.joinpath(f"{name}_restart_2", "BOUT.log.0")
    simulation_steps = LogReader(log_path).get_simulation_steps()
    last_step = simulation_steps.index[-1]
    # NOTE: nout=0 set in the function tests.utils.run.make_run_group
    assert np.isclose(simulation_steps.loc[last_step, "Sim_time"], 0.0)
def test_large_graph(
    make_project: Path,
    yield_number_of_rows_for_all_tables: Callable[[DatabaseReader], Dict[str, int]],
    clean_default_db_dir: Path,
    tear_down_restart_directories: Callable[[Path], None],
) -> None:
    """
    Test that the large graph works as expected.

    The node setup can be found in node_functions.py

    Parameters
    ----------
    make_project : Path
        The path to the conduction example
    yield_number_of_rows_for_all_tables : function
        Function which returns the number of rows for all tables in a
        schema
    clean_default_db_dir : Path
        Path to the default database directory
    tear_down_restart_directories : function
        Function used for removal of restart directories
    """

    def _processor(function, *args):
        """Return the processor dict for `function` called with `args`."""
        return {"function": function, "args": args, "kwargs": None}

    # The fixture is only requested for its effect, the value is not used
    _ = clean_default_db_dir

    name = "test_large_graph"
    project_path = make_project
    pre_and_post_directory = project_path.joinpath(f"pre_and_post_{name}")
    pre_and_post_directory.mkdir()

    # RunGroup belonging to node 2
    run_group_2 = make_run_group(name, make_project)
    run_graph = run_group_2.run_graph
    run_directory_node_2 = run_group_2.bout_paths.bout_inp_dst_dir
    run_group_2.add_pre_processor(
        _processor(node_zero, run_directory_node_2, pre_and_post_directory)
    )
    run_group_2.add_pre_processor(
        _processor(node_one, run_directory_node_2, pre_and_post_directory)
    )
    run_group_2.add_post_processor(
        _processor(node_five, run_directory_node_2, pre_and_post_directory)
    )
    tear_down_restart_directories(run_directory_node_2)

    # RunGroup belonging to node 3
    # NOTE: The group is only added to the run_graph, the returned object
    #       is not needed further down
    make_run_group(
        name,
        make_project,
        run_graph,
        restart_from=run_group_2.bout_paths.bout_inp_dst_dir,
        waiting_for=run_group_2.bout_run_node_name,
    )

    # RunGroup belonging to node 4
    run_group_4 = make_run_group(name, make_project, run_graph)
    run_directory_node_4 = run_group_4.bout_paths.bout_inp_dst_dir

    # RunGroup belonging to node 6
    run_group_6 = make_run_group(
        name,
        make_project,
        run_graph,
        restart_from=run_group_2.bout_paths.bout_inp_dst_dir,
        waiting_for=run_group_2.bout_run_node_name,
    )
    run_directory_node_6 = run_group_6.bout_paths.bout_inp_dst_dir
    node_8 = run_group_6.add_post_processor(
        _processor(
            node_eight,
            run_directory_node_4,
            run_directory_node_6,
            pre_and_post_directory,
        ),
        waiting_for=run_group_4.bout_run_node_name,
    )

    # RunGroup belonging to node 9
    # NOTE: We need the run directory of node 9 as an input in node 7
    #       As node 9 is waiting for node 7 we hard-code the name
    #       (as we will know what it will be)
    run_directory_node_9 = project_path.joinpath(f"{name}_restart_2")
    # The function of node_seven belongs to RunGroup2, but takes the run
    # directory of node 9 as an input
    node_7_name = run_group_2.add_post_processor(
        _processor(
            node_seven,
            run_directory_node_2,
            run_directory_node_9,
            pre_and_post_directory,
        )
    )
    run_group_9 = make_run_group(
        name,
        make_project,
        run_graph,
        restart_from=run_group_6.bout_paths.bout_inp_dst_dir,
        waiting_for=(
            run_group_4.bout_run_node_name,
            run_group_6.bout_run_node_name,
            node_7_name,
        ),
    )
    run_group_9.add_post_processor(
        _processor(node_ten, run_directory_node_9, pre_and_post_directory),
        waiting_for=node_8,
    )

    # Run the project
    runner = BoutRunner(run_graph)
    runner.run()

    # Check that all the nodes have changed status: running once more is
    # expected to raise
    with pytest.raises(RuntimeError):
        runner.run()

    # Check that the pre and post files are present
    for node in (0, 1, 5, 7, 8, 10):
        node_file = pre_and_post_directory.joinpath(f"{node}.txt")
        assert node_file.is_file()

    # Check that all the dump files are present (either as .nc or as .h5)
    for restart_str in ("", "_restart_0", "_restart_1", "_restart_2"):
        run_directory = project_path.joinpath(f"{name}{restart_str}")
        assert any(
            run_directory.joinpath(dump_name).is_file()
            for dump_name in ("BOUT.dmp.0.nc", "BOUT.dmp.0.h5")
        )

    # NOTE: We will only have 4 runs as node 4 is a duplicate of node 2 and will
    #       therefore be skipped
    number_of_runs = 4
    db_reader = DatabaseReader(run_group_2.db_connector)
    assert_tables_have_expected_len(
        db_reader,
        yield_number_of_rows_for_all_tables,
        expected_run_number=number_of_runs,
        restarted=True,
    )
def test_status_checker(
    test_case: str,
    get_test_data_path: Path,
    get_test_db_copy: Callable[[str], DatabaseConnector],
    mock_pid_exists: Callable[[str], None],
    copy_test_case_log_file: Callable[[str], None],
) -> None:
    """
    Test the StatusChecker exhaustively (excluding raises and loop).

    Parameters
    ----------
    test_case : str
        Description of the test on the form

        >>> ('<log_file_present>_<pid_present_in_log>_'
        ...  '<started_time_present_in_log>_<ended_time_present_in_log>'
        ...  '_<whether_pid_exists>_<new_status>')

    get_test_data_path : Path
        Path to test data
    get_test_db_copy : function
        Function which returns a database connector to the copy of the
        test database
    mock_pid_exists : function
        Function which sets up a monkeypatch for psutil.pid_exist
    copy_test_case_log_file : function
        Function which copies log files according to the test_case
    """
    db_connector = get_test_db_copy(test_case)
    mock_pid_exists(test_case)
    copy_test_case_log_file(test_case)

    db_reader = DatabaseReader(db_connector)

    def _first_value(query: str, column: str):
        """Return the value of `column` in the first row of the query."""
        # pylint: disable=no-member
        return db_reader.query(query).loc[0, column]

    status_checker = StatusChecker(db_connector, get_test_data_path)
    status_checker.check_and_update_status()

    # Check that the correct status has been assigned to "running"
    running_status = _first_value(
        "SELECT latest_status FROM run WHERE name = 'testdata_5'",
        "latest_status",
    )
    assert running_status == "running"

    # Check that the correct status has been assigned to "submitted"
    # NOTE: The expected status is the last '_'-separated part of the
    #       test case
    expected_status = test_case.rsplit("_", 1)[-1]
    submitted_status = _first_value(
        "SELECT latest_status FROM run WHERE name = 'testdata_6'",
        "latest_status",
    )
    assert submitted_status == expected_status

    # Check that correct start_time has been set
    if "not_started" not in test_case:
        expected_start = str(datetime(2020, 5, 1, 17, 7, 10))
        start_time = _first_value(
            "SELECT start_time FROM run WHERE name = 'testdata_6'",
            "start_time",
        )
        assert expected_start == start_time

    # Check that correct end_time has been set
    if "complete" in test_case and "not_ended" not in test_case:
        expected_stop = str(datetime(2020, 5, 1, 17, 7, 14))
        stop_time = _first_value(
            "SELECT stop_time FROM run WHERE name = 'testdata_6'",
            "stop_time",
        )
        assert expected_stop == stop_time