def __init__(
    self,
    db_connector: Optional[DatabaseConnector] = None,
    project_path: Optional[Union[Path, str]] = None,
) -> None:
    """
    Set connector, reader and a project path.

    Notes
    -----
    The StatusChecker instance only checks the runs belonging to the database
    schema accessed through the `db_connector`

    Parameters
    ----------
    db_connector : DatabaseConnector
        Connection to the database
    project_path : Path
        Path to the project (the root directory which usually contains the
        makefile and the executable)
    """
    self.__db_connector = (
        db_connector if db_connector is not None else DatabaseConnector()
    )
    self.__db_reader = DatabaseReader(self.__db_connector)
    self.__project_path = (
        Path(project_path) if project_path is not None else Path()
    )
def _get_number_of_rows_for_all_tables(db_reader: DatabaseReader) -> Dict[str, int]:
    """
    Return the number of rows for all tables in a schema.

    Parameters
    ----------
    db_reader : DatabaseReader
        The object used to read from the database

    Returns
    -------
    number_of_rows_dict : dict
        Dict of the form

        >>> {'table_name_1': int, 'table_name_2': int, ...}
    """
    number_of_rows_dict = dict()
    query_str = (
        "SELECT name FROM sqlite_master\n"
        "    WHERE type ='table'\n"
        "    AND name NOT LIKE 'sqlite_%'"
    )
    table_of_tables = db_reader.query(query_str)

    for _, table_name_as_series in table_of_tables.iterrows():
        table_name = table_name_as_series["name"]
        # NOTE: SQL injection possible through bad table name, however the table
        #       names are hard-coded in this example
        query_str = f"SELECT COUNT(*) AS rows FROM {table_name}"  # nosec
        table = db_reader.query(query_str)
        number_of_rows_dict[table_name] = table.loc[0, "rows"]

    return number_of_rows_dict
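# A minimal sketch of how the helper above could be used in a test. The fixture name
# `make_test_schema` is taken from the other snippets in this file; the example
# function itself and its assertion are assumptions, not part of the original code,
# and the imports of DatabaseReader and _get_number_of_rows_for_all_tables are
# assumed to come from the surrounding module.
def _example_assert_all_tables_empty(make_test_schema) -> None:
    """Assert that a freshly created schema contains only empty tables (sketch)."""
    db_connector, _ = make_test_schema("row_count_example")
    db_reader = DatabaseReader(db_connector)
    number_of_rows = _get_number_of_rows_for_all_tables(db_reader)
    # The schema has been created, but nothing has been written yet
    assert all(rows == 0 for rows in number_of_rows.values())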
def test_status_checker_until_complete_infinite(
    get_test_data_path: Path,
    get_test_db_copy: Callable[[str], DatabaseConnector],
    copy_test_case_log_file: Callable[[str], None],
) -> None:
    """
    Test the infinite loop of StatusChecker.

    Parameters
    ----------
    get_test_data_path : Path
        Path to the test data
    get_test_db_copy : function
        Function which returns a DatabaseConnector connected to a copy of test.db
    copy_test_case_log_file : function
        Function which copies the test case log files
    """
    test_case = "infinite_log_file_pid_started_ended_no_mock_pid_complete"

    project_path = get_test_data_path
    db_connector = get_test_db_copy(test_case)
    copy_test_case_log_file(test_case)

    # Remove the row which has status 'running' (as it will always have this status)
    db_connector.execute_statement("DELETE FROM run WHERE name = 'testdata_5'")

    db_reader = DatabaseReader(db_connector)

    status_checker = StatusChecker(db_connector, project_path)
    status_checker.check_and_update_until_complete()

    query = status_checker.get_query_string_for_non_errored_runs()
    assert len(db_reader.query(query).index) == 0
def test_db_writer(
    make_test_schema: Callable[[str], Tuple[DatabaseConnector, str]]
) -> None:
    """
    Test that we can write to the database schema.

    Specifically this tests that:
    1. We can write to the `split` table
    2. Only one record is made
    3. The types are correct
    4. The values are correct
    5. It is possible to update the values

    Parameters
    ----------
    make_test_schema : function
        Function returning the database connection with the schema created
    """
    db_connector, _ = make_test_schema("write_test")
    db_reader = DatabaseReader(db_connector)
    db_writer = DatabaseWriter(db_connector)

    table_name = "split"
    dummy_split_dict = {
        "number_of_processors": 41,
        "number_of_nodes": 42,
        "processors_per_node": 43,
    }
    db_writer.create_entry(table_name, dummy_split_dict)

    # NOTE: Protected against SQL injection as table_name is hard-coded above
    table = db_reader.query(f"SELECT * FROM {table_name}")  # nosec

    # Check that the shape is expected (note that one column is assigned to the id)
    assert table.shape == (1, 4)

    # Check that all the columns have dtype int64
    # https://www.quora.com/How-do-you-check-if-all-elements-in-a-NumPy-array-are-the-same-in-Python-pandas
    values = table.dtypes.values
    assert (values == np.dtype("int64")).all()

    for key, value in dummy_split_dict.items():
        assert table.loc[0, key] == value  # pylint: disable=no-member

    update_fields = ("number_of_processors", "number_of_nodes")
    search_condition = (
        f'processors_per_node = {dummy_split_dict["processors_per_node"]}'
    )
    values = tuple(dummy_split_dict[field] - 10 for field in update_fields)
    db_writer.update(
        db_writer.create_update_string(update_fields, table_name, search_condition),
        values,
    )

    # NOTE: Protected against SQL injection as table_name is hard-coded above
    table = db_reader.query(f"SELECT * FROM {table_name}")  # nosec
    for index, field in enumerate(update_fields):
        # pylint: disable=no-member
        assert table.loc[:, field].values[0] == values[index]
def test_db_creator(
    make_test_database: Callable[[str], DatabaseConnector],
    make_test_schema: Callable[
        [str], Tuple[DatabaseConnector, Dict[str, Dict[str, str]]]
    ],
) -> None:
    """
    Test that we can create the database schema.

    Specifically this tests that:
    1. The database is empty on creation
    2. The tables are created
    3. It is not possible to create the schema more than once
    4. All expected tables have been created

    Parameters
    ----------
    make_test_database : function
        Function returning the database connection
    make_test_schema : function
        Function returning the database connection and the final parameters as
        SQL types
    """
    db_connector_no_schema = make_test_database("test_creation_without_schema")
    db_reader_no_schema = DatabaseReader(db_connector_no_schema)

    # There should be no tables before creating them
    assert not db_reader_no_schema.check_tables_created()

    db_connector_schema, final_parameters_as_sql_types = make_test_schema(
        "test_creation_with_schema"
    )
    db_reader_schema = DatabaseReader(db_connector_schema)
    db_creator = DatabaseCreator(db_connector_schema)

    # The tables should now have been created
    assert db_reader_schema.check_tables_created()

    with pytest.raises(sqlite3.OperationalError):
        db_creator.create_all_schema_tables(final_parameters_as_sql_types)

    # Check that all tables have been created
    non_parameter_tables = {
        "system_info",
        "split",
        "file_modification",
        "parameters",
        "run",
    }
    parameter_tables = set(
        el.replace(":", "_") for el in final_parameters_as_sql_types.keys()
    )
    query_str = 'SELECT name FROM sqlite_master WHERE type="table"'
    table = db_reader_schema.query(query_str)
    actual = table.loc[:, "name"].values  # pylint: disable=no-member
    assert non_parameter_tables.union(parameter_tables) == set(actual)
def assert_first_run(
    bout_paths: BoutPaths, db_connector: DatabaseConnector
) -> DatabaseReader:
    """
    Assert that the first run went well.

    Parameters
    ----------
    bout_paths : BoutPaths
        The object containing the paths
    db_connector : DatabaseConnector
        The database connection

    Returns
    -------
    db_reader : DatabaseReader
        The database reader object
    """
    db_reader = DatabaseReader(db_connector)
    assert_dump_files_exist(bout_paths.bout_inp_dst_dir)
    assert db_reader.check_tables_created()
    return db_reader
def __init__( self, db_connector: Optional[DatabaseConnector] = None, drop_id: Optional[str] = "keep_run_id", ) -> None: """ Set the database to use. Parameters ---------- db_connector : DatabaseConnector or None The connection to the database If None: Default database connector will be used drop_id : None or str Specifies what id columns should be dropped when obtaining the metadata - None : No columns will be dropped - 'parameters' : All columns containing parameters ids will be dropped - 'keep_run_id' : Only the run.id of the id columns will be kept - 'all_id' : All id columns will be removed """ self.drop_id = drop_id db_connector = db_connector if db_connector is not None else DatabaseConnector() self.__db_reader = DatabaseReader(db_connector) self.__table_names = self.__get_all_table_names() self.__table_column_dict = self.__get_table_column_dict() self.__table_connections = self.__get_table_connections() self.__sorted_columns = self.__get_sorted_columns() parameters_connections = {"parameters": self.__table_connections["parameters"]} parameters_tables = ("parameters", *parameters_connections["parameters"]) self.__parameters_columns = tuple( str(col) for col in self.__sorted_columns if col.split(".")[0] in parameters_tables )
def __init__( self, db_connector: DatabaseConnector, bout_paths: BoutPaths, final_parameters: FinalParameters, ) -> None: """ Set the database to use. Parameters ---------- db_connector : DatabaseConnector The database connector bout_paths : BoutPaths Object containing the paths final_parameters : FinalParameters Object containing the final parameters """ self.__db_writer = DatabaseWriter(db_connector) self.__db_reader = DatabaseReader(db_connector) self.__bout_paths = bout_paths self.__final_parameters = final_parameters self.__make = Make(self.__bout_paths.project_path)
def _get_metadata_updater_and_db_reader(
    name: str,
) -> Tuple[MetadataUpdater, DatabaseReader]:
    """
    Return a MetadataUpdater and a DatabaseReader connected to the same database.

    Parameters
    ----------
    name : str
        Name of the temporary database

    Returns
    -------
    metadata_updater : MetadataUpdater
        Object to update the database with
    db_reader : DatabaseReader
        The corresponding database reader
    """
    db_connector = get_test_db_copy(name)
    db_reader = DatabaseReader(db_connector)
    metadata_updater = MetadataUpdater(db_connector, 1)
    return metadata_updater, db_reader
class MetadataReader: r""" Class for reading the metadata from the database. Attributes ---------- __db_reader : DatabaseConnector The connection to the database __table_names : tuple Getter variable for table_names __table_column_dict : dict of tuple Getter variable for table_column_dict __table_connections : dict of tuple Getter variable for table_connections __sorted_columns : tuple Getter variable for sorted_columns table_names : tuple A tuple containing all names of the tables table_column_dict : dict of tuple A dict where the keys are table names, and the values are corresponding column names table_connections : dict of tuple A dict where the keys are tables, and the values are tuples of tables connected to the key table sorted_columns : tuple A tuple of the column names as they will be sorted in the all_metadata DataFrame date_columns : tuple Columns containing dates drop_id : None or str Specifies what id columns should be dropped when obtaining the metadata Methods ------- get_all_metadata() Return all of the run metadata get_parameters_metadata() Return only the parameter part of the run metadata get_join_query(from_statement, columns, alias_columns, table_connections) Return the query string of a `SELECT` query with `INNER JOIN` __get_parameters_query() Return the parameters query string __get_sorted_columns() Return all columns sorted __get_table_connections() Return a dict containing the table connections __get_all_table_names() Return all the table names in the schema __get_table_column_dict() Return all the column names of the specified tables Examples -------- >>> from pathlib import Path >>> from bout_runners.database.database_connector import DatabaseConnector >>> db_connector = DatabaseConnector('test', Path()) >>> metadata_reader = MetadataReader(db_connector) >>> metadata_reader.get_parameters_metadata() bar.id bar.foo ... parameters.baz_id parameters.foo_id 0 1 1 ... 1 1 1 2 10 ... 1 2 2 2 10 ... 1 1 [3 rows x 16 columns] >>> metadata_reader.get_all_metadata() run.id ... system_info.version 0 1 ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 1 2 ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 2 3 ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 3 4 ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 4 5 ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 5 6 ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 6 7 ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 [7 rows x 43 columns] >>> metadata_reader.drop_id = 'all_id' >>> metadata_reader.get_all_metadata() run.latest_status ... system_info.version 0 complete ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 1 complete ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 2 complete ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 3 complete ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 4 error ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 5 running ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 6 submitted ... #1 SMP Thu Oct 17 19:31:58 UTC 2019 [7 rows x 28 columns] """ date_columns = ( "run.start_time", "run.stop_time", "run.submitted_time", "file_modification.bout_lib_modified", "file_modification.project_executable_modified", "file_modification.project_makefile_modified", ) def __init__( self, db_connector: DatabaseConnector, drop_id: Optional[str] = "keep_run_id", ) -> None: """ Set the database to use. 
Parameters ---------- db_connector : DatabaseConnector The connection to the database drop_id : None or str Specifies what id columns should be dropped when obtaining the metadata - None : No columns will be dropped - 'parameters' : All columns containing parameters ids will be dropped - 'keep_run_id' : Only the run.id of the id columns will be kept - 'all_id' : All id columns will be removed """ self.drop_id = drop_id self.__db_reader = DatabaseReader(db_connector) self.__table_names = self.__get_all_table_names() self.__table_column_dict = self.__get_table_column_dict() self.__table_connections = self.__get_table_connections() self.__sorted_columns = self.__get_sorted_columns() parameters_connections = {"parameters": self.__table_connections["parameters"]} parameters_tables = ("parameters", *parameters_connections["parameters"]) self.__parameters_columns = tuple( str(col) for col in self.__sorted_columns if col.split(".")[0] in parameters_tables ) @property def table_names(self) -> Tuple[str, ...]: """ Set the properties of self.table_names. Returns ------- self.__table_names : tuple A tuple containing all names of the tables """ return self.__table_names @property def table_column_dict( self, ) -> Dict[str, Tuple[str, ...]]: """ Set the properties of self.table_column_dict. Returns ------- self.__table_column_dict : dict of tuple A dict where the keys are table names, and the values are corresponding column names """ return self.__table_column_dict @property def table_connection( self, ) -> Dict[str, Tuple[str, ...]]: """ Set the properties of self.table_connections. Returns ------- self.__table_connections : dict of tuple A dict where the keys are tables, and the values are tuples of tables connected to the key table """ return self.__table_connections @property def sorted_columns(self) -> Tuple[str, ...]: """ Set the properties of self.sorted_columns. Returns ------- self.__sorted_columns : tuple A tuple of the column names as they will be sorted in the all_metadata DataFrame """ return self.__sorted_columns @drop_ids def get_all_metadata(self): """ Return all of the run metadata. Returns ------- DataFrame The DataFrame of the run metadata """ parameters_query = self.__get_parameters_query() # Adding spaces and parenthesis parameter_sub_query = "\n".join( [f'{" " * 6}{line}' for line in parameters_query.split("\n")] ) parameter_sub_query = ( f"{parameter_sub_query[:5]}({parameter_sub_query[6:-1]}) " f"AS subquery" ) # NOTE: The subquery names are the names of the columns after # the query. We would like to rename them to # sorted_columns. Hence the `columns` field and # `alias_columns` field appears swapped subquery_columns = [ f'subquery."{col}"' if col in self.__parameters_columns else col for col in self.sorted_columns ] # Remove the parameters from the table_connection to avoid # double joining table_connections = self.__table_connections.copy() table_connections.pop("parameters") unfinished_all_metadata_query = self.get_join_query( "run", subquery_columns, self.sorted_columns, table_connections ) # Update the parameters columns all_metadata_query = unfinished_all_metadata_query.replace( " parameters ", f"\n{parameter_sub_query}\n" ).replace("= parameters.id", '= subquery."parameters.id"') return self.__db_reader.query(all_metadata_query, parse_dates=self.date_columns) @drop_ids def get_parameters_metadata(self): """ Return only the parameter part of the run metadata. 
Returns ------- DataFrame The DataFrame of the parameter metadata """ parameters_query = self.__get_parameters_query() return self.__db_reader.query(parameters_query) @staticmethod def get_join_query( from_statement: str, columns: Sequence[str], alias_columns: Sequence[str], table_connections: Dict[str, Tuple[str, ...]], ) -> str: """ Return the query string of a `SELECT` query with `INNER JOIN`. Notes ----- The tables in `table_connection` is assumed to be joined by `id`s. I.e. `table_a` is connected to `table_b` by `table_b` having a column named `table_a_id` which corresponds to the `id` column of `table_a` Parameters ---------- from_statement : str The statement after the `FROM` keyword in the query I.e. >>> f'SELECT * FROM {from_statement}' columns : array_like The columns to select from the tables I.e. >>> f'SELECT {columns} FROM *' alias_columns : array_like The name of the columns in the resulting table I.e. >>> f'SELECT {columns[0]} AS {alias_columns[0]} FROM *' table_connections : dict A dict where the keys are the table names, and the values are tuples containing table names connected to the key table as described in the note above Returns ------- query : str The SQL-string which can be used to query where table in databases are joined through `INNER JOIN` operations """ query = "SELECT\n" for column, alias in zip(columns, alias_columns): query += f'{" " * 7}{column} AS "{alias}",\n' # Remove last comma query = f"{query[:-2]}\n" query += f"FROM {from_statement}\n" for left_table in table_connections.keys(): for right_table in table_connections[left_table]: query += ( f'{" " * 4}INNER JOIN {right_table} ON ' f"{left_table}." f"{right_table}_id = {right_table}.id\n" ) return query def __get_parameters_query(self) -> str: """ Return the parameters query string. Returns ------- parameters_query : str The SQL-string which can be used to query where table in databases are joined through `INNER JOIN` operations """ parameter_connections = {"parameters": self.__table_connections["parameters"]} parameters_query = self.get_join_query( "parameters", self.__parameters_columns, self.__parameters_columns, parameter_connections, ) return parameters_query def __get_sorted_columns(self) -> Tuple[str, ...]: """ Return all columns sorted. The columns will be sorted alphabetically first by table name, then alphabetically by column name, with the following exceptions: 1. The columns from the run table is presented first 2. The id column is the first column in the table Returns ------- tuple Dict containing the column names On the form >>> ('run.id', ... 'run.column_name_1', ... 'run.column_name_2', ... ... ... 'table_name_1.column_name_1', ... 'table_name_1.column_name_2', ...) """ sorted_columns: List[str] = list() table_names = sorted(self.table_column_dict.keys()) table_names.pop(table_names.index("run")) table_names.insert(0, "run") for table_name in table_names: table_columns = list() for column_name in sorted(self.table_column_dict[table_name]): table_columns.append(f"{table_name}.{column_name}") table_columns.pop(table_columns.index(f"{table_name}.id")) table_columns.insert(0, f"{table_name}.id") sorted_columns = [*sorted_columns, *table_columns] return tuple(sorted_columns) def __get_table_connections(self) -> Dict[str, Tuple[str, ...]]: """ Return a dict containing the table connections. 
Returns ------- table_connection_dict : dict A dict telling which tables are connected to each other, where the key is the table under consideration and the value is a tuple containing the tables which have a key connection to the table under consideration On the form >>> {'table_1': ('table_2', 'table_3'), ... 'table_4': ('table_5',), ...} Raises ------ RuntimeError If match is None """ table_connection_dict = dict() pattern = re.compile("(.*)_id") for table, columns in self.table_column_dict.items(): ids: List[str] = list() for column in columns: if "_id" in column: match = pattern.match(column) if match is None: msg = f"match is None for '(.*)_id' for input '{column}'" logging.critical(msg) raise RuntimeError(msg) ids.append(match[1]) if len(ids) > 0: table_connection_dict[table] = tuple(ids) return table_connection_dict def __get_all_table_names(self) -> Tuple[str, ...]: """ Return all the table names in the schema. Returns ------- tuple A tuple containing all names of the tables """ query = ( "SELECT name FROM sqlite_master\n" "WHERE\n" " type ='table' AND\n" " name NOT LIKE 'sqlite_%'" ) # pylint: disable=no-member return tuple(self.__db_reader.query(query).loc[:, "name"]) def __get_table_column_dict(self) -> Dict[str, Tuple[str, ...]]: """ Return all the column names of the specified tables. Returns ------- table_column_dict : dict of tuple Dict containing the column names On the form >>> {'table_1': ('table_1_column_1', ...), ... 'table_2': ('table_2_column_1', ...), ...} """ table_column_dict = dict() query = "SELECT name FROM pragma_table_info('{}')" for table_name in self.table_names: # pylint: disable=no-member table_column_dict[table_name] = tuple( self.__db_reader.query(query.format(table_name)).loc[:, "name"] ) return table_column_dict
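# A minimal sketch of the SQL produced by MetadataReader.get_join_query. The table
# and column names below ('run' and 'split') follow the schema used in the other
# snippets, but this particular call is illustrative only; the exact whitespace of
# the generated string is determined by the implementation above.
join_query = MetadataReader.get_join_query(
    from_statement="run",
    columns=("run.id", "split.number_of_processors"),
    alias_columns=("run.id", "split.number_of_processors"),
    table_connections={"run": ("split",)},
)
# The resulting string is roughly:
#   SELECT
#          run.id AS "run.id",
#          split.number_of_processors AS "split.number_of_processors"
#   FROM run
#       INNER JOIN split ON run.split_id = split.id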
def large_graph_tester( submitter_type: Type[AbstractSubmitter], make_project: Path, yield_number_of_rows_for_all_tables: Callable[[DatabaseReader], Dict[str, int]], file_state_restorer: FileStateRestorer, ) -> None: """ Test that the graph with 10 nodes work as expected. The node setup can be found in node_functions.py Parameters ---------- submitter_type : type Used to assert that the correct submitter is used make_project : Path The path to the conduction example yield_number_of_rows_for_all_tables : function Function which returns the number of rows for all tables in a schema file_state_restorer : FileStateRestorer Object for restoring files to original state """ name = f"test_large_graph_{submitter_type.__name__}" node_adder = LargeGraphNodeAdder( name, make_project, submitter_type, file_state_restorer ) # RunGroup belonging to node 2 node_adder.add_and_assert_node_group_2() # RunGroup belonging to node 3 and 4 node_adder.add_and_assert_node_group_3_and_4() # RunGroup belonging to node 6 node_8 = node_adder.add_and_assert_node_group_6() # RunGroup belonging to node 9 node_adder.add_and_assert_node_node_9(node_8) # Run the project runner = BoutRunner(node_adder.run_graph) runner.run() runner.wait_until_completed() # Check that all the nodes have changed status with pytest.raises(RuntimeError): runner.run() runner.wait_until_completed() # Check that all files are present # Check that the pre and post files are present for node in (0, 1, 5, 7, 8, 10): assert ( node_adder.paths["pre_and_post_directory"].joinpath(f"{node}.txt").is_file() ) # Check that all the dump files are present for restart_str in ("", "_restart_0", "_restart_1", "_restart_2"): assert ( node_adder.paths["project_path"] .joinpath(f"{name}{restart_str}", "BOUT.dmp.0.nc") .is_file() or node_adder.paths["project_path"] .joinpath(f"{name}{restart_str}", "BOUT.dmp.0.h5") .is_file() ) # NOTE: We will only have 4 runs as node 4 is a duplicate of node 2 and will # therefore be skipped number_of_runs = 4 assert_tables_have_expected_len( DatabaseReader(node_adder.run_groups["run_group_2"].db_connector), yield_number_of_rows_for_all_tables, expected_run_number=number_of_runs, restarted=True, ) simulation_steps = LogReader( node_adder.paths["project_path"].joinpath(f"{name}_restart_2", "BOUT.log.0") ).get_simulation_steps() # NOTE: nout=0 set in the function tests.utils.run.make_run_group assert np.isclose(simulation_steps.loc[simulation_steps.index[-1], "Sim_time"], 0.0)
class MetadataRecorder: r""" Class for recording the metadata of the runs. Attributes ---------- __db_writer : DatabaseWriter Getter variable for db_writer __db_reader : DatabaseReader Getter variable for db_reader db_writer : DatabaseWriter Object which writes to the database db_reader : DatabaseReader Object which reads from the database Methods ------- capture_new_data_from_run(runner, processor_split) Capture new data from a run _create_parameter_tables_entry(parameters_dict) Insert the parameters into a the parameter tables Examples -------- Import dependencies >>> from pathlib import Path >>> from bout_runners.executor.bout_paths import BoutPaths >>> from bout_runners.parameters.default_parameters import DefaultParameters >>> from bout_runners.parameters.final_parameters import FinalParameters >>> from bout_runners.database.database_connector import DatabaseConnector >>> from bout_runners.submitter.processor_split import ProcessorSplit Create the `bout_paths` object >>> project_path = Path().joinpath('path', 'to', 'project') >>> bout_inp_src_dir = Path().joinpath('path', 'to', 'source', 'BOUT.inp') >>> bout_inp_dst_dir = Path().joinpath('path', 'to', 'destination','BOUT.inp') >>> bout_paths = BoutPaths(project_path=project_path, ... bout_inp_src_dir=bout_inp_src_dir, ... bout_inp_dst_dir=bout_inp_dst_dir) Obtain the parameters >>> default_parameters = DefaultParameters(bout_paths) >>> final_parameters = FinalParameters(default_parameters) >>> final_parameters_dict = final_parameters.get_final_parameters() >>> final_parameters_as_sql_types = \ ... final_parameters.cast_to_sql_type( ... final_parameters_dict) Create the metadata recorder object >>> db_connector = DatabaseConnector('name') >>> metadata_recorder = MetadataRecorder(db_connector, ... bout_paths, ... final_parameters) Capture the data to the database >>> metadata_recorder.capture_new_data_from_run(ProcessorSplit()) None """ def __init__( self, db_connector: DatabaseConnector, bout_paths: BoutPaths, final_parameters: FinalParameters, ) -> None: """ Set the database to use. Parameters ---------- db_connector : DatabaseConnector The database connector bout_paths : BoutPaths Object containing the paths final_parameters : FinalParameters Object containing the final parameters """ self.__db_writer = DatabaseWriter(db_connector) self.__db_reader = DatabaseReader(db_connector) self.__bout_paths = bout_paths self.__final_parameters = final_parameters self.__make = Make(self.__bout_paths.project_path) @property def db_reader(self) -> DatabaseReader: """ Set the properties of self.db_reader. Returns ------- self.__db_reader : DatabaseReader The database reader object Notes ----- The db_reader is read only """ return self.__db_reader @property def db_writer(self): """ Set the properties of self.db_writer. Returns ------- self.__db_writer : DatabaseWriter The database writer object Notes ----- The db_writer is read only """ return self.__db_writer def capture_new_data_from_run( self, processor_split: ProcessorSplit, restart: bool = False, force: bool = False, ) -> Optional[int]: """ Capture new data from a run. This function will capture all uncaptured data from a run. If all data has been captured previously, it means that the run has already been executed, and new_entry = False will be returned. 
Parameters ---------- processor_split : ProcessorSplit The processor split object restart : bool If True, the data will be captured (even if it has been executed before) force : bool Store entry to the run table even if a entry with the same parameter exists This will typically be used if the bout_runners is forcefully executing a run Returns ------- run_id : None or int If no previous run with the same configuration has been executed, this will return None, else the run_id is returned """ # Initiate the run_dict (will be filled with the ids) run_dict: Dict[str, Union[str, int, float, None]] = { "name": self.__bout_paths.bout_inp_dst_dir.name } # Update the parameters parameters_dict = self.__final_parameters.get_final_parameters() if restart: parameters_dict["global"]["restart"] = 1 run_dict["parameters_id"] = self._create_parameter_tables_entry( parameters_dict) # Update the file_modification file_modification_dict = get_file_modification( self.__bout_paths.project_path, self.__make.makefile_path, self.__make.exec_name, ) run_dict["file_modification_id"] = self.__db_reader.get_entry_id( "file_modification", file_modification_dict) if run_dict["file_modification_id"] is None: run_dict["file_modification_id"] = self.create_entry( "file_modification", file_modification_dict) # Update the split split_dict = { "number_of_processors": processor_split.number_of_processors, "number_of_nodes": processor_split.number_of_nodes, "processors_per_node": processor_split.processors_per_node, } run_dict["split_id"] = self.__db_reader.get_entry_id( "split", split_dict) if run_dict["split_id"] is None: run_dict["split_id"] = self.create_entry("split", split_dict) # Update the system info system_info_dict = get_system_info() run_dict["system_info_id"] = self.__db_reader.get_entry_id( "system_info", system_info_dict) if run_dict["system_info_id"] is None: run_dict["system_info_id"] = self.create_entry( "system_info", system_info_dict) # Update the run # NOTE: If restart is True, a new run_id will be given as the run_dict["name"] # will be unique run_id = self.__db_reader.get_entry_id("run", run_dict) if force or run_id is None: run_dict["latest_status"] = "submitted" run_dict["submitted_time"] = datetime.now().isoformat() _ = self.create_entry("run", run_dict) return run_id def create_entry( self, table_name: str, entries_dict: Mapping[str, Union[int, str, float, None]]) -> int: """ Create a database entry and return the entry id. Parameters ---------- table_name : str Name of the table entries_dict : dict Dictionary containing the entries as key value pairs Returns ------- entry_id : int The id of the newly created entry Raises ------ RuntimeError If the newly created id could not be fetched """ self.__db_writer.create_entry(table_name, entries_dict) entry_id = self.__db_reader.get_entry_id(table_name, entries_dict) if entry_id is None: raise RuntimeError("Could not fetch the newly created id") return entry_id def _create_parameter_tables_entry( self, parameters_dict: Dict[str, Dict[str, Union[int, str, float]]]) -> int: """ Insert the parameters into a the parameter tables. 
Parameters ---------- parameters_dict : dict The dictionary on the form >>> {'section': {'parameter': 'value'}} Returns ------- parameters_id : int The id key from the `parameters` table Notes ----- All `:` will be replaced by `_` in the section names """ parameters_foreign_keys = dict() parameter_sections = list(parameters_dict.keys()) for section in parameter_sections: # Replace bad characters for SQL section_name = section.replace(":", "_") section_parameters = parameters_dict[section] section_id = self.__db_reader.get_entry_id(section_name, section_parameters) if section_id is None: section_id = self.create_entry(section_name, section_parameters) parameters_foreign_keys[f"{section_name}_id"] = section_id # Update the parameters table parameters_id = self.__db_reader.get_entry_id("parameters", parameters_foreign_keys) if parameters_id is None: parameters_id = self.create_entry("parameters", parameters_foreign_keys) return parameters_id
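# A hedged sketch of the dictionary shape _create_parameter_tables_entry expects and
# of the section-name translation it performs. The section and parameter names below
# are made up for illustration; only the {'section': {'parameter': value}} structure
# and the ':' -> '_' replacement come from the docstring and code above.
example_parameters_dict = {
    "global": {"nout": 0, "restart": 0},
    "conduction:probes": {"n_probes": 2},
}
# The section 'conduction:probes' would be looked up in (and written to) a table
# named 'conduction_probes', and the collected foreign keys would take the form
# {'global_id': ..., 'conduction_probes_id': ...} before the 'parameters' table
# entry is created.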
def test_large_graph( make_project: Path, yield_number_of_rows_for_all_tables: Callable[[DatabaseReader], Dict[str, int]], clean_default_db_dir: Path, tear_down_restart_directories: Callable[[Path], None], ) -> None: """ Test that the graph with 10 nodes work as expected. The node setup can be found in node_functions.py Parameters ---------- make_project : Path The path to the conduction example yield_number_of_rows_for_all_tables : function Function which returns the number of rows for all tables in a schema clean_default_db_dir : Path Path to the default database directory tear_down_restart_directories : function Function used for removal of restart directories """ _ = clean_default_db_dir name = "test_large_graph" paths = dict() paths["project_path"] = make_project paths["pre_and_post_directory"] = paths["project_path"].joinpath( f"pre_and_post_{name}") paths["pre_and_post_directory"].mkdir() run_groups = dict() # RunGroup belonging to node 2 run_groups["run_group_2"] = make_run_group(name, make_project) run_graph = run_groups["run_group_2"].run_graph paths["bout_run_directory_node_2"] = run_groups[ "run_group_2"].bout_paths.bout_inp_dst_dir run_groups["run_group_2"].add_pre_processor({ "function": node_zero, "args": ( paths["bout_run_directory_node_2"], paths["pre_and_post_directory"], ), "kwargs": None, }) run_groups["run_group_2"].add_pre_processor({ "function": node_one, "args": ( paths["bout_run_directory_node_2"], paths["pre_and_post_directory"], ), "kwargs": None, }) run_groups["run_group_2"].add_post_processor({ "function": node_five, "args": ( paths["bout_run_directory_node_2"], paths["pre_and_post_directory"], ), "kwargs": None, }) tear_down_restart_directories(paths["bout_run_directory_node_2"]) # RunGroup belonging to node 3 run_groups["run_group_3"] = make_run_group( name, make_project, run_graph, restart_from=run_groups["run_group_2"].bout_paths.bout_inp_dst_dir, waiting_for=run_groups["run_group_2"].bout_run_node_name, ) # RunGroup belonging to node 4 run_groups["run_group_4"] = make_run_group(name, make_project, run_graph) paths["bout_run_directory_node_4"] = run_groups[ "run_group_4"].bout_paths.bout_inp_dst_dir # RunGroup belonging to node 6 run_groups["run_group_6"] = make_run_group( name, make_project, run_graph, restart_from=run_groups["run_group_2"].bout_paths.bout_inp_dst_dir, waiting_for=run_groups["run_group_2"].bout_run_node_name, ) paths["bout_run_directory_node_6"] = run_groups[ "run_group_6"].bout_paths.bout_inp_dst_dir node_8 = run_groups["run_group_6"].add_post_processor( { "function": node_eight, "args": ( paths["bout_run_directory_node_4"], paths["bout_run_directory_node_6"], paths["pre_and_post_directory"], ), "kwargs": None, }, waiting_for=run_groups["run_group_4"].bout_run_node_name, ) # RunGroup belonging to node 9 # NOTE: We need the paths['bout_run_directory_node_9'] as an input in node 7 # As node 9 is waiting for node 7 we hard-code the name # (as we will know what it will be) paths["bout_run_directory_node_9"] = paths["project_path"].joinpath( f"{name}_restart_2") # The function of node_seven belongs to RunGroup2, but takes # paths['bout_run_directory_node_9'] as an input node_7_name = run_groups["run_group_2"].add_post_processor({ "function": node_seven, "args": ( paths["bout_run_directory_node_2"], paths["bout_run_directory_node_9"], paths["pre_and_post_directory"], ), "kwargs": None, }) run_groups["run_group_9"] = make_run_group( name, make_project, run_graph, restart_from=run_groups["run_group_6"].bout_paths.bout_inp_dst_dir, waiting_for=( 
run_groups["run_group_4"].bout_run_node_name, run_groups["run_group_6"].bout_run_node_name, node_7_name, ), ) run_groups["run_group_9"].add_post_processor( { "function": node_ten, "args": ( paths["bout_run_directory_node_9"], paths["pre_and_post_directory"], ), "kwargs": None, }, waiting_for=node_8, ) # Run the project runner = BoutRunner(run_graph) runner.run() # Check that all the nodes have changed status with pytest.raises(RuntimeError): runner.run() # Check that all files are present # Check that the pre and post files are present for node in (0, 1, 5, 7, 8, 10): assert paths["pre_and_post_directory"].joinpath( f"{node}.txt").is_file() # Check that all the dump files are present for restart_str in ("", "_restart_0", "_restart_1", "_restart_2"): assert (paths["project_path"].joinpath( f"{name}{restart_str}").joinpath("BOUT.dmp.0.nc").is_file() or paths["project_path"].joinpath(f"{name}{restart_str}"). joinpath("BOUT.dmp.0.h5").is_file()) # NOTE: We will only have 4 runs as node 4 is a duplicate of node 2 and will # therefore be skipped number_of_runs = 4 assert_tables_have_expected_len( DatabaseReader(run_groups["run_group_2"].db_connector), yield_number_of_rows_for_all_tables, expected_run_number=number_of_runs, restarted=True, )
class StatusChecker:
    r"""
    Class to check and update the status of runs.

    Attributes
    ----------
    __db_connector : DatabaseConnector
        Connection to the database under consideration
    __db_reader : DatabaseReader
        Object to read the database with
    project_path : Path
        Path to the project

    Methods
    -------
    check_and_update_status()
        Check and update the status for the schema
    check_and_update_until_complete()
        Check and update the status until all runs are stopped
    __check_submitted(metadata_updater, submitted_to_check)
        Check the status of all runs which have status `submitted`
    __check_running(metadata_updater, running_to_check)
        Check the status of all runs which have status `running`
    __check_if_stopped(log_reader, metadata_updater)
        Check if a run has stopped
    check_if_running_or_errored(log_reader)
        Check if a run is still running or has errored

    Examples
    --------
    >>> from pathlib import Path
    >>> from bout_runners.database.database_connector import \
    ...     DatabaseConnector
    >>> db_connector = DatabaseConnector('name_of_db')
    >>> project_path = Path('path').joinpath('to', 'project')
    >>> status_checker = StatusChecker(db_connector, project_path)
    >>> status_checker.check_and_update_status()

    Any updates to the runs will be written to the database.
    Alternatively, one can run the program until all jobs have stopped by calling

    >>> status_checker.check_and_update_until_complete()
    """

    def __init__(
        self,
        db_connector: Optional[DatabaseConnector] = None,
        project_path: Optional[Union[Path, str]] = None,
    ) -> None:
        """
        Set connector, reader and a project path.

        Notes
        -----
        The StatusChecker instance only checks the runs belonging to the database
        schema accessed through the `db_connector`

        Parameters
        ----------
        db_connector : DatabaseConnector
            Connection to the database
        project_path : Path
            Path to the project (the root directory which usually contains the
            makefile and the executable)
        """
        self.__db_connector = (
            db_connector if db_connector is not None else DatabaseConnector()
        )
        self.__db_reader = DatabaseReader(self.__db_connector)
        self.__project_path = (
            Path(project_path) if project_path is not None else Path()
        )

    def check_and_update_status(self) -> None:
        """
        Check and update the status for the schema.

        Raises
        ------
        RuntimeError
            If the schema does not exist
        """
        # Check that the run table exists
        if not self.__db_reader.check_tables_created():
            logging.error(
                "No tables found in %s",
                self.__db_reader.db_connector.db_path,
            )
            message = "Can not check the status of schemas that does not exist"
            raise RuntimeError(message)

        # Create placeholder metadata_updater
        metadata_updater = MetadataUpdater(self.__db_connector, run_id=-1)

        # Check runs with status 'submitted'
        query = (
            "SELECT name, id AS run_id FROM run WHERE\n"
            "latest_status = 'submitted' OR\n"
            "latest_status = 'created'"
        )
        submitted_to_check = self.__db_reader.query(query)
        self.__check_submitted(metadata_updater, submitted_to_check)

        # Check runs with status 'running'
        query = 'SELECT name, id FROM run WHERE latest_status = "running"'
        running_to_check = self.__db_reader.query(query)
        self.__check_running(metadata_updater, running_to_check)

    @staticmethod
    def get_query_string_for_non_errored_runs() -> str:
        """
        Return the query string for non errored results.
Returns ------- str Query string for non errored results """ return ("SELECT name, id AS run_id FROM run WHERE\n" "latest_status = 'submitted' OR\n" "latest_status = 'created' OR\n" "latest_status = 'running'") def check_and_update_until_complete(self, seconds_between_update: int = 5 ) -> None: """ Check and update the status until all runs are stopped. Parameters ---------- seconds_between_update : int Number of seconds before a new status check is performed """ query = self.get_query_string_for_non_errored_runs() while len(self.__db_reader.query(query).index) != 0: self.check_and_update_status() time.sleep(seconds_between_update) def __check_submitted(self, metadata_updater: MetadataUpdater, submitted_to_check: DataFrame) -> None: """ Check the status of all runs which has status `submitted`. Parameters ---------- metadata_updater : MetadataUpdater Object which updates the database submitted_to_check : DataFrame DataFrame containing the `id` and `name` of the runs with status `submitted` Raises ------ RuntimeError In case log_reader.started() is True and log_reader.start_time is None """ for name, run_id in submitted_to_check.itertuples(index=False): metadata_updater.run_id = run_id log_path = self.__project_path.joinpath(name, "BOUT.log.0") if log_path.is_file(): log_reader = LogReader(log_path) if log_reader.started(): start_time = log_reader.start_time # Assert to prevent "Incompatible types in assignment" with Optional if start_time is None: raise RuntimeError( "log_reader.start_time is None although " "log_reader.started is True") metadata_updater.update_start_time(start_time) latest_status = self.__check_if_stopped( log_reader, metadata_updater) else: # No started time is found in the log latest_status = self.check_if_running_or_errored( log_reader) else: # No log file exists # NOTE: This means that the execution is either in a # queue or has failed the submission. # For now, we still consider this as submitted # This can maybe be decided by checking either the # pid or the status from the submitter latest_status = "submitted" metadata_updater.update_latest_status(latest_status) def __check_running(self, metadata_updater: MetadataUpdater, running_to_check: DataFrame) -> None: """ Check the status of all runs which has status `running`. Parameters ---------- metadata_updater : MetadataUpdater Object which updates the database running_to_check : DataFrame DataFrame containing the `id` and `name` of the runs with status `running` """ for name, run_id in running_to_check.itertuples(index=False): metadata_updater.run_id = run_id log_path = self.__project_path.joinpath(name, "BOUT.log.0") log_reader = LogReader(log_path) latest_status = self.check_if_running_or_errored(log_reader) metadata_updater.update_latest_status(latest_status) def __check_if_stopped(self, log_reader: LogReader, metadata_updater: MetadataUpdater) -> str: """ Check if a run has stopped. 
Parameters ---------- log_reader : LogReader The object which reads log files metadata_updater : MetadataUpdater Object which updates the database Returns ------- latest_status : str The latest status Raises ------ RuntimeError In case log_reader.ended() is True and log_reader.end_time is None """ if log_reader.ended(): end_time = log_reader.end_time # Assert to prevent "Incompatible types in assignment" with Optional if end_time is None: raise RuntimeError("log_reader.end_time is None although " "log_reader.ended() is True") metadata_updater.update_stop_time(end_time) latest_status = "complete" else: latest_status = self.check_if_running_or_errored(log_reader) return latest_status @staticmethod def check_if_running_or_errored(log_reader: LogReader) -> str: """ Check if a run is still running or has errored. Parameters ---------- log_reader : LogReader The object which reads log files Returns ------- latest_status : str The latest status """ pid = log_reader.pid if pid is None: latest_status = "created" elif psutil.pid_exists(pid): latest_status = "running" else: latest_status = "error" return latest_status
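# A minimal sketch of how check_if_running_or_errored maps the state of a log file
# to a status string. The log path below is made up for illustration; LogReader and
# the 'created', 'running' and 'error' statuses are taken from the code above.
log_reader = LogReader(Path("path", "to", "run_dir", "BOUT.log.0"))
status = StatusChecker.check_if_running_or_errored(log_reader)
# status is 'created' if no PID has been written to the log yet,
# 'running' if the PID exists on the system, and 'error' otherwise.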
def test_db_reader(
    make_test_database: Callable[[str], DatabaseConnector],
    write_to_split: Callable[[str], DatabaseConnector],
) -> None:
    """
    Test that we can read from the database.

    Specifically this tests that:
    1. We can make a query
    2. An empty database has not been populated
    3. A populated database has table entries
    4. We can extract the id for a given set of values which exist
    5. No id is returned if a given set of values is not found in the database
    6. We can get the latest row id which has been written to

    Parameters
    ----------
    make_test_database : function
        Function which returns the database connection
    write_to_split : function
        Function returning the database connection where `split` has been populated
    """
    empty_db_connector = make_test_database("empty_read_test")
    empty_db_reader = DatabaseReader(empty_db_connector)

    # Check that we can make a query
    table = empty_db_reader.query("SELECT 1+1 AS col")
    assert table.loc[0, "col"] == 2  # pylint: disable=no-member

    # Check that the tables have not been created in an empty db
    assert not empty_db_reader.check_tables_created()

    db_connector = write_to_split("read_test")
    db_reader = DatabaseReader(db_connector)

    # Check that tables exist
    assert db_reader.check_tables_created()

    # As write_to_split writes to the split table, we can get the
    # written values with the following query
    table = db_reader.query("SELECT * FROM split")
    entries_dict = table.to_dict(orient="records")[0]

    # Remove the 'id'
    entries_dict.pop("id")

    row_id = db_reader.get_entry_id("split", entries_dict)
    assert row_id == 1

    # Modify entries_dict so that get_entry_id returns None
    entries_dict[list(entries_dict.keys())[0]] += 1
    new_row_id = db_reader.get_entry_id("split", entries_dict)
    assert new_row_id is None

    # Assert that get_latest_row_id is working
    assert db_reader.get_latest_row_id() == 1
def test_status_checker(
    test_case: str,
    get_test_data_path: Path,
    get_test_db_copy: Callable[[str], DatabaseConnector],
    mock_pid_exists: Callable[[str], None],
    copy_test_case_log_file: Callable[[str], None],
) -> None:
    """
    Test the StatusChecker exhaustively (excluding raises and loop).

    Parameters
    ----------
    test_case : str
        Description of the test of the form

        >>> ('<log_file_present>_<pid_present_in_log>_'
        ...  '<started_time_present_in_log>_<ended_time_present_in_log>'
        ...  '_<whether_pid_exists>_<new_status>')

    get_test_data_path : Path
        Path to the test data
    get_test_db_copy : function
        Function which returns a database connector to the copy of the test database
    mock_pid_exists : function
        Function which sets up a monkeypatch for psutil.pid_exists
    copy_test_case_log_file : function
        Function which copies log files according to the test_case
    """
    project_path = get_test_data_path
    db_connector = get_test_db_copy(test_case)
    mock_pid_exists(test_case)
    copy_test_case_log_file(test_case)

    db_reader = DatabaseReader(db_connector)

    status_checker = StatusChecker(db_connector, project_path)
    status_checker.check_and_update_status()

    # Check that the correct status has been assigned to "running"
    # pylint: disable=no-member
    result = db_reader.query(
        "SELECT latest_status FROM run WHERE name = 'testdata_5'"
    ).loc[0, "latest_status"]
    assert result == "running"

    # Check that the correct status has been assigned to "submitted"
    expected = test_case.split("_")[-1]
    # pylint: disable=no-member
    result = db_reader.query(
        "SELECT latest_status FROM run WHERE name = 'testdata_6'"
    ).loc[0, "latest_status"]
    assert result == expected

    # Check that the correct start_time has been set
    if "not_started" not in test_case:
        expected = str(datetime(2020, 5, 1, 17, 7, 10))
        # pylint: disable=no-member
        result = db_reader.query(
            "SELECT start_time FROM run WHERE name = 'testdata_6'"
        ).loc[0, "start_time"]
        assert expected == result

    # Check that the correct end_time has been set
    if "not_ended" not in test_case and "complete" in test_case:
        expected = str(datetime(2020, 5, 1, 17, 7, 14))
        # pylint: disable=no-member
        result = db_reader.query(
            "SELECT stop_time FROM run WHERE name = 'testdata_6'"
        ).loc[0, "stop_time"]
        assert expected == result