Example #1
def _get_number_of_rows_for_all_tables(
        db_reader: DatabaseReader) -> Dict[str, int]:
    """
    Return the number of rows for all tables in a schema.

    Parameters
    ----------
    db_reader : DatabaseReader
        The object used to read from the database

    Returns
    -------
    number_of_rows_dict : dict
        Dict on the form

        >>> {'table_name_1': int, 'table_name_2': int, ...}
    """
    number_of_rows_dict = dict()
    query_str = ("SELECT name FROM sqlite_master\n"
                 "    WHERE type = 'table'\n"
                 "    AND name NOT LIKE 'sqlite_%'")
    table_of_tables = db_reader.query(query_str)
    for _, table_name_as_series in table_of_tables.iterrows():
        table_name = table_name_as_series["name"]
        # NOTE: SQL injection possible through bad table name, however the table
        #       names are hard-coded in this example
        query_str = f"SELECT COUNT(*) AS rows FROM {table_name}"  # nosec
        table = db_reader.query(query_str)
        number_of_rows_dict[table_name] = table.loc[0, "rows"]
    return number_of_rows_dict
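
The helper above depends on the project's DatabaseReader; as a self-contained sketch, the same two queries can be run with the standard library sqlite3 module (the `run` table below is made up for illustration):

import sqlite3

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE run (id INTEGER PRIMARY KEY)")
con.execute("INSERT INTO run DEFAULT VALUES")

# List the user tables, filtering out SQLite's internal tables
tables = con.execute(
    "SELECT name FROM sqlite_master "
    "WHERE type = 'table' AND name NOT LIKE 'sqlite_%'"
).fetchall()

for (table_name,) in tables:
    # Table names cannot be parametrized, hence the f-string
    count = con.execute(f"SELECT COUNT(*) AS rows FROM {table_name}").fetchone()[0]
    print(table_name, count)  # run 1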
Example #2
def test_db_writer(
        make_test_schema: Callable[[str], Tuple[DatabaseConnector,
                                                str]]) -> None:
    """
    Test that we can write to the database schema.

    Specifically, this tests that:
    1. We can write to the `split` table
    2. Only one record is made
    3. The type is correct
    4. The values are correct
    5. It is possible to update the values

    Parameters
    ----------
    make_test_schema : function
        Function returning the database connection with the schema created
    """
    db_connector, _ = make_test_schema("write_test")
    db_reader = DatabaseReader(db_connector)

    db_writer = DatabaseWriter(db_connector)
    table_name = "split"
    dummy_split_dict = {
        "number_of_processors": 41,
        "number_of_nodes": 42,
        "processors_per_node": 43,
    }
    db_writer.create_entry(table_name, dummy_split_dict)

    # NOTE: Protected against SQL injection as table_name is hard-coded above
    table = db_reader.query(f"SELECT * FROM {table_name}")  # nosec

    # Check that the shape is expected (note that one column is
    # assigned to the id)
    assert table.shape == (1, 4)

    # Check all the elements are the same
    # https://www.quora.com/How-do-you-check-if-all-elements-in-a-NumPy-array-are-the-same-in-Python-pandas
    values = table.dtypes.values
    assert (values == np.dtype("int64")).all()

    for key, value in dummy_split_dict.items():
        assert table.loc[0, key] == value  # pylint: disable=no-member

    update_fields = ("number_of_processors", "number_of_nodes")
    search_condition = (f"processors_per_node = "
                        f'{dummy_split_dict["processors_per_node"]}')
    values = tuple(dummy_split_dict[field] - 10 for field in update_fields)
    db_writer.update(
        db_writer.create_update_string(update_fields, table_name,
                                       search_condition),
        values,
    )
    # NOTE: Protected against SQL injection as table_name is hard-coded above
    table = db_reader.query(f"SELECT * FROM {table_name}")  # nosec
    for index, field in enumerate(update_fields):
        # pylint: disable=no-member
        assert table.loc[:, field].values[0] == values[index]
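
The string built by create_update_string is not shown in this example, so the following is a hedged sqlite3 sketch of the UPDATE statement the test presumably issues, with the same fields and search condition:

import sqlite3

con = sqlite3.connect(":memory:")
con.execute(
    "CREATE TABLE split (id INTEGER PRIMARY KEY, "
    "number_of_processors INTEGER, number_of_nodes INTEGER, "
    "processors_per_node INTEGER)"
)
con.execute(
    "INSERT INTO split (number_of_processors, number_of_nodes, "
    "processors_per_node) VALUES (41, 42, 43)"
)
# Update two fields, searching on the third (cf. update_fields and
# search_condition above)
con.execute(
    "UPDATE split SET number_of_processors = ?, number_of_nodes = ? "
    "WHERE processors_per_node = 43",
    (31, 32),
)
print(con.execute("SELECT * FROM split").fetchone())  # (1, 31, 32, 43)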
Example #3
def test_db_reader(
    make_test_database: Callable[[str], DatabaseConnector],
    write_to_split: Callable[[str], DatabaseConnector],
) -> None:
    """
    Test that we can read from the database.

    Specifically, this tests that:
    1. We can make a query
    2. An empty database has not been populated
    3. A populated database has table entries
    4. We can extract the id for a given set of values which exist
    5. No id is returned if a given set of values is not found in the
       database
    6. We can get the latest row id which has been written to

    Parameters
    ----------
    make_test_database : function
        Function which returns the database connection
    write_to_split : function
        Function returning the database connection where `split` has been populated
    """
    empty_db_connector = make_test_database("empty_read_test")
    empty_db_reader = DatabaseReader(empty_db_connector)

    # Check that we can make a query
    table = empty_db_reader.query("SELECT 1+1 AS col")
    assert table.loc[0, "col"] == 2  # pylint: disable=no-member

    # Check that the tables have not been created in an empty db
    assert not empty_db_reader.check_tables_created()

    db_connector = write_to_split("read_test")
    db_reader = DatabaseReader(db_connector)

    # Check that tables exist
    assert db_reader.check_tables_created()

    # As write_to_split writes to the split table, we can get the
    # written values with the following query
    table = db_reader.query("SELECT * FROM split")
    entries_dict = table.to_dict(orient="records")[0]

    # Remove the 'id'
    entries_dict.pop("id")

    row_id = db_reader.get_entry_id("split", entries_dict)
    assert row_id == 1

    # Modify entries_dict so that row_id returns None
    entries_dict[list(entries_dict.keys())[0]] += 1
    new_row_id = db_reader.get_entry_id("split", entries_dict)
    assert new_row_id is None

    # Assert that get_latest_row_id is working
    assert db_reader.get_latest_row_id() == 1
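
The get_entry_id implementation is not shown here; a plausible reading of the assertions above is that it looks up the id of the row matching all the given column values and returns None when nothing matches. A rough sqlite3 sketch of that behaviour (the table and column below are invented):

import sqlite3

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE split (id INTEGER PRIMARY KEY, nodes INTEGER)")
con.execute("INSERT INTO split (nodes) VALUES (42)")

def get_entry_id_sketch(entries):
    # Build 'col = ?' pairs for every entry and fetch the matching id
    where = " AND ".join(f"{key} = ?" for key in entries)
    row = con.execute(
        f"SELECT id FROM split WHERE {where}", tuple(entries.values())
    ).fetchone()
    return row[0] if row is not None else None

print(get_entry_id_sketch({"nodes": 42}))  # 1
print(get_entry_id_sketch({"nodes": 43}))  # None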
Example #4
def test_status_checker_until_complete_infinite(
    get_test_data_path: Path,
    get_test_db_copy: Callable[[str], DatabaseConnector],
    copy_test_case_log_file: Callable[[str], None],
) -> None:
    """
    Test the infinite loop of StatusChecker.

    Parameters
    ----------
    get_test_data_path : Path
        Path to the test data
    get_test_db_copy : function
        Function which returns a DatabaseConnector connected to a copy of test.db
    copy_test_case_log_file : function
        Function which copies the test case log files
    """
    test_case = "infinite_log_file_pid_started_ended_no_mock_pid_complete"

    project_path = get_test_data_path
    db_connector = get_test_db_copy(test_case)
    copy_test_case_log_file(test_case)

    # Remove the row which has status 'running' (as it will always
    # have this status)
    db_connector.execute_statement("DELETE FROM run WHERE name = 'testdata_5'")

    db_reader = DatabaseReader(db_connector)

    status_checker = StatusChecker(db_connector, project_path)
    status_checker.check_and_update_until_complete()

    query = status_checker.get_query_string_for_non_errored_runs()
    assert len(db_reader.query(query).index) == 0
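
The loop exercised by this test has the shape shown below (cf. check_and_update_until_complete in the StatusChecker listing further down): poll the non-errored query until it returns no rows. Deleting the perpetually 'running' row above is what lets the loop terminate.

import time

def wait_until_complete(db_reader, status_checker, seconds_between_update=5):
    # Poll until no run is left in the 'submitted', 'created' or
    # 'running' state
    query = status_checker.get_query_string_for_non_errored_runs()
    while len(db_reader.query(query).index) != 0:
        status_checker.check_and_update_status()
        time.sleep(seconds_between_update)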
Example #5
def test_db_creator(
    make_test_database: Callable[[str], DatabaseConnector],
    make_test_schema: Callable[
        [str], Tuple[DatabaseConnector, Dict[str, Dict[str, str]]]
    ],
) -> None:
    """
    Test that we can create the database schema.

    Specifically, this tests that:
    1. The database is empty on creation
    2. The tables are created
    3. It is not possible to create the schema more than once
    4. All expected tables have been created

    Parameters
    ----------
    make_test_database : function
        Function returning the database connection
    make_test_schema : function
        Function returning the database connection and the final parameters as SQL types
    """
    db_connector_no_schema = make_test_database("test_creation_without_schema")
    db_reader_no_schema = DatabaseReader(db_connector_no_schema)

    # There should be no tables before creating them
    assert not db_reader_no_schema.check_tables_created()

    db_connector_schema, final_parameters_as_sql_types = make_test_schema(
        "test_creation_with_schema"
    )
    db_reader_schema = DatabaseReader(db_connector_schema)
    db_creator = DatabaseCreator(db_connector_schema)

    # The tables should now have been created
    assert db_reader_schema.check_tables_created()

    with pytest.raises(sqlite3.OperationalError):
        db_creator.create_all_schema_tables(final_parameters_as_sql_types)

    # Check that all tables have been created
    non_parameter_tables = {
        "system_info",
        "split",
        "file_modification",
        "parameters",
        "run",
    }
    parameter_tables = set(
        el.replace(":", "_") for el in final_parameters_as_sql_types.keys()
    )
    query_str = "SELECT name FROM sqlite_master WHERE type='table'"
    table = db_reader_schema.query(query_str)

    actual = table.loc[:, "name"].values  # pylint: disable=no-member
    assert non_parameter_tables.union(parameter_tables) == set(actual)
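
The pytest.raises assertion above relies on SQLite refusing to create an already existing table; a minimal standalone demonstration:

import sqlite3

con = sqlite3.connect(":memory:")
con.execute("CREATE TABLE split (id INTEGER PRIMARY KEY)")
try:
    # Creating the same table twice raises sqlite3.OperationalError
    con.execute("CREATE TABLE split (id INTEGER PRIMARY KEY)")
except sqlite3.OperationalError as error:
    print(error)  # table split already exists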
Example #6
class MetadataReader:
    r"""
    Class for reading the metadata from the database.

    Attributes
    ----------
    __db_reader : DatabaseReader
        The object used to read from the database
    __table_names : tuple
        Getter variable for table_names
    __table_column_dict : dict of tuple
        Getter variable for table_column_dict
    __table_connections : dict of tuple
        Getter variable for table_connections
    __sorted_columns : tuple
        Getter variable for sorted_columns
    table_names : tuple
         A tuple containing all names of the tables
    table_column_dict : dict of tuple
        A dict where the keys are table names, and the values are corresponding
        column names
    table_connections : dict of tuple
        A dict where the keys are tables, and the values are tuples of tables
        connected to the key table
    sorted_columns : tuple
        A tuple of the column names as they will be sorted in the all_metadata DataFrame
    date_columns : tuple
        Columns containing dates
    drop_id : None or str
        Specifies what id columns should be dropped when obtaining the metadata

    Methods
    -------
    get_all_metadata()
        Return all of the run metadata
    get_parameters_metadata()
        Return only the parameter part of the run metadata
    get_join_query(from_statement, columns, alias_columns, table_connections)
        Return the query string of a `SELECT` query with `INNER JOIN`
    __get_parameters_query()
        Return the parameters query string
    __get_sorted_columns()
        Return all columns sorted
    __get_table_connections()
        Return a dict containing the table connections
    __get_all_table_names()
        Return all the table names in the schema
    __get_table_column_dict()
        Return all the column names of the specified tables

    Examples
    --------
    >>> from pathlib import Path
    >>> from bout_runners.database.database_connector import DatabaseConnector
    >>> db_connector = DatabaseConnector('test', Path())
    >>> metadata_reader = MetadataReader(db_connector)
    >>> metadata_reader.get_parameters_metadata()
       bar.id  bar.foo  ... parameters.baz_id  parameters.foo_id
    0       1        1  ...                 1                  1
    1       2       10  ...                 1                  2
    2       2       10  ...                 1                  1

    [3 rows x 16 columns]

    >>> metadata_reader.get_all_metadata()
       run.id  ...                  system_info.version
    0       1  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    1       2  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    2       3  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    3       4  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    4       5  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    5       6  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    6       7  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019

    [7 rows x 43 columns]

    >>> metadata_reader.drop_id = 'all_id'
    >>> metadata_reader.get_all_metadata()
      run.latest_status  ...                  system_info.version
    0          complete  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    1          complete  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    2          complete  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    3          complete  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    4             error  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    5           running  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019
    6         submitted  ...  #1 SMP Thu Oct 17 19:31:58 UTC 2019

    [7 rows x 28 columns]
    """

    date_columns = (
        "run.start_time",
        "run.stop_time",
        "run.submitted_time",
        "file_modification.bout_lib_modified",
        "file_modification.project_executable_modified",
        "file_modification.project_makefile_modified",
    )

    def __init__(
        self,
        db_connector: DatabaseConnector,
        drop_id: Optional[str] = "keep_run_id",
    ) -> None:
        """
        Set the database to use.

        Parameters
        ----------
        db_connector : DatabaseConnector
            The connection to the database
        drop_id : None or str
            Specifies what id columns should be dropped when obtaining the metadata
            - None : No columns will be dropped
            - 'parameters' : All columns containing parameters ids
              will be dropped
            - 'keep_run_id' : Only the run.id of the id columns will be
              kept
            - 'all_id' : All id columns will be removed
        """
        self.drop_id = drop_id

        self.__db_reader = DatabaseReader(db_connector)

        self.__table_names = self.__get_all_table_names()
        self.__table_column_dict = self.__get_table_column_dict()
        self.__table_connections = self.__get_table_connections()
        self.__sorted_columns = self.__get_sorted_columns()

        parameters_connections = {"parameters": self.__table_connections["parameters"]}
        parameters_tables = ("parameters", *parameters_connections["parameters"])
        self.__parameters_columns = tuple(
            str(col)
            for col in self.__sorted_columns
            if col.split(".")[0] in parameters_tables
        )

    @property
    def table_names(self) -> Tuple[str, ...]:
        """
        Get the properties of self.table_names.

        Returns
        -------
        self.__table_names : tuple
            A tuple containing all names of the tables
        """
        return self.__table_names

    @property
    def table_column_dict(
        self,
    ) -> Dict[str, Tuple[str, ...]]:
        """
        Get the properties of self.table_column_dict.

        Returns
        -------
        self.__table_column_dict : dict of tuple
            A dict where the keys are table names, and the values are corresponding
            column names
        """
        return self.__table_column_dict

    @property
    def table_connections(
        self,
    ) -> Dict[str, Tuple[str, ...]]:
        """
        Get the properties of self.table_connections.

        Returns
        -------
        self.__table_connections : dict of tuple
            A dict where the keys are tables, and the values are tuples of tables
            connected to the key table
        """
        return self.__table_connections

    @property
    def sorted_columns(self) -> Tuple[str, ...]:
        """
        Get the properties of self.sorted_columns.

        Returns
        -------
        self.__sorted_columns : tuple
            A tuple of the column names as they will be sorted in the all_metadata
            DataFrame
        """
        return self.__sorted_columns

    @drop_ids
    def get_all_metadata(self):
        """
        Return all of the run metadata.

        Returns
        -------
        DataFrame
            The DataFrame of the run metadata
        """
        parameters_query = self.__get_parameters_query()

        # Add spaces and parentheses
        parameter_sub_query = "\n".join(
            [f'{" " * 6}{line}' for line in parameters_query.split("\n")]
        )
        parameter_sub_query = (
            f"{parameter_sub_query[:5]}({parameter_sub_query[6:-1]}) " f"AS subquery"
        )

        # NOTE: The subquery names are the names of the columns after
        #       the query. We would like to rename them to
        #       sorted_columns. Hence the `columns` field and
        #       `alias_columns` field appear swapped
        subquery_columns = [
            f'subquery."{col}"' if col in self.__parameters_columns else col
            for col in self.sorted_columns
        ]
        # Remove the parameters from the table_connections to avoid
        # double joining
        table_connections = self.__table_connections.copy()
        table_connections.pop("parameters")
        unfinished_all_metadata_query = self.get_join_query(
            "run", subquery_columns, self.sorted_columns, table_connections
        )

        # Update the parameters columns
        all_metadata_query = unfinished_all_metadata_query.replace(
            " parameters ", f"\n{parameter_sub_query}\n"
        ).replace("= parameters.id", '= subquery."parameters.id"')

        return self.__db_reader.query(all_metadata_query, parse_dates=self.date_columns)

    @drop_ids
    def get_parameters_metadata(self):
        """
        Return only the parameter part of the run metadata.

        Returns
        -------
        DataFrame
            The DataFrame of the parameter metadata
        """
        parameters_query = self.__get_parameters_query()

        return self.__db_reader.query(parameters_query)

    @staticmethod
    def get_join_query(
        from_statement: str,
        columns: Sequence[str],
        alias_columns: Sequence[str],
        table_connections: Dict[str, Tuple[str, ...]],
    ) -> str:
        """
        Return the query string of a `SELECT` query with `INNER JOIN`.

        Notes
        -----
        The tables in `table_connections` are assumed to be joined by `id`s,
        i.e. `table_a` is connected to `table_b` by `table_b` having a column
        named `table_a_id` which corresponds to the `id` column of `table_a`

        Parameters
        ----------
        from_statement : str
            The statement after the `FROM` keyword in the query
            I.e.

            >>> f'SELECT * FROM {from_statement}'

        columns : array_like
            The columns to select from the tables
            I.e.

            >>> f'SELECT {columns} FROM *'

        alias_columns : array_like
            The name of the columns in the resulting table
            I.e.

            >>> f'SELECT {columns[0]} AS {alias_columns[0]} FROM *'

        table_connections : dict
            A dict where the keys are the table names, and the values are tuples
            containing table names connected to the key table as described in the
            note above

        Returns
        -------
        query : str
            The SQL string which can be used to query the database, where the
            tables are joined through `INNER JOIN` operations
        """
        query = "SELECT\n"
        for column, alias in zip(columns, alias_columns):
            query += f'{" " * 7}{column} AS "{alias}",\n'
        # Remove last comma
        query = f"{query[:-2]}\n"
        query += f"FROM {from_statement}\n"
        for left_table in table_connections.keys():
            for right_table in table_connections[left_table]:
                query += (
                    f'{" " * 4}INNER JOIN {right_table} ON '
                    f"{left_table}."
                    f"{right_table}_id = {right_table}.id\n"
                )
        return query

    def __get_parameters_query(self) -> str:
        """
        Return the parameters query string.

        Returns
        -------
        parameters_query : str
            The SQL string which can be used to query the database, where the
            tables are joined through `INNER JOIN` operations
        """
        parameter_connections = {"parameters": self.__table_connections["parameters"]}
        parameters_query = self.get_join_query(
            "parameters",
            self.__parameters_columns,
            self.__parameters_columns,
            parameter_connections,
        )
        return parameters_query

    def __get_sorted_columns(self) -> Tuple[str, ...]:
        """
        Return all columns sorted.

        The columns will be sorted alphabetically first by table name,
        then alphabetically by column name, with the following exceptions:

        1. The columns from the `run` table are presented first
        2. The `id` column is the first column of each table

        Returns
        -------
        tuple
            Tuple containing the column names
            On the form

            >>> ('run.id',
            ...  'run.column_name_1',
            ...  'run.column_name_2',
            ...  ...
            ...  'table_name_1.column_name_1',
            ...  'table_name_1.column_name_2', ...)
        """
        sorted_columns: List[str] = list()
        table_names = sorted(self.table_column_dict.keys())
        table_names.pop(table_names.index("run"))
        table_names.insert(0, "run")
        for table_name in table_names:
            table_columns = list()
            for column_name in sorted(self.table_column_dict[table_name]):
                table_columns.append(f"{table_name}.{column_name}")
            table_columns.pop(table_columns.index(f"{table_name}.id"))
            table_columns.insert(0, f"{table_name}.id")
            sorted_columns = [*sorted_columns, *table_columns]
        return tuple(sorted_columns)

    def __get_table_connections(self) -> Dict[str, Tuple[str, ...]]:
        """
        Return a dict containing the table connections.

        Returns
        -------
        table_connection_dict : dict
            A dict telling which tables are connected to each other, where the key
            is the table under consideration and the value is a tuple of the tables
            it connects to through its `<table>_id` columns
            On the form

            >>> {'table_1': ('table_2', 'table_3'),
            ...  'table_4': ('table_5',), ...}

        Raises
        ------
        RuntimeError
            If match is None
        """
        table_connection_dict = dict()
        pattern = re.compile("(.*)_id")

        for table, columns in self.table_column_dict.items():
            ids: List[str] = list()
            for column in columns:
                if "_id" in column:
                    match = pattern.match(column)
                    if match is None:
                        msg = f"match is None for '(.*)_id' for input '{column}'"
                        logging.critical(msg)
                        raise RuntimeError(msg)
                    ids.append(match[1])
            if len(ids) > 0:
                table_connection_dict[table] = tuple(ids)

        return table_connection_dict

    def __get_all_table_names(self) -> Tuple[str, ...]:
        """
        Return all the table names in the schema.

        Returns
        -------
        tuple
            A tuple containing all names of the tables
        """
        query = (
            "SELECT name FROM sqlite_master\n"
            "WHERE\n"
            "    type ='table' AND\n"
            "    name NOT LIKE 'sqlite_%'"
        )
        # pylint: disable=no-member
        return tuple(self.__db_reader.query(query).loc[:, "name"])

    def __get_table_column_dict(self) -> Dict[str, Tuple[str, ...]]:
        """
        Return all the column names of the specified tables.

        Returns
        -------
        table_column_dict : dict of tuple
            Dict containing the column names
            On the form

            >>> {'table_1': ('table_1_column_1', ...),
            ...  'table_2': ('table_2_column_1', ...), ...}
        """
        table_column_dict = dict()

        query = "SELECT name FROM pragma_table_info('{}')"

        for table_name in self.table_names:
            # pylint: disable=no-member
            table_column_dict[table_name] = tuple(
                self.__db_reader.query(query.format(table_name)).loc[:, "name"]
            )

        return table_column_dict
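
To make the SQL produced by get_join_query concrete, here is a self-contained re-implementation of the same string construction (illustrative only), together with the query it yields for a single connection:

from typing import Dict, Sequence, Tuple

def build_join_query(
    from_statement: str,
    columns: Sequence[str],
    alias_columns: Sequence[str],
    table_connections: Dict[str, Tuple[str, ...]],
) -> str:
    query = "SELECT\n"
    for column, alias in zip(columns, alias_columns):
        query += f'{" " * 7}{column} AS "{alias}",\n'
    query = f"{query[:-2]}\n"  # Strip the trailing comma
    query += f"FROM {from_statement}\n"
    for left_table, right_tables in table_connections.items():
        for right_table in right_tables:
            query += (
                f'{" " * 4}INNER JOIN {right_table} ON '
                f"{left_table}.{right_table}_id = {right_table}.id\n"
            )
    return query

print(build_join_query("run", ("run.id",), ("run.id",), {"run": ("split",)}))
# SELECT
#        run.id AS "run.id"
# FROM run
#     INNER JOIN split ON run.split_id = split.id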
Example #7
class StatusChecker:
    r"""
    Class to check and update the status of runs.

    Attributes
    ----------
    __db_connector : DatabaseConnector
        Connection to the database under consideration
    __db_reader : DatabaseReader
        Object to read the database with
    project_path : Path
        Path to the project

    Methods
    -------
    check_and_update_status()
        Check and update the status for the schema
    check_and_update_until_complete()
        Check and update the status until all runs are stopped
    __check_submitted(metadata_updater, submitted_to_check)
        Check the status of all runs which have status `submitted`
    __check_running(metadata_updater, running_to_check)
        Check the status of all runs which have status `running`
    __check_if_stopped(log_reader, metadata_updater)
        Check if a run has stopped
    check_if_running_or_errored(log_reader)
        Check if a run is still running or has errored

    Examples
    --------
    >>> from pathlib import Path
    >>> from bout_runners.database.database_connector import \
    ...     DatabaseConnector
    >>> db_connector = DatabaseConnector('name_of_db')
    >>> project_path = Path('path').joinpath('to', 'project')
    >>> status_checker = StatusChecker(db_connector, project_path)
    >>> status_checker.check_and_update_status()

    Any updates to the runs will be written to the database.
    Alternatively, one can run the program until all jobs have
    stopped by calling

    >>> status_checker.check_and_update_until_complete()
    """

    def __init__(
        self,
        db_connector: Optional[DatabaseConnector] = None,
        project_path: Optional[Union[Path, str]] = None,
    ) -> None:
        """
        Set connector, reader and a project path.

        Notes
        -----
        The StatusChecker instance only checks the project belonging to the
        database schema accessed through the `db_connector`

        Parameters
        ----------
        db_connector : DatabaseConnector
            Connection to the database
        project_path : Path
            Path to the project (the root directory which usually contains
            the makefile and the executable)
        """
        self.__db_connector = (db_connector if db_connector is not None else
                               DatabaseConnector())
        self.__db_reader = DatabaseReader(self.__db_connector)
        self.__project_path = Path(
            project_path) if project_path is not None else Path()

    def check_and_update_status(self) -> None:
        """
        Check and update the status for the schema.

        Raises
        ------
        RuntimeError
            If the schema does not exist
        """
        # Check that the run table exists
        if not self.__db_reader.check_tables_created():
            logging.error(
                "No tables found in %s",
                self.__db_reader.db_connector.db_path,
            )
            message = "Can not check the status of schemas that does not exist"
            raise RuntimeError(message)

        # Create placeholder metadata_updater
        metadata_updater = MetadataUpdater(self.__db_connector, run_id=-1)

        # Check runs with status 'submitted'
        query = ("SELECT name, id AS run_id FROM run WHERE\n"
                 "latest_status = 'submitted' OR\n"
                 "latest_status = 'created'")
        submitted_to_check = self.__db_reader.query(query)
        self.__check_submitted(metadata_updater, submitted_to_check)

        # Check runs with status 'running'
        query = "SELECT name, id FROM run WHERE latest_status = 'running'"
        running_to_check = self.__db_reader.query(query)
        self.__check_running(metadata_updater, running_to_check)

    @staticmethod
    def get_query_string_for_non_errored_runs() -> str:
        """
        Return the query string for non-errored runs.

        Returns
        -------
        str
            Query string for non-errored runs
        """
        return ("SELECT name, id AS run_id FROM run WHERE\n"
                "latest_status = 'submitted' OR\n"
                "latest_status = 'created' OR\n"
                "latest_status = 'running'")

    def check_and_update_until_complete(self,
                                        seconds_between_update: int = 5
                                        ) -> None:
        """
        Check and update the status until all runs are stopped.

        Parameters
        ----------
        seconds_between_update : int
            Number of seconds before a new status check is performed
        """
        query = self.get_query_string_for_non_errored_runs()
        while len(self.__db_reader.query(query).index) != 0:
            self.check_and_update_status()
            time.sleep(seconds_between_update)

    def __check_submitted(self, metadata_updater: MetadataUpdater,
                          submitted_to_check: DataFrame) -> None:
        """
        Check the status of all runs which have status `submitted`.

        Parameters
        ----------
        metadata_updater : MetadataUpdater
            Object which updates the database
        submitted_to_check : DataFrame
            DataFrame containing the `id` and `name` of the runs with status `submitted`

        Raises
        ------
        RuntimeError
            In case log_reader.started() is True and log_reader.start_time is None
        """
        for name, run_id in submitted_to_check.itertuples(index=False):
            metadata_updater.run_id = run_id

            log_path = self.__project_path.joinpath(name, "BOUT.log.0")

            if log_path.is_file():
                log_reader = LogReader(log_path)
                if log_reader.started():
                    start_time = log_reader.start_time
                    # Raise to prevent "Incompatible types in assignment" with Optional
                    if start_time is None:
                        raise RuntimeError(
                            "log_reader.start_time is None although "
                            "log_reader.started is True")
                    metadata_updater.update_start_time(start_time)
                    latest_status = self.__check_if_stopped(
                        log_reader, metadata_updater)

                else:
                    # No start time was found in the log
                    latest_status = self.check_if_running_or_errored(
                        log_reader)
            else:
                # No log file exists
                # NOTE: This means that the execution is either in a
                #       queue or has failed the submission.
                #       For now, we still consider this as submitted
                #       This can maybe be decided by checking either the
                #       pid or the status from the submitter
                latest_status = "submitted"

            metadata_updater.update_latest_status(latest_status)

    def __check_running(self, metadata_updater: MetadataUpdater,
                        running_to_check: DataFrame) -> None:
        """
        Check the status of all runs which have status `running`.

        Parameters
        ----------
        metadata_updater : MetadataUpdater
            Object which updates the database
        running_to_check : DataFrame
            DataFrame containing the `id` and `name` of the runs with status `running`
        """
        for name, run_id in running_to_check.itertuples(index=False):
            metadata_updater.run_id = run_id
            log_path = self.__project_path.joinpath(name, "BOUT.log.0")
            log_reader = LogReader(log_path)
            latest_status = self.check_if_running_or_errored(log_reader)
            metadata_updater.update_latest_status(latest_status)

    def __check_if_stopped(self, log_reader: LogReader,
                           metadata_updater: MetadataUpdater) -> str:
        """
        Check if a run has stopped.

        Parameters
        ----------
        log_reader : LogReader
            The object which reads log files
        metadata_updater : MetadataUpdater
            Object which updates the database

        Returns
        -------
        latest_status : str
            The latest status

        Raises
        ------
        RuntimeError
            In case log_reader.ended() is True and log_reader.end_time is None
        """
        if log_reader.ended():
            end_time = log_reader.end_time
            # Raise to prevent "Incompatible types in assignment" with Optional
            if end_time is None:
                raise RuntimeError("log_reader.end_time is None although "
                                   "log_reader.ended() is True")
            metadata_updater.update_stop_time(end_time)
            latest_status = "complete"
        else:
            latest_status = self.check_if_running_or_errored(log_reader)
        return latest_status

    @staticmethod
    def check_if_running_or_errored(log_reader: LogReader) -> str:
        """
        Check if a run is still running or has errored.

        Parameters
        ----------
        log_reader : LogReader
            The object which reads log files

        Returns
        -------
        latest_status : str
            The latest status
        """
        pid = log_reader.pid
        if pid is None:
            latest_status = "created"
        elif psutil.pid_exists(pid):
            latest_status = "running"
        else:
            latest_status = "error"
        return latest_status
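
The decision logic in check_if_running_or_errored can be demonstrated standalone; the sketch below uses the current process as a pid which certainly exists:

import os
import psutil

def status_from_pid(pid):
    if pid is None:
        return "created"   # No pid found in the log yet
    if psutil.pid_exists(pid):
        return "running"   # The process is still alive
    return "error"         # The process is gone but the run never ended

print(status_from_pid(None))         # created
print(status_from_pid(os.getpid()))  # running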
Example #8
def test_status_checker(
    test_case: str,
    get_test_data_path: Path,
    get_test_db_copy: Callable[[str], DatabaseConnector],
    mock_pid_exists: Callable[[str], None],
    copy_test_case_log_file: Callable[[str], None],
) -> None:
    """
    Test the StatusChecker exhaustively (excluding raises and loop).

    Parameters
    ----------
    test_case : str
        Description of the test on the form

        >>> ('<log_file_present>_<pid_present_in_log>_'
        ...  '<started_time_present_in_log>_<ended_time_present_in_log>'
        ...  '_<whether_pid_exists>_<new_status>')

    get_test_data_path : Path
        Path to test data
    get_test_db_copy : function
        Function which returns a database connector to the copy of the
        test database
    mock_pid_exists : function
        Function which sets up a monkeypatch for psutil.pid_exists
    copy_test_case_log_file : function
        Function which copies log files according to the test_case
    """
    project_path = get_test_data_path
    db_connector = get_test_db_copy(test_case)
    mock_pid_exists(test_case)
    copy_test_case_log_file(test_case)

    db_reader = DatabaseReader(db_connector)

    status_checker = StatusChecker(db_connector, project_path)
    status_checker.check_and_update_status()

    # Check that the correct status has been assigned to "running"
    # pylint: disable=no-member
    result = db_reader.query("SELECT latest_status FROM run WHERE name = "
                             "'testdata_5'").loc[0, "latest_status"]
    assert result == "running"

    # Check that the correct status has been assigned to "submitted"
    expected = test_case.split("_")[-1]
    # pylint: disable=no-member
    result = db_reader.query("SELECT latest_status FROM run WHERE name = "
                             "'testdata_6'").loc[0, "latest_status"]
    assert result == expected

    # Check that correct start_time has been set
    if "not_started" not in test_case:
        expected = str(datetime(2020, 5, 1, 17, 7, 10))
        # pylint: disable=no-member
        result = db_reader.query("SELECT start_time FROM run WHERE name = "
                                 "'testdata_6'").loc[0, "start_time"]
        assert expected == result

    # Check that correct end_time has been set
    if "not_ended" not in test_case and "complete" in test_case:
        expected = str(datetime(2020, 5, 1, 17, 7, 14))
        # pylint: disable=no-member
        result = db_reader.query("SELECT stop_time FROM run WHERE name = "
                                 "'testdata_6'").loc[0, "stop_time"]
        assert expected == result
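
A side note on the timestamp assertions above: comparing against str(datetime(...)) works because the times are (presumably) stored as text in the 'YYYY-MM-DD HH:MM:SS' format, which is exactly what str() of a datetime yields:

from datetime import datetime

print(str(datetime(2020, 5, 1, 17, 7, 10)))  # 2020-05-01 17:07:10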