def get_rows(db_file, table, col_name, col_value, selected_col="*"):
    query = "SELECT {} from {} where {} = ?".format(selected_col, table,
                                                    col_name)

    with connect_db_ctx(db_file) as cur:
        rows = cur.execute(query, (col_value, )).fetchall()
    return rows
Beispiel #2
0
 def get_retries(self, process, realisation_name):
     with connect_db_ctx(self._db_file) as cur:
         return cur.execute(
             "SELECT COUNT(*) from state "
             "WHERE run_name = ? AND proc_type = ? and status != ?",
             (realisation_name, process, const.Status.created.value),
         ).fetchone()[0]
Beispiel #3
0
 def is_task_complete(self, task):
     process, run_name, status = task
     with connect_db_ctx(self._db_file) as cur:
         completed_tasks = cur.execute(
             """SELECT COUNT (*) 
                       FROM state 
                       WHERE run_name = (?)
                        AND proc_type = (?)
                        AND status = (?)""",
             (run_name, process, const.Status.completed.value),
         ).fetchone()[0]
     return completed_tasks > 0
Beispiel #4
0
    def add_retries(self, n_max_retries: int):
        """Checks the database for failed tasks with less failures than the given n_max_retries.
        If any are found then the tasks are checked for any entries that are created, queued, running or completed.
        If any are found then nothing happens, if none are found then another created entry is added to the db.
        n_max_retries: The maximum number of retries a task can have"""
        with connect_db_ctx(self._db_file) as cur:
            errored = cur.execute(
                "SELECT run_name, proc_type "
                "FROM state, status_enum "
                "WHERE state.status = status_enum.id "
                "AND status_enum.state  = 'failed' "
            ).fetchall()

        failure_count = {}
        for run_name, proc_type in errored:
            key = "{}__{}".format(run_name, proc_type)
            if key not in failure_count.keys():
                failure_count.update({key: 0})
            failure_count[key] += 1

        with connect_db_ctx(self._db_file) as cur:
            for key, fail_count in failure_count.items():
                if fail_count >= n_max_retries:
                    continue
                run_name, proc_type = key.split("__")
                # Gets the number of entries for the task with state in [created, queued, running or completed]
                # Where completed has enum index 4, and the other 3 less than this
                # If any are found then don't add another entry
                not_failed_count = cur.execute(
                    "SELECT COUNT(*) "
                    "FROM state "
                    "WHERE run_name = (?)"
                    "AND proc_type = (?)"
                    "AND status <= (SELECT id FROM status_enum WHERE state = 'completed') ",
                    (run_name, proc_type),
                ).fetchone()[0]
                if not_failed_count == 0:
                    self._insert_task(cur, run_name, proc_type)
Beispiel #5
0
    def check_mgmt_db_progress(self):
        """Checks auto submit progress in the management db"""
        with connect_db_ctx(sim_struct.get_mgmt_db(self.stage_dir)) as cur:
            comp_count = [
                cur.execute(
                    "SELECT COUNT(*) "
                    "FROM state "
                    "WHERE status = ? "
                    "AND proc_type in (?{})".format(",?" *
                                                    (len(self.tasks) - 1)),
                    (i, *self.tasks),
                ).fetchone()[0] for i in range(1, 5)
            ]
            total_count = cur.execute(
                "SELECT COUNT(*) FROM state "
                "WHERE proc_type in (?{})".format(",?" *
                                                  (len(self.tasks) - 1)),
                (*self.tasks, ),
            ).fetchone()[0]

        return comp_count, total_count
Beispiel #6
0
    def populate(self, realisations, srf_files: Union[List[str], str] = []):
        """Initial population of the database with all realisations"""
        # for manual install, only one srf will be passed to srf_files as a string
        if isinstance(srf_files, str):
            srf_files = [srf_files]

        realisations.extend(
            [os.path.splitext(os.path.basename(srf))[0] for srf in srf_files]
        )

        if len(realisations) == 0:
            print("No realisations found - no entries inserted into db")
        else:
            with connect_db_ctx(self._db_file) as cur:
                procs_to_be_done = cur.execute(
                    """select * from proc_type_enum"""
                ).fetchall()

                for run_name in realisations:
                    for proc in procs_to_be_done:
                        if not self._does_task_exists(cur, run_name, proc[0]):
                            self._insert_task(cur, run_name, proc[0])
Beispiel #7
0
    def _check_dependancy_met(self, task, logger=get_basic_logger()):
        """Checks if all dependencies for the specified are met"""
        process, run_name = task
        process = Process(process)

        with connect_db_ctx(self._db_file) as cur:
            completed_tasks = cur.execute(
                """SELECT proc_type 
                          FROM status_enum, state 
                          WHERE state.status = status_enum.id
                           AND run_name = (?)
                           AND status_enum.state = 'completed'""",
                (run_name,),
            ).fetchall()
        logger.debug(
            "Considering task {} for realisation {}. Completed tasks as follows: {}".format(
                process, run_name, completed_tasks
            )
        )
        remaining_deps = process.get_remaining_dependencies(
            [const.ProcessType(x[0]) for x in completed_tasks]
        )
        logger.debug("{} has remaining deps: {}".format(task, remaining_deps))
        return len(remaining_deps) == 0
Beispiel #8
0
    def command_builder(
        self,
        allowed_tasks: List[const.ProcessType] = None,
        blocked_tasks: List[const.ProcessType] = None,
        allowed_states: List[const.Status] = None,
        blocked_states: List[const.Status] = None,
        allowed_ids: List[int] = None,
        blocked_ids: List[int] = None,
    ):
        """
        Allows for retrieving custom collections of database entries
        Allowed and blocked are mutually exclusive with allowed being used first. No error is raised if both are present
        If any list is empty this is treated as if it were None
        :param allowed_tasks, blocked_tasks: a list of process types to either block or exclusively allow
        :param allowed_states, blocked_states: a list of states to either block or exclusively allow
        :param allowed_ids, blocked_ids: a list of job ids to either block or exclusively allow
        :return: A list of Entry objects
        """

        base_command = (
            "SELECT run_name, proc_type, state.status, job_id "
            "FROM state, status_enum "
            "WHERE state.status = status_enum.id "
        )
        arguments = []

        if allowed_tasks is not None and len(allowed_tasks) > 0:
            allowed_tasks = [str(task.value) for task in allowed_tasks]
            base_command += " AND proc_type IN ({})".format(
                ",".join("?" * len(allowed_tasks))
            )
            arguments.extend(allowed_tasks)
        elif blocked_tasks is not None and len(blocked_tasks) > 0:
            blocked_tasks = [str(task.value) for task in blocked_tasks]
            base_command += " AND proc_type NOT IN ({})".format(
                ",".join("?" * len(blocked_tasks))
            )
            arguments.extend(blocked_tasks)

        if allowed_states is not None and len(allowed_states) > 0:
            allowed_states = [str(state.str_value) for state in allowed_states]
            base_command += " AND status_enum.state IN ({})".format(
                ",".join("?" * len(allowed_states))
            )
            arguments.extend(allowed_states)
        elif blocked_states is not None and len(blocked_states) > 0:
            blocked_states = [str(state.str_value) for state in blocked_states]
            base_command += " AND status_enum.state NOT IN ({})".format(
                ",".join("?" * len(blocked_states))
            )
            arguments.extend(blocked_states)

        if allowed_ids is not None and len(allowed_ids) > 0:
            allowed_ids = [str(state) for state in allowed_ids]
            base_command += " AND job_id IN ({})".format(
                ",".join("?" * len(allowed_ids))
            )
            arguments.extend(allowed_ids)
        elif blocked_ids is not None and len(blocked_ids) > 0:
            blocked_ids = [str(state) for state in blocked_ids]
            base_command += " AND job_id NOT IN ({})".format(
                ",".join("?" * len(blocked_ids))
            )
            arguments.extend(blocked_ids)

        with connect_db_ctx(self._db_file) as cur:
            result = cur.execute(base_command, arguments).fetchall()

        return [SchedulerTask(*entry) for entry in result]
Beispiel #9
0
    def init_db(cls, db_file: str, init_script: str):
        with connect_db_ctx(db_file) as cur:
            with open(init_script, "r") as f:
                cur.executescript(f.read())

        return cls(db_file)
Beispiel #10
0
 def insert(self, run_name: str, proc_type: int):
     """Inserts a task into the mgmt db"""
     with connect_db_ctx(self._db_file) as cur:
         self._insert_task(cur, run_name, proc_type)
Beispiel #11
0
    def get_runnable_tasks(
        self,
        allowed_rels,
        task_limit,
        update_files,
        allowed_tasks=None,
        logger=get_basic_logger(),
    ):
        """Gets all runnable tasks based on their status and their associated
        dependencies (i.e. other tasks have to be finished first)

        Returns a list of tuples (proc_type, run_name, state_str)
        """
        if allowed_tasks is None:
            allowed_tasks = list(const.ProcessType)
        allowed_tasks = [str(task.value) for task in allowed_tasks]

        if len(allowed_tasks) == 0:
            return []

        runnable_tasks = []
        offset = 0

        # "{}__{}" is intended to be the template for a unique string for every realisation and process type pair
        # Used to compare with database entries to prevent running a task that has already been submitted, but not
        # recorded
        tasks_waiting_for_updates = [
            "{}__{}".format(*(entry.split(".")[1:3])) for entry in update_files
        ]

        with connect_db_ctx(self._db_file) as cur:
            entries = cur.execute(
                """SELECT COUNT(*) 
                          FROM status_enum, state 
                          WHERE state.status = status_enum.id
                           AND proc_type IN (?{})
                           AND run_name LIKE (?)
                           AND status_enum.state = 'created'""".format(
                    ",?" * (len(allowed_tasks) - 1)
                ),
                (*allowed_tasks, allowed_rels),
            ).fetchone()[0]

            while len(runnable_tasks) < task_limit and offset < entries:
                db_tasks = cur.execute(
                    """SELECT proc_type, run_name 
                              FROM status_enum, state 
                              WHERE state.status = status_enum.id
                               AND proc_type IN (?{})
                               AND run_name LIKE (?)
                                   AND status_enum.state = 'created'
                                   LIMIT 100 OFFSET ?""".format(
                        ",?" * (len(allowed_tasks) - 1)
                    ),
                    (*allowed_tasks, allowed_rels, offset),
                ).fetchall()
                runnable_tasks.extend(
                    [
                        (*task, self.get_retries(*task))
                        for task in db_tasks
                        if self._check_dependancy_met(task, logger)
                        and "{}__{}".format(*task) not in tasks_waiting_for_updates
                    ]
                )
                offset += 100

        return runnable_tasks