Example #1
def get_workflow_status(schd: 'Scheduler') -> Tuple[WorkflowStatus, str]:
    """Return the status of the provided workflow.

    This should be a short, concise description of the workflow state.

    Args:
        schd: The running workflow

    Returns:
        tuple - (state, state_msg)

        state:
            The WorkflowState.
        state_msg:
            Text describing the current state (may be an empty string).

    """
    status = WorkflowStatus.RUNNING
    status_msg = ''

    if schd.is_paused:
        status = WorkflowStatus.PAUSED
        status_msg = 'Paused'
    elif schd.stop_mode is not None:
        status = WorkflowStatus.STOPPING
        status_msg = f'Stopping: {schd.stop_mode.explain()}'
    elif schd.pool.hold_point:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_HOLD % schd.pool.hold_point)
    elif schd.pool.stop_point:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_STOP % schd.pool.stop_point)
    elif schd.stop_clock_time is not None:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_STOP %
                      time2str(schd.stop_clock_time))
    elif schd.pool.stop_task_id:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_STOP % schd.pool.stop_task_id)
    elif schd.config and schd.config.final_point:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_STOP %
                      schd.config.final_point)
    else:
        # fallback - running indefinitely
        status_msg = 'Running'

    return (status.value, status_msg)  # type: ignore
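A minimal usage sketch (not part of the Cylc source above), assuming `schd` is a running cylc.flow.scheduler.Scheduler instance, showing how the returned (state, state_msg) pair might be consumed:

# Hypothetical usage sketch; `schd` is assumed to be a running
# cylc.flow.scheduler.Scheduler instance.
state, state_msg = get_workflow_status(schd)
if state == WorkflowStatus.STOPPING.value:
    print(f'Workflow is shutting down: {state_msg}')
else:
    print(f'Workflow state: {state} {state_msg}'.strip())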
Example #2
def get_workflow_status(schd):
    """Return the status of the provided workflow.

    Args:
        schd (cylc.flow.Scheduler): The running workflow

    Returns:
        tuple - (state, state_msg)

        state (cylc.flow.workflow_status.WorkflowStatus):
            The WorkflowState.
        state_msg (str):
            Text describing the current state (may be an empty string).

    """
    status = WorkflowStatus.RUNNING
    status_msg = ''

    if schd.is_paused:
        status = WorkflowStatus.PAUSED
    elif schd.stop_mode is not None:
        status = WorkflowStatus.STOPPING
        status_msg = f'Stopping: {schd.stop_mode.describe()}'
    elif schd.pool.hold_point:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_HOLD % schd.pool.hold_point)
    elif schd.pool.stop_point:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_STOP % schd.pool.stop_point)
    elif schd.stop_clock_time is not None:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_STOP %
                      time2str(schd.stop_clock_time))
    elif schd.pool.stop_task_id:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_STOP % schd.pool.stop_task_id)
    elif schd.config.final_point:
        status_msg = (WORKFLOW_STATUS_RUNNING_TO_STOP %
                      schd.config.final_point)

    return (status.value, status_msg)
Example #3
def get_suite_status(schd):
    """Return the status of the provided suite.

    Args:
        schd (cylc.flow.Scheduler): The running suite

    Returns:
        tuple - (state, state_msg)

        state (cylc.flow.suite_status.SuiteStatus):
            The SuiteState.
        state_msg (str):
            Text describing the current state (may be an empty string).

    """
    status = SuiteStatus.RUNNING
    status_msg = ''

    if schd.pool.is_held:
        status = SuiteStatus.HELD
    elif schd.stop_mode is not None:
        status = SuiteStatus.STOPPING
        status_msg = f'Stopping: {schd.stop_mode.describe()}'
    elif schd.pool.hold_point:
        status_msg = (SUITE_STATUS_RUNNING_TO_HOLD % schd.pool.hold_point)
    elif schd.pool.stop_point:
        status_msg = (SUITE_STATUS_RUNNING_TO_STOP % schd.pool.stop_point)
    elif schd.stop_clock_time is not None:
        status_msg = (SUITE_STATUS_RUNNING_TO_STOP %
                      time2str(schd.stop_clock_time))
    elif schd.stop_task:
        status_msg = (SUITE_STATUS_RUNNING_TO_STOP % schd.stop_task)
    elif schd.config.final_point:
        status_msg = (SUITE_STATUS_RUNNING_TO_STOP % schd.config.final_point)

    return (status.value, status_msg)
Example #4
    def update_task_proxies(self, updated_tasks=None):
        """Update dynamic fields of task nodes/proxies.

        Args:
            updated_tasks (list): [cylc.flow.task_proxy.TaskProxy]
                Update task-node from corresponding given list of
                task proxy objects from the workflow task pool.

        """
        if not updated_tasks:
            return
        tasks = self.data[self.workflow_id][TASKS]
        task_proxies = self.data[self.workflow_id][TASK_PROXIES]
        update_time = time()
        task_defs = {}

        # update task instance
        for itask in updated_tasks:
            name, point_string = TaskID.split(itask.identity)
            tp_id = (
                f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}')
            if (tp_id not in task_proxies
                    and tp_id not in self.added[TASK_PROXIES]):
                continue
            # Gather task definitions for elapsed time recalculation.
            if name not in task_defs:
                task_defs[name] = itask.tdef
            # Create new message and copy existing message content.
            tp_delta = self.updated[TASK_PROXIES].setdefault(
                tp_id, PbTaskProxy(id=tp_id))
            tp_delta.stamp = f'{tp_id}@{update_time}'
            tp_delta.state = itask.state.status
            if tp_id in task_proxies:
                self.state_update_families.add(
                    task_proxies[tp_id].first_parent)
            else:
                self.state_update_families.add(
                    self.added[TASK_PROXIES][tp_id].first_parent)
            tp_delta.is_held = itask.state.is_held
            tp_delta.flow_label = itask.flow_label
            tp_delta.job_submits = itask.submit_num
            tp_delta.latest_message = itask.summary['latest_message']
            tp_delta.jobs[:] = [
                j_id for j_id in self.schd.job_pool.task_jobs.get(tp_id, [])
                if j_id not in task_proxies.get(tp_id, PbTaskProxy()).jobs
            ]
            prereq_list = []
            for prereq in itask.state.prerequisites:
                # Protobuf prerequisite messages are populated within api_dump().
                prereq_obj = prereq.api_dump(self.workflow_id)
                if prereq_obj:
                    prereq_list.append(prereq_obj)
            tp_delta.prerequisites.extend(prereq_list)
            tp_delta.outputs = json.dumps({
                trigger: is_completed
                for trigger, _, is_completed in itask.state.outputs.get_all()
            })
            extras = {}
            if itask.tdef.clocktrigger_offset is not None:
                extras['Clock trigger time reached'] = (
                    itask.is_waiting_clock_done())
                extras['Triggers at'] = time2str(itask.clock_trigger_time)
            for trig, satisfied in itask.state.external_triggers.items():
                key = f'External trigger "{trig}"'
                if satisfied:
                    extras[key] = 'satisfied'
                else:
                    extras[key] = 'NOT satisfied'
            for label, satisfied in itask.state.xtriggers.items():
                sig = self.schd.xtrigger_mgr.get_xtrig_ctx(
                    itask, label).get_signature()
                extra = f'xtrigger "{label} = {sig}"'
                if satisfied:
                    extras[extra] = 'satisfied'
                else:
                    extras[extra] = 'NOT satisfied'
            tp_delta.extras = json.dumps(extras)

        # Recalculate the affected task definitions' mean elapsed times.
        for name, tdef in task_defs.items():
            elapsed_time = task_mean_elapsed_time(tdef)
            if elapsed_time:
                t_id = f'{self.workflow_id}{ID_DELIM}{name}'
                t_delta = PbTask(stamp=f'{t_id}@{update_time}',
                                 mean_elapsed_time=elapsed_time)
                self.updated[TASKS].setdefault(
                    t_id, PbTask(id=t_id)).MergeFrom(t_delta)
                tasks[t_id].MergeFrom(t_delta)
Example #5
def _set_auto_restart(scheduler,
                      restart_delay=None,
                      mode=AutoRestartMode.RESTART_NORMAL):
    """Configure the workflow to automatically stop and restart.

    Restart handled by `workflow_auto_restart`.

    Args:
        scheduler (cylc.flow.scheduler.Scheduler):
            Scheduler instance of the running workflow.
        restart_delay (cylc.flow.parsec.DurationFloat):
            Workflow will wait a random period between 0 and
            `restart_delay` seconds before attempting to stop/restart in
            order to avoid multiple workflows restarting simultaneously.
        mode (str): Auto stop-restart mode.

    Returns:
        bool: False if it is not possible to automatically stop/restart
        the workflow due to its configuration/runtime state.
    """
    # Check that the workflow isn't already shutting down.
    if scheduler.stop_mode:
        return True

    # Force mode, stop the workflow now, don't restart it.
    if mode == AutoRestartMode.FORCE_STOP:
        LOG.critical('This workflow will be shut down as the workflow '
                     'host is unable to continue running it.\n'
                     'When another workflow host becomes available '
                     'the workflow can be restarted by:\n'
                     f'    $ cylc play {scheduler.workflow}')
        if scheduler.auto_restart_time:
            LOG.info('Scheduled automatic restart canceled')
        scheduler.auto_restart_time = time()
        scheduler.auto_restart_mode = mode
        return True

    # Check workflow isn't already scheduled to auto-stop.
    if scheduler.auto_restart_time is not None:
        return True

    # Workflow host is condemned and workflow running in no detach mode.
    # Raise an error to cause the workflow to abort.
    # This should raise an "abort" event and return a non-zero code to the
    # caller still attached to the workflow process.
    if scheduler.options.no_detach:
        raise RuntimeError('Workflow host condemned in no detach mode')

    # Check workflow is able to be safely restarted.
    if not _can_auto_restart():
        return False

    LOG.info('Workflow will automatically restart on a new host.')
    if restart_delay is not None and restart_delay != 0:
        if restart_delay > 0:
            # Delay shutdown by a random interval to avoid many
            # workflows restarting simultaneously.
            shutdown_delay = int(random() * restart_delay)  # nosec
        else:
            # Undocumented feature: schedule an exact restart interval
            # for testing purposes.
            shutdown_delay = abs(int(restart_delay))
        shutdown_time = time() + shutdown_delay
        LOG.info('Workflow will restart in %ss (at %s)', shutdown_delay,
                 time2str(shutdown_time))
        scheduler.auto_restart_time = shutdown_time
    else:
        scheduler.auto_restart_time = time()

    scheduler.auto_restart_mode = AutoRestartMode.RESTART_NORMAL

    return True
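A hedged call-site sketch (not from the source); `scheduler` is assumed to be a running Scheduler instance and LOG the module logger used above:

# Hypothetical call site: request a stop/restart on another host, spreading
# shutdowns over a random delay of up to 60 seconds.
if not _set_auto_restart(scheduler, restart_delay=60):
    LOG.error('Workflow cannot be automatically stopped/restarted.')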
Example #6
    def update(self, schd):
        """Update."""
        self.update_time = time()
        global_summary = {}
        family_summary = {}

        task_summary, task_states = self._get_tasks_info(schd)

        all_states = []
        ancestors_dict = schd.config.get_first_parent_ancestors()

        # Compute state_counts (total, and per cycle).
        state_count_totals = {}
        state_count_cycles = {}

        for point_string, c_task_states in task_states.items():
            # For each cycle point, construct a family state tree
            # based on the first-parent single-inheritance tree

            c_fam_task_states = {}

            count = {}

            for key in c_task_states:
                state = c_task_states[key]
                if state is None:
                    continue
                try:
                    count[state] += 1
                except KeyError:
                    count[state] = 1

                all_states.append(state)
                for parent in ancestors_dict.get(key, []):
                    if parent == key:
                        continue
                    c_fam_task_states.setdefault(parent, set([]))
                    c_fam_task_states[parent].add(state)

            state_count_cycles[point_string] = count

            for fam, child_states in c_fam_task_states.items():
                f_id = TaskID.get(fam, point_string)
                state = extract_group_state(child_states)
                if state is None:
                    continue
                try:
                    famcfg = schd.config.cfg['runtime'][fam]['meta']
                except KeyError:
                    famcfg = {}
                description = famcfg.get('description')
                title = famcfg.get('title')
                family_summary[f_id] = {
                    'name': fam,
                    'description': description,
                    'title': title,
                    'label': point_string,
                    'state': state
                }

        state_count_totals = {}
        for point_string, count in list(state_count_cycles.items()):
            for state, state_count in count.items():
                state_count_totals.setdefault(state, 0)
                state_count_totals[state] += state_count

        all_states.sort()

        for key, value in (('oldest cycle point string',
                            schd.pool.get_min_point()),
                           ('newest cycle point string',
                            schd.pool.get_max_point()),
                           ('newest runahead cycle point string',
                            schd.pool.get_max_point_runahead())):
            if value:
                global_summary[key] = str(value)
            else:
                global_summary[key] = None
        if get_utc_mode():
            global_summary['time zone info'] = TIME_ZONE_UTC_INFO
        else:
            global_summary['time zone info'] = TIME_ZONE_LOCAL_INFO
        global_summary['last_updated'] = self.update_time
        global_summary['run_mode'] = schd.config.run_mode()
        global_summary['states'] = all_states
        global_summary['namespace definition order'] = (
            schd.config.ns_defn_order)
        global_summary['reloading'] = schd.pool.do_reload
        global_summary['state totals'] = state_count_totals
        # Extract suite and task URLs from config.
        global_summary['suite_urls'] = dict(
            (i, j['meta']['URL'])
            for (i, j) in schd.config.cfg['runtime'].items())
        global_summary['suite_urls']['suite'] = schd.config.cfg['meta']['URL']

        # Construct a suite status string for use by monitoring clients.
        if schd.pool.is_held:
            global_summary['status_string'] = SUITE_STATUS_HELD
        elif schd.stop_mode is not None:
            global_summary['status_string'] = SUITE_STATUS_STOPPING
        elif schd.pool.hold_point:
            global_summary['status_string'] = (SUITE_STATUS_RUNNING_TO_HOLD %
                                               schd.pool.hold_point)
        elif schd.pool.stop_point:
            global_summary['status_string'] = (SUITE_STATUS_RUNNING_TO_STOP %
                                               schd.pool.stop_point)
        elif schd.stop_clock_time is not None:
            global_summary['status_string'] = (SUITE_STATUS_RUNNING_TO_STOP %
                                               time2str(schd.stop_clock_time))
        elif schd.stop_task:
            global_summary['status_string'] = (SUITE_STATUS_RUNNING_TO_STOP %
                                               schd.stop_task)
        elif schd.config.final_point:
            global_summary['status_string'] = (SUITE_STATUS_RUNNING_TO_STOP %
                                               schd.config.final_point)
        else:
            global_summary['status_string'] = SUITE_STATUS_RUNNING

        # Replace the originals (atomic update, for access from other threads).
        self.task_summary = task_summary
        self.global_summary = global_summary
        self.family_summary = family_summary
        self.state_count_totals = state_count_totals
        self.state_count_cycles = state_count_cycles
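A small read-side sketch under stated assumptions: `summary_mgr` is a hypothetical instance of this class on which update(schd) has already been called. It shows how a monitoring client might consume the atomically replaced summaries:

# Hypothetical consumer of the summaries replaced at the end of update().
summary = summary_mgr.global_summary
print(summary['status_string'])  # e.g. 'running to stop at 20300101T0000Z'
print(summary['state totals'])   # e.g. {'running': 2, 'waiting': 5}
for f_id, family in summary_mgr.family_summary.items():
    print(f_id, family['state'])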