def get_workflow_status(schd: 'Scheduler') -> Tuple[str, str]:
    """Return the status of the provided workflow.

    This should be a short, concise description of the workflow state.

    Args:
        schd: The running workflow

    Returns:
        tuple - (state, state_msg)

        state:
            The WorkflowStatus.
        state_msg:
            Text describing the current state (may be an empty string).

    """
    status = WorkflowStatus.RUNNING
    status_msg = ''

    if schd.is_paused:
        status = WorkflowStatus.PAUSED
        status_msg = 'Paused'
    elif schd.stop_mode is not None:
        status = WorkflowStatus.STOPPING
        status_msg = f'Stopping: {schd.stop_mode.explain()}'
    elif schd.pool.hold_point:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_HOLD
            % schd.pool.hold_point)
    elif schd.pool.stop_point:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_STOP
            % schd.pool.stop_point)
    elif schd.stop_clock_time is not None:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_STOP
            % time2str(schd.stop_clock_time))
    elif schd.pool.stop_task_id:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_STOP
            % schd.pool.stop_task_id)
    elif schd.config and schd.config.final_point:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_STOP
            % schd.config.final_point)
    else:
        # fallback - running indefinitely
        status_msg = 'Running'

    # `status.value` is the string value of the WorkflowStatus enum member,
    # so the declared return type is (str, str).
    return (status.value, status_msg)
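
# Usage sketch (assumption, not part of the original module): one way a
# caller might render the (state, state_msg) tuple returned above. `schd`
# is assumed to be a running cylc.flow.scheduler.Scheduler instance; this
# helper is illustrative only.
def _format_workflow_status(schd) -> str:
    state, state_msg = get_workflow_status(schd)
    return f'{state} ({state_msg})' if state_msg else state
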
def get_workflow_status(schd):
    """Return the status of the provided workflow.

    Args:
        schd (cylc.flow.Scheduler): The running workflow

    Returns:
        tuple - (state, state_msg)

        state (cylc.flow.workflow_status.WorkflowStatus):
            The WorkflowState.
        state_msg (str):
            Text describing the current state (may be an empty string).

    """
    status = WorkflowStatus.RUNNING
    status_msg = ''

    if schd.is_paused:
        status = WorkflowStatus.PAUSED
    elif schd.stop_mode is not None:
        status = WorkflowStatus.STOPPING
        status_msg = f'Stopping: {schd.stop_mode.describe()}'
    elif schd.pool.hold_point:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_HOLD
            % schd.pool.hold_point)
    elif schd.pool.stop_point:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_STOP
            % schd.pool.stop_point)
    elif schd.stop_clock_time is not None:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_STOP
            % time2str(schd.stop_clock_time))
    elif schd.pool.stop_task_id:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_STOP
            % schd.pool.stop_task_id)
    elif schd.config.final_point:
        status_msg = (
            WORKFLOW_STATUS_RUNNING_TO_STOP
            % schd.config.final_point)

    return (status.value, status_msg)
def get_suite_status(schd):
    """Return the status of the provided suite.

    Args:
        schd (cylc.flow.Scheduler): The running suite

    Returns:
        tuple - (state, state_msg)

        state (cylc.flow.suite_status.SuiteStatus):
            The SuiteState.
        state_msg (str):
            Text describing the current state (may be an empty string).

    """
    status = SuiteStatus.RUNNING
    status_msg = ''

    if schd.pool.is_held:
        status = SuiteStatus.HELD
    elif schd.stop_mode is not None:
        status = SuiteStatus.STOPPING
        status_msg = f'Stopping: {schd.stop_mode.describe()}'
    elif schd.pool.hold_point:
        status_msg = (
            SUITE_STATUS_RUNNING_TO_HOLD
            % schd.pool.hold_point)
    elif schd.pool.stop_point:
        status_msg = (
            SUITE_STATUS_RUNNING_TO_STOP
            % schd.pool.stop_point)
    elif schd.stop_clock_time is not None:
        status_msg = (
            SUITE_STATUS_RUNNING_TO_STOP
            % time2str(schd.stop_clock_time))
    elif schd.stop_task:
        status_msg = (
            SUITE_STATUS_RUNNING_TO_STOP
            % schd.stop_task)
    elif schd.config.final_point:
        status_msg = (
            SUITE_STATUS_RUNNING_TO_STOP
            % schd.config.final_point)

    return (status.value, status_msg)
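
# Illustrative sketch (assumption): the SUITE_STATUS_RUNNING_TO_* constants
# used above behave as old-style '%s' templates. The literal text below is a
# stand-in for the real constant, which lives in cylc.flow.suite_status.
_EXAMPLE_STOP_TEMPLATE = 'running to stop at %s'
assert _EXAMPLE_STOP_TEMPLATE % '20250101T0000Z' == (
    'running to stop at 20250101T0000Z')
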
def update_task_proxies(self, updated_tasks=None):
    """Update dynamic fields of task nodes/proxies.

    Args:
        updated_tasks (list): [cylc.flow.task_proxy.TaskProxy]
            Update task-node from corresponding given list of
            task proxy objects from the workflow task pool.

    """
    if not updated_tasks:
        return
    tasks = self.data[self.workflow_id][TASKS]
    task_proxies = self.data[self.workflow_id][TASK_PROXIES]
    update_time = time()
    task_defs = {}

    # update task instance
    for itask in updated_tasks:
        name, point_string = TaskID.split(itask.identity)
        tp_id = (
            f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}')
        if (tp_id not in task_proxies and
                tp_id not in self.added[TASK_PROXIES]):
            continue
        # Gather task definitions for elapsed time recalculation.
        if name not in task_defs:
            task_defs[name] = itask.tdef
        # Create new message and copy existing message content.
        tp_delta = self.updated[TASK_PROXIES].setdefault(
            tp_id, PbTaskProxy(id=tp_id))
        tp_delta.stamp = f'{tp_id}@{update_time}'
        tp_delta.state = itask.state.status
        if tp_id in task_proxies:
            self.state_update_families.add(
                task_proxies[tp_id].first_parent)
        else:
            self.state_update_families.add(
                self.added[TASK_PROXIES][tp_id].first_parent)
        tp_delta.is_held = itask.state.is_held
        tp_delta.flow_label = itask.flow_label
        tp_delta.job_submits = itask.submit_num
        tp_delta.latest_message = itask.summary['latest_message']
        tp_delta.jobs[:] = [
            j_id
            for j_id in self.schd.job_pool.task_jobs.get(tp_id, [])
            if j_id not in task_proxies.get(tp_id, PbTaskProxy()).jobs
        ]
        prereq_list = []
        for prereq in itask.state.prerequisites:
            # Protobuf messages populated within
            prereq_obj = prereq.api_dump(self.workflow_id)
            if prereq_obj:
                prereq_list.append(prereq_obj)
        tp_delta.prerequisites.extend(prereq_list)
        tp_delta.outputs = json.dumps({
            trigger: is_completed
            for trigger, _, is_completed in itask.state.outputs.get_all()
        })
        extras = {}
        if itask.tdef.clocktrigger_offset is not None:
            extras['Clock trigger time reached'] = (
                itask.is_waiting_clock_done())
            extras['Triggers at'] = time2str(itask.clock_trigger_time)
        for trig, satisfied in itask.state.external_triggers.items():
            key = f'External trigger "{trig}"'
            if satisfied:
                extras[key] = 'satisfied'
            else:
                extras[key] = 'NOT satisfied'
        for label, satisfied in itask.state.xtriggers.items():
            sig = self.schd.xtrigger_mgr.get_xtrig_ctx(
                itask, label).get_signature()
            extra = f'xtrigger "{label} = {sig}"'
            if satisfied:
                extras[extra] = 'satisfied'
            else:
                extras[extra] = 'NOT satisfied'
        tp_delta.extras = json.dumps(extras)

    # Recalculate elapsed time of affected task def elements.
    for name, tdef in task_defs.items():
        elapsed_time = task_mean_elapsed_time(tdef)
        if elapsed_time:
            t_id = f'{self.workflow_id}{ID_DELIM}{name}'
            t_delta = PbTask(
                stamp=f'{t_id}@{update_time}',
                mean_elapsed_time=elapsed_time
            )
            self.updated[TASKS].setdefault(
                t_id, PbTask(id=t_id)).MergeFrom(t_delta)
            tasks[t_id].MergeFrom(t_delta)
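
# Sketch (assumption) of the protobuf delta pattern used above: a delta
# message carries only the changed fields plus a fresh stamp, and MergeFrom
# overlays those set fields onto the stored message, leaving the rest
# untouched. PbTask is the real generated message; field values here are
# illustrative only.
def _apply_task_delta_example():
    stored = PbTask(id='workflow|foo', mean_elapsed_time=10.0)
    delta = PbTask(stamp='workflow|foo@1234567890.0', mean_elapsed_time=12.5)
    stored.MergeFrom(delta)  # 'id' survives; 'stamp' and the mean are updated
    return stored
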
def _set_auto_restart(
        scheduler,
        restart_delay=None,
        mode=AutoRestartMode.RESTART_NORMAL
):
    """Configure the workflow to automatically stop and restart.

    Restart handled by `workflow_auto_restart`.

    Args:
        scheduler (cylc.flow.scheduler.Scheduler):
            Scheduler instance of the running workflow.
        restart_delay (cylc.flow.parsec.DurationFloat):
            Workflow will wait a random period between 0 and
            `restart_delay` seconds before attempting to stop/restart in
            order to avoid multiple workflows restarting simultaneously.
        mode (str):
            Auto stop-restart mode.

    Return:
        bool: False if it is not possible to automatically stop/restart
        the workflow due to its configuration/runtime state.

    """
    # Check that the workflow isn't already shutting down.
    if scheduler.stop_mode:
        return True

    # Force mode, stop the workflow now, don't restart it.
    if mode == AutoRestartMode.FORCE_STOP:
        LOG.critical(
            'This workflow will be shut down as the workflow '
            'host is unable to continue running it.\n'
            'When another workflow host becomes available '
            'the workflow can be restarted by:\n'
            f'    $ cylc play {scheduler.workflow}'
        )
        if scheduler.auto_restart_time:
            LOG.info('Scheduled automatic restart canceled')
        scheduler.auto_restart_time = time()
        scheduler.auto_restart_mode = mode
        return True

    # Check workflow isn't already scheduled to auto-stop.
    if scheduler.auto_restart_time is not None:
        return True

    # Workflow host is condemned and workflow running in no detach mode.
    # Raise an error to cause the workflow to abort.
    # This should raise an "abort" event and return a non-zero code to the
    # caller still attached to the workflow process.
    if scheduler.options.no_detach:
        raise RuntimeError('Workflow host condemned in no detach mode')

    # Check workflow is able to be safely restarted.
    if not _can_auto_restart():
        return False

    LOG.info('Workflow will automatically restart on a new host.')
    if restart_delay is not None and restart_delay != 0:
        if restart_delay > 0:
            # Delay shutdown by a random interval to avoid many
            # workflows restarting simultaneously.
            shutdown_delay = int(random() * restart_delay)  # nosec
        else:
            # Un-documented feature, schedule exact restart interval for
            # testing purposes.
            shutdown_delay = abs(int(restart_delay))
        shutdown_time = time() + shutdown_delay
        LOG.info(
            'Workflow will restart in %ss (at %s)',
            shutdown_delay, time2str(shutdown_time)
        )
        scheduler.auto_restart_time = shutdown_time
    else:
        scheduler.auto_restart_time = time()

    scheduler.auto_restart_mode = AutoRestartMode.RESTART_NORMAL
    return True
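
# Usage sketch (assumption, not called anywhere): how the delay arithmetic
# above behaves for a given restart_delay, reusing the module's `random`
# (random.random). A positive value yields uniform-random jitter; a negative
# value is the undocumented exact-delay hook for tests.
def _example_shutdown_delay(restart_delay):
    if restart_delay > 0:
        # Uniform-random jitter in [0, restart_delay) seconds.
        return int(random() * restart_delay)  # nosec
    # Negative values give an exact delay (undocumented, for tests).
    return abs(int(restart_delay))
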
def update(self, schd):
    """Update."""
    self.update_time = time()
    global_summary = {}
    family_summary = {}

    task_summary, task_states = self._get_tasks_info(schd)
    all_states = []
    ancestors_dict = schd.config.get_first_parent_ancestors()

    # Compute state_counts (total, and per cycle).
    state_count_cycles = {}

    for point_string, c_task_states in task_states.items():
        # For each cycle point, construct a family state tree
        # based on the first-parent single-inheritance tree
        c_fam_task_states = {}
        count = {}

        for key in c_task_states:
            state = c_task_states[key]
            if state is None:
                continue
            try:
                count[state] += 1
            except KeyError:
                count[state] = 1

            all_states.append(state)
            for parent in ancestors_dict.get(key, []):
                if parent == key:
                    continue
                c_fam_task_states.setdefault(parent, set([]))
                c_fam_task_states[parent].add(state)

        state_count_cycles[point_string] = count

        for fam, child_states in c_fam_task_states.items():
            f_id = TaskID.get(fam, point_string)
            state = extract_group_state(child_states)
            if state is None:
                continue
            try:
                famcfg = schd.config.cfg['runtime'][fam]['meta']
            except KeyError:
                famcfg = {}
            description = famcfg.get('description')
            title = famcfg.get('title')
            family_summary[f_id] = {
                'name': fam,
                'description': description,
                'title': title,
                'label': point_string,
                'state': state
            }

    state_count_totals = {}
    for point_string, count in list(state_count_cycles.items()):
        for state, state_count in count.items():
            state_count_totals.setdefault(state, 0)
            state_count_totals[state] += state_count

    all_states.sort()

    for key, value in (
            ('oldest cycle point string', schd.pool.get_min_point()),
            ('newest cycle point string', schd.pool.get_max_point()),
            ('newest runahead cycle point string',
             schd.pool.get_max_point_runahead())):
        if value:
            global_summary[key] = str(value)
        else:
            global_summary[key] = None

    if get_utc_mode():
        global_summary['time zone info'] = TIME_ZONE_UTC_INFO
    else:
        global_summary['time zone info'] = TIME_ZONE_LOCAL_INFO

    global_summary['last_updated'] = self.update_time
    global_summary['run_mode'] = schd.config.run_mode()
    global_summary['states'] = all_states
    global_summary['namespace definition order'] = (
        schd.config.ns_defn_order)
    global_summary['reloading'] = schd.pool.do_reload
    global_summary['state totals'] = state_count_totals

    # Extract suite and task URLs from config.
    global_summary['suite_urls'] = dict(
        (i, j['meta']['URL'])
        for (i, j) in schd.config.cfg['runtime'].items())
    global_summary['suite_urls']['suite'] = schd.config.cfg['meta']['URL']

    # Construct a suite status string for use by monitoring clients.
    if schd.pool.is_held:
        global_summary['status_string'] = SUITE_STATUS_HELD
    elif schd.stop_mode is not None:
        global_summary['status_string'] = SUITE_STATUS_STOPPING
    elif schd.pool.hold_point:
        global_summary['status_string'] = (
            SUITE_STATUS_RUNNING_TO_HOLD % schd.pool.hold_point)
    elif schd.pool.stop_point:
        global_summary['status_string'] = (
            SUITE_STATUS_RUNNING_TO_STOP % schd.pool.stop_point)
    elif schd.stop_clock_time is not None:
        global_summary['status_string'] = (
            SUITE_STATUS_RUNNING_TO_STOP % time2str(schd.stop_clock_time))
    elif schd.stop_task:
        global_summary['status_string'] = (
            SUITE_STATUS_RUNNING_TO_STOP % schd.stop_task)
    elif schd.config.final_point:
        global_summary['status_string'] = (
            SUITE_STATUS_RUNNING_TO_STOP % schd.config.final_point)
    else:
        global_summary['status_string'] = SUITE_STATUS_RUNNING

    # Replace the originals (atomic update, for access from other threads).
    self.task_summary = task_summary
    self.global_summary = global_summary
    self.family_summary = family_summary
    self.state_count_totals = state_count_totals
    self.state_count_cycles = state_count_cycles
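
# Sketch (assumption) of why rebinding whole dicts above is safe for readers
# on other threads: in CPython an attribute assignment is a single atomic
# store, so a reader sees either the old or the new summary, never a
# half-built one. Illustrative only; `summary_manager` is a stand-in for an
# instance of the class above.
def _reader_example(summary_manager):
    # Take a local reference once, then read from it consistently.
    totals = summary_manager.state_count_totals
    return sum(totals.values())
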