def test_is_valid_id(self):
    """Check which strings TaskID.is_valid_id accepts and rejects."""
    accepted = ("a.1", "_.098098439535$#%#@!#~")
    rejected = (
        "abc", "123", "____", "_", "a_b", "a_1", "1_b", "ABC", "a.A A",
    )
    for candidate in accepted:
        self.assertTrue(TaskID.is_valid_id(candidate))
    for candidate in rejected:
        self.assertFalse(TaskID.is_valid_id(candidate))
def test_is_valid_id(self):
    """Check which strings TaskID.is_valid_id accepts and rejects."""
    # Each entry pairs the assertion to apply with the IDs it applies to.
    expectations = (
        (self.assertTrue, ["a.1", "_.098098439535$#%#@!#~"]),
        (self.assertFalse, [
            "abc", "123", "____", "_", "a_b", "a_1", "1_b", "ABC", "a.A A",
        ]),
    )
    for check, task_ids in expectations:
        for task_id in task_ids:
            check(TaskID.is_valid_id(task_id))
def test_is_valid_name(self):
    """Check which values TaskID.is_valid_name accepts and rejects."""
    accepted = ("abc", "123", "____", "_", "a_b", "a_1", "1_b", "ABC")
    rejected = ("a.1", None, "%abc", "", " ")
    for candidate in accepted:
        self.assertTrue(TaskID.is_valid_name(candidate))
    for candidate in rejected:
        self.assertFalse(TaskID.is_valid_name(candidate))
def test_is_valid_id_2(self):
    """Check which strings TaskID.is_valid_id_2 accepts and rejects."""
    # TBD: a.A A is invalid for valid_id, but valid for valid_id_2?
    # TBD: a/a.a is OK?
    accepted = (
        "a.1",
        "_.098098439535$#%#@!#~",
        "a/1",
        "_/098098439535$#%#@!#~",
        "a.A A",
        "a/a.a",
    )
    rejected = ("abc", "123", "____", "_", "a_b", "a_1", "1_b", "ABC")
    for candidate in accepted:
        self.assertTrue(TaskID.is_valid_id_2(candidate))
    for candidate in rejected:
        self.assertFalse(TaskID.is_valid_id_2(candidate))
def test_is_valid_id_2(self):
    """Check which strings TaskID.is_valid_id_2 accepts and rejects."""
    # TBD: a.A A is invalid for valid_id, but valid for valid_id_2?
    # TBD: a/a.a is OK?
    # Each entry pairs the assertion to apply with the IDs it applies to.
    expectations = (
        (self.assertTrue, [
            "a.1", "_.098098439535$#%#@!#~", "a/1",
            "_/098098439535$#%#@!#~", "a.A A", "a/a.a",
        ]),
        (self.assertFalse, [
            "abc", "123", "____", "_", "a_b", "a_1", "1_b", "ABC",
        ]),
    )
    for check, task_ids in expectations:
        for task_id in task_ids:
            check(TaskID.is_valid_id_2(task_id))
def __init__(self, name, rtcfg, run_mode, start_point):
    """Initialise a task definition with empty/default settings.

    Args:
        name: Task name; must satisfy TaskID.is_valid_name.
        rtcfg: Stored as-is on ``self.rtconfig``.
        run_mode: Stored as-is on ``self.run_mode``.
        start_point: Stored as-is on ``self.start_point``.

    Raises:
        TaskDefError: If ``name`` is not a valid task name.
    """
    if not TaskID.is_valid_name(name):
        raise TaskDefError("Illegal task name: %s" % name)

    # Values supplied by the caller.
    self.name = name
    self.rtconfig = rtcfg
    self.run_mode = run_mode
    self.start_point = start_point

    # Flags and scalar defaults.
    self.used_in_offset_trigger = False
    self.sequential = False
    self.max_future_prereq_offset = None
    self.clocktrigger_offset = None
    self.expiration_offset = None

    # Containers, all initially empty.
    self.sequences = []
    self.namespace_hierarchy = []
    self.external_triggers = []
    self.outputs = set()
    self.suite_polling_cfg = {}
    self.dependencies = {}
    self.graph_children = {}
    self.graph_parents = {}
    self.param_var = {}
    self.xtrig_labels = {}  # {sequence: [labels]}

    # Bounded history; the oldest entries drop out once the maximum
    # length is reached.
    self.elapsed_times = deque(maxlen=self.MAX_LEN_ELAPSED_TIMES)
def __init__(self, name, rtcfg, run_mode, start_point, spawn_ahead):
    """Initialise a task definition with empty/default settings.

    Args:
        name: Task name; must satisfy TaskID.is_valid_name.
        rtcfg: Stored as-is on ``self.rtconfig``.
        run_mode: Stored as-is on ``self.run_mode``.
        start_point: Stored as-is on ``self.start_point``.
        spawn_ahead: Stored as-is on ``self.spawn_ahead``.

    Raises:
        TaskDefError: If ``name`` is not a valid task name.
    """
    if not TaskID.is_valid_name(name):
        raise TaskDefError("Illegal task name: %s" % name)

    # Values supplied by the caller.
    self.name = name
    self.rtconfig = rtcfg
    self.run_mode = run_mode
    self.start_point = start_point
    self.spawn_ahead = spawn_ahead

    # Flags and scalar defaults.
    self.used_in_offset_trigger = False
    self.sequential = False
    self.max_future_prereq_offset = None
    self.clocktrigger_offset = None
    self.expiration_offset = None

    # Containers, all initially empty.
    self.sequences = []
    self.namespace_hierarchy = []
    self.outputs = []
    self.external_triggers = []
    self.intercycle_offsets = set()
    self.xtrig_labels = set()
    self.suite_polling_cfg = {}
    self.dependencies = {}
    self.param_var = {}

    # Note a task can only have one clock xtrigger - if it depends on
    # several we just keep the label of the one with the largest offset
    # (this is determined and set during suite config parsing, to avoid
    # storing the offset here in the taskdef).
    self.xclock_label = None

    # Bounded history; the oldest entries drop out once the maximum
    # length is reached.
    self.elapsed_times = deque(maxlen=self.MAX_LEN_ELAPSED_TIMES)
def __init__(self, tdef, point, status, is_held):
    """Initialise the state of one task instance.

    Args:
        tdef: Task definition; its ``name`` and ``external_triggers`` are
            read here and it is passed on to the prerequisite, xtrigger
            and output set-up.
        point: Cycle point of the task instance.
        status: Initial task status.
        is_held: Initial held flag.
    """
    point_string = str(point)

    self.identity = TaskID.get(tdef.name, point_string)
    self.status = status
    self.is_held = is_held
    self.is_updated = False
    self.time_updated = None
    self._is_satisfied = None
    self._suicide_is_satisfied = None

    # Prerequisites.
    self.prerequisites = []
    self.suicide_prerequisites = []
    self._add_prerequisites(point, tdef)

    # External Triggers, all initially unsatisfied (False).
    # Allow cycle-point-specific external triggers - GitHub #1893:
    # the placeholder, where present, is replaced by this cycle point.
    self.external_triggers = {
        ext.replace('$CYLC_TASK_CYCLE_POINT', point_string): False
        for ext in tdef.external_triggers
    }

    # xtriggers (represented by labels) satisfied or not
    self.xtriggers = {}
    self._add_xtriggers(point, tdef)

    # Message outputs.
    self.outputs = TaskOutputs(tdef)
    self.kill_failed = False
def _match_ext_trigger(self, itask): """Match external triggers for a waiting task proxy.""" if not self.ext_triggers or not itask.state.external_triggers: return False for trig, satisfied in list(itask.state.external_triggers.items()): if satisfied: continue for qmsg, qid in self.ext_triggers.copy(): if trig != qmsg: continue # Matched. point_string = TaskID.split(itask.identity)[1] # Set trigger satisfied. itask.state.external_triggers[trig] = True # Broadcast the event ID to the cycle point. if qid is not None: self.put_broadcast( [point_string], ['root'], [{ 'environment': { 'CYLC_EXT_TRIGGER_ID': qid } }], ) # Create data-store delta self.data_store_mgr.delta_task_ext_trigger( itask, qid, qmsg, True) self.ext_triggers[(qmsg, qid)] -= 1 if not self.ext_triggers[(qmsg, qid)]: del self.ext_triggers[(qmsg, qid)] return True return False
def update_task_proxies(self, task_ids=None):
    """Update dynamic task instance fields.

    Args:
        task_ids: Optional collection of task identities. When given and
            non-empty, only tasks whose identity is in it are updated.
            Tasks without an entry in ``self.task_proxies`` are always
            skipped.
    """
    update_time = time()

    # update task instance
    for itask in self.schd.pool.get_all_tasks():
        identity = itask.identity
        if ((task_ids and identity not in task_ids) or
                identity not in self.task_proxies):
            continue
        # Only split the identity for tasks that are actually updated.
        name, point_string = TaskID.split(identity)
        ts = itask.get_state_summary()
        self.cycle_states.setdefault(point_string, {})[name] = ts['state']
        tproxy = self.task_proxies[identity]
        tproxy.checksum = f"{identity}@{update_time}"
        tproxy.state = ts['state']
        tproxy.job_submits = ts['submit_num']
        tproxy.spawned = ast.literal_eval(ts['spawned'])
        tproxy.latest_message = ts['latest_message']
        tproxy.jobs[:] = [
            f"{self.workflow_id}/{job_id}" for job_id in itask.jobs]
        tproxy.broadcasts[:] = [
            f"{key}={val}" for key, val in
            self.schd.task_events_mgr.broadcast_mgr.get_broadcast(
                identity).items()]
        prereq_list = []
        for prereq in itask.state.prerequisites:
            # Protobuf messages populated within
            prereq_obj = prereq.api_dump(self.workflow_id)
            if prereq_obj:
                prereq_list.append(prereq_obj)
        # The proxy persists between updates, so clear the previous
        # entries before refilling; otherwise each update would append
        # another copy of every prerequisite and output.
        del tproxy.prerequisites[:]
        tproxy.prerequisites.extend(prereq_list)
        tproxy.outputs[:] = [
            f"{msg}={is_completed}"
            for _, msg, is_completed in itask.state.outputs.get_all()]
def match_ext_trigger(self, itask):
    """Match external triggers for a waiting task proxy.

    Each unsatisfied external trigger of ``itask`` that equals the message
    of a queued trigger is marked satisfied, and one count of that queued
    trigger is consumed.

    Returns:
        bool: True if at least one trigger was matched, False otherwise
        (including when there is nothing queued or nothing to satisfy).
    """
    if not self.ext_triggers or not itask.state.external_triggers:
        # Return a bool here too, consistent with every other exit path.
        return False
    has_changed = False
    # Iterate over snapshots because both mappings are modified on a match.
    for trig, satisfied in list(itask.state.external_triggers.items()):
        if satisfied:
            continue
        for qmsg, qid in self.ext_triggers.copy():
            if trig != qmsg:
                continue
            # Matched.
            point_string = TaskID.split(itask.identity)[1]
            # Set trigger satisfied.
            itask.state.external_triggers[trig] = True
            # Broadcast the event ID to the cycle point.
            if qid is not None:
                self.put_broadcast(
                    [point_string],
                    ['root'],
                    [{'environment': {'CYLC_EXT_TRIGGER_ID': qid}}],
                )
            # Consume one count; drop the entry once it reaches zero.
            self.ext_triggers[(qmsg, qid)] -= 1
            if not self.ext_triggers[(qmsg, qid)]:
                del self.ext_triggers[(qmsg, qid)]
            has_changed = True
            # One queued trigger per task trigger; move to the next one.
            break
    return has_changed
def remove_task_jobs(self, task_id):
    """Remove all jobs associated with a task from the pool.

    Args:
        task_id: Task identity, split by TaskID.split into name and
            cycle point. Every pool key containing
            ``/<point>/<name>/`` is deleted.
    """
    name, point_string = TaskID.split(task_id)
    t_id = f"/{point_string}/{name}/"
    # Iterate over a snapshot of the keys: deleting from a dict while
    # iterating over it raises "RuntimeError: dictionary changed size
    # during iteration".
    for job_d in list(self.pool.keys()):
        if t_id in job_d:
            del self.pool[job_d]
def __init__(
    self,
    tdef: 'TaskDef',
    start_point: 'PointBase',
    flow_label: Optional[str],
    status: str = TASK_STATUS_WAITING,
    is_held: bool = False,
    submit_num: int = 0,
    is_late: bool = False,
    reflow: bool = True
) -> None:
    """Initialise a task proxy for ``tdef`` at cycle point ``start_point``.

    Args:
        tdef: Task definition this proxy is an instance of.
        start_point: Cycle point of this instance.
        flow_label: Stored as-is on ``self.flow_label``.
        status: Initial status passed to the task state.
        is_held: Initial held flag passed to the task state.
        submit_num: Submit number; ``None`` is treated as 0.
        is_late: Stored as-is on ``self.is_late``.
        reflow: Stored as-is on ``self.reflow``.
    """
    # Identity.
    self.tdef = tdef
    self.point = start_point
    self.identity: str = TaskID.get(self.tdef.name, self.point)
    self.flow_label = flow_label
    self.reflow = reflow

    # Job submission bookkeeping.
    self.submit_num = 0 if submit_num is None else submit_num
    self.jobs: List[str] = []
    self.is_manual_submit = False
    self.reload_successor: Optional['TaskProxy'] = None
    self.point_as_seconds: Optional[int] = None

    self.summary: Dict[str, Any] = {
        'submitted_time': None,
        'submitted_time_string': None,
        'started_time': None,
        'started_time_string': None,
        'finished_time': None,
        'finished_time_string': None,
        'logfiles': [],
        'platforms_used': {},
        'execution_time_limit': None,
        'job_runner_name': None,
        'submit_method_id': None,
        'flow_label': None
    }

    self.local_job_file_path: Optional[str] = None
    self.platform = get_platform()
    self.job_vacated = False

    # Timers and event counters.
    self.poll_timer: Optional['TaskActionTimer'] = None
    self.timeout: Optional[float] = None
    self.try_timers: Dict[str, 'TaskActionTimer'] = {}
    self.non_unique_events = Counter()  # type: ignore  # TODO: figure out

    # Time-based thresholds, unset initially.
    self.clock_trigger_time: Optional[float] = None
    self.expire_time: Optional[float] = None
    self.late_time: Optional[float] = None
    self.is_late = is_late
    self.waiting_on_job_prep = True

    self.state = TaskState(tdef, self.point, status, is_held)

    # Determine graph children of this task (for spawning).
    self.graph_children = generate_graph_children(tdef, self.point)
def main(
    parser: COP,
    options: 'Values',
    workflow: str,
    task_id: Optional[str] = None
) -> None:
    """Ping a workflow and, optionally, check that a task is running.

    Exits with status 1 (after printing a message) if ``task_id`` is given
    and the task is not found or is not in the running state.

    Raises:
        UserInputError: If ``task_id`` is given but is not a valid task ID.
    """
    workflow, _ = parse_reg(workflow)
    pclient = get_client(workflow, timeout=options.comms_timeout)

    if task_id and not TaskID.is_valid_id(task_id):
        raise UserInputError("Invalid task ID: %s" % task_id)

    flow_kwargs = {
        'request_string': FLOW_QUERY,
        'variables': {'wFlows': [workflow]}
    }
    task_kwargs: Dict[str, Any] = {'request_string': TASK_QUERY}

    # cylc ping WORKFLOW
    result = pclient('graphql', flow_kwargs)
    msg = ""
    for flow in result['workflows']:
        w_name, w_port, w_pub_port = (
            flow['name'], flow['port'], flow['pubPort'])
        if cylc.flow.flags.verbosity > 0:
            sys.stdout.write(
                f'{w_name} running on '
                f'{pclient.host}:{w_port} {w_pub_port}\n'
            )
        if not task_id:
            continue

        # cylc ping WORKFLOW TASKID
        task, point = TaskID.split(task_id)
        w_id = flow['id']
        task_kwargs['variables'] = {
            'tProxy': f'{w_id}{ID_DELIM}{point}{ID_DELIM}{task}'
        }
        task_result = pclient('graphql', task_kwargs)
        if not task_result.get('taskProxy'):
            msg = "task not found"
        elif task_result['taskProxy']['state'] != TASK_STATUS_RUNNING:
            msg = f"task not {TASK_STATUS_RUNNING}"
        if msg:
            print(cparse(f'<red>{msg}</red>'))
            sys.exit(1)
def insert_db_job(self, row_idx, row):
    """Load job element from DB post restart.

    Args:
        row_idx: Index of the row; a log line is written for index 0.
        row: 10-item sequence: cycle point, task name, status, submit
            number, submit/run/exit times, batch system name, batch
            system job ID and platform name.

    Rows whose status is not in JOB_STATUS_SET are ignored. Failures while
    building the job element are logged and the row is skipped.
    """
    if row_idx == 0:
        LOG.info("LOADING job data")
    (
        point_string,
        name,
        status,
        submit_num,
        time_submit,
        time_run,
        time_run_exit,
        batch_sys_name,
        batch_sys_job_id,
        platform_name,
    ) = row
    if status not in JOB_STATUS_SET:
        return
    t_id = f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}'
    j_id = f'{t_id}{ID_DELIM}{submit_num}'
    try:
        tdef = self.schd.config.get_taskdef(name)
        j_owner = self.schd.owner
        # Without a recorded platform, fall back to the scheduler's host.
        j_host = (
            get_host_from_platform(get_platform(platform_name))
            if platform_name
            else self.schd.host
        )
        j_buf = PbJob(
            stamp=f'{j_id}@{time()}',
            id=j_id,
            submit_num=submit_num,
            state=status,
            task_proxy=t_id,
            submitted_time=time_submit,
            started_time=time_run,
            finished_time=time_run_exit,
            batch_sys_name=batch_sys_name,
            batch_sys_job_id=batch_sys_job_id,
            host=j_host,
            owner=j_owner,
            name=name,
            cycle_point=point_string,
        )
        # Add in log files.
        j_buf.job_log_dir = get_task_job_log(
            self.schd.suite, point_string, name, submit_num)
        broadcast_mgr = self.schd.task_events_mgr.broadcast_mgr
        overrides = broadcast_mgr.get_broadcast(
            TaskID.get(name, point_string))
        rtconfig = tdef.rtconfig
        if overrides:
            # Apply overrides to a copy so the task definition's own
            # runtime config is left untouched.
            rtconfig = pdeepcopy(rtconfig)
            poverride(rtconfig, overrides, prepend=True)
        j_buf.extra_logs.extend([
            os.path.expanduser(os.path.expandvars(log_file))
            for log_file in rtconfig['extra log files']
        ])
    except SuiteConfigError:
        LOG.exception(
            ('ignoring job %s from the suite run database\n'
             '(its task definition has probably been deleted).') % j_id)
    except Exception:
        LOG.exception('could not load job %s' % j_id)
    else:
        self.added[j_id] = j_buf
        self.task_jobs.setdefault(t_id, set()).add(j_id)
        self.updates_pending = True
def main(
    parser: COP,
    options: 'Values',
    reg: str,
    shutdown_arg: Optional[str] = None
) -> None:
    """Request a workflow stop and optionally poll until it has stopped.

    ``shutdown_arg`` is sent as the stop task when it is a valid task ID,
    and as the stop cycle point otherwise. Without it, the stop mode is
    chosen from ``--kill`` / ``--now``. Exits with status 1 if polling was
    requested and the poll fails.
    """
    if shutdown_arg is not None and options.kill:
        parser.error("ERROR: --kill is not compatible with [STOP]")
    if options.kill and options.now:
        parser.error("ERROR: --kill is not compatible with --now")
    if options.flow_label and int(options.max_polls) > 0:
        parser.error("ERROR: --flow is not compatible with --max-polls")

    reg, _ = parse_reg(reg)
    pclient = get_client(reg, timeout=options.comms_timeout)

    # Only poll when asked to (test to avoid the "nothing to do" warning
    # for --max-polls=0).
    polling = int(options.max_polls) > 0
    if polling:
        spoller = StopPoller(
            pclient,
            "workflow stopped",
            options.interval,
            options.max_polls,
        )

    # mode defaults to 'Clean'
    mode = task = cycle_point = None
    if shutdown_arg is not None:
        if TaskID.is_valid_id(shutdown_arg):
            # STOP argument detected
            task = shutdown_arg
        else:
            # not a task ID, may be a cycle point
            cycle_point = shutdown_arg
    elif options.kill:
        mode = WorkflowStopMode.Kill.name
    elif options.now > 1:
        mode = WorkflowStopMode.NowNow.name
    elif options.now:
        mode = WorkflowStopMode.Now.name

    variables = {
        'wFlows': [reg],
        'stopMode': mode,
        'cyclePoint': cycle_point,
        'clockTime': options.wall_clock,
        'task': task,
        'flowLabel': options.flow_label,
    }
    pclient('graphql', {'request_string': MUTATION, 'variables': variables})

    if polling and not spoller.poll():
        sys.exit(1)
def main(parser, options, workflow, shutdown_arg=None):
    """Request a workflow stop and optionally poll until it has stopped.

    ``shutdown_arg`` is sent as the stop task when it is a valid task ID,
    and as the stop cycle point otherwise. Without it, the stop mode is
    chosen from ``--kill`` / ``--now``. Exits with status 1 if polling was
    requested and the poll fails.
    """
    if shutdown_arg is not None and options.kill:
        parser.error("ERROR: --kill is not compatible with [STOP]")
    if options.kill and options.now:
        parser.error("ERROR: --kill is not compatible with --now")
    if options.flow_label and int(options.max_polls) > 0:
        parser.error("ERROR: --flow is not compatible with --max-polls")

    workflow = os.path.normpath(workflow)
    pclient = get_client(workflow, timeout=options.comms_timeout)

    # Only poll when asked to (test to avoid the "nothing to do" warning
    # for --max-polls=0).
    polling = int(options.max_polls) > 0
    if polling:
        spoller = StopPoller(
            pclient,
            "workflow stopped",
            options.interval,
            options.max_polls,
        )

    # mode defaults to 'Clean'
    mode = task = cycle_point = None
    if shutdown_arg is not None:
        if TaskID.is_valid_id(shutdown_arg):
            # STOP argument detected
            task = shutdown_arg
        else:
            # not a task ID, may be a cycle point
            cycle_point = shutdown_arg
    elif options.kill:
        mode = 'Kill'
    elif options.now > 1:
        mode = 'NowNow'
    elif options.now:
        mode = 'Now'

    variables = {
        'wFlows': [workflow],
        'stopMode': mode,
        'cyclePoint': cycle_point,
        'clockTime': options.wall_clock,
        'task': task,
        'flowLabel': options.flow_label,
    }
    pclient('graphql', {'request_string': MUTATION, 'variables': variables})

    if polling and not spoller.poll():
        sys.exit(1)
def _generate_ghost_families(self, family_proxies=None, cycle_points=None): """Generate the family proxies from tasks in cycle points.""" update_time = time() if family_proxies is None: family_proxies = {} fam_proxy_ids = {} for point_string, tasks in self.cycle_states.items(): # construct family tree based on the # first-parent single-inheritance tree if not cycle_points or point_string not in cycle_points: continue cycle_first_parents = set([]) for key in tasks: for parent in self.ancestors.get(key, []): if parent == key: continue cycle_first_parents.add(parent) for fam in cycle_first_parents: if fam not in self.families: continue int_id = TaskID.get(fam, point_string) fp_id = f"{self.workflow_id}/{point_string}/{fam}" fp_check = f"{int_id}@{update_time}" fproxy = PbFamilyProxy( checksum=fp_check, id=fp_id, cycle_point=point_string, name=fam, family=f"{self.workflow_id}/{fam}", depth=self.families[fam].depth, ) for child_name in self.descendants[fam]: ch_id = f"{self.workflow_id}/{point_string}/{child_name}" if self.parents[child_name][0] == fam: if child_name in cycle_first_parents: fproxy.child_families.append(ch_id) elif child_name in self.tasks: fproxy.child_tasks.append(ch_id) if self.parents[fam]: fproxy.parents.extend([ f"{self.workflow_id}/{point_string}/{p_name}" for p_name in self.parents[fam] ]) p1_name = self.parents[fam][0] fproxy.first_parent = ( f"{self.workflow_id}/{point_string}/{p1_name}") family_proxies[int_id] = fproxy fam_proxy_ids.setdefault(fam, []).append(fp_id) self.family_proxies = family_proxies for fam, ids in fam_proxy_ids.items(): self.families[fam].proxies[:] = ids
def main(parser, options, suite, task_id=None):
    """Ping a suite and, optionally, check that a task is running.

    Exits with status 1 (after printing a message) if ``task_id`` is given
    and the task is not found or is not in the running state.

    Raises:
        UserInputError: If ``task_id`` is given but is not a valid task ID.
    """
    pclient = SuiteRuntimeClient(suite, timeout=options.comms_timeout)

    if task_id and not TaskID.is_valid_id(task_id):
        raise UserInputError("Invalid task ID: %s" % task_id)

    flow_kwargs = {
        'request_string': FLOW_QUERY,
        'variables': {'wFlows': [suite]}
    }
    task_kwargs = {'request_string': TASK_QUERY}

    # cylc ping SUITE
    result = pclient('graphql', flow_kwargs)
    msg = ""
    for flow in result['workflows']:
        w_name, w_port, w_pub_port = (
            flow['name'], flow['port'], flow['pubPort'])
        if cylc.flow.flags.verbose:
            sys.stdout.write(
                f'{w_name} running on '
                f'{pclient.host}:{w_port} {w_pub_port}\n'
            )
        if not task_id:
            continue

        # cylc ping SUITE TASKID
        task, point = TaskID.split(task_id)
        w_id = flow['id']
        task_kwargs['variables'] = {
            'tProxy': f'{w_id}{ID_DELIM}{point}{ID_DELIM}{task}'
        }
        task_result = pclient('graphql', task_kwargs)
        if not task_result.get('taskProxy'):
            msg = "task not found"
        elif task_result['taskProxy']['state'] != TASK_STATUS_RUNNING:
            msg = f"task not {TASK_STATUS_RUNNING}"
        if msg:
            print(cparse(f'<red>{msg}</red>'))
            sys.exit(1)
def _get_tasks_info(schd): """Retrieve task summary info and states.""" task_summary = {} task_states = {} for task in schd.pool.get_tasks(): ts = task.get_state_summary() task_summary[task.identity] = ts name, point_string = TaskID.split(task.identity) task_states.setdefault(point_string, {}) task_states[point_string][name] = ts['state'] for task in schd.pool.get_rh_tasks(): ts = task.get_state_summary() ts['state'] = TASK_STATUS_RUNAHEAD task_summary[task.identity] = ts name, point_string = TaskID.split(task.identity) task_states.setdefault(point_string, {}) task_states[point_string][name] = ts['state'] return task_summary, task_states
def main(parser, options, suite, *items):
    """Ask a running suite to insert the given task IDs.

    Raises:
        UserInputError: If any item is rejected by TaskID.is_valid_id_2;
            the message gives the 1-based position of the argument.
    """
    for position, item in enumerate(items, start=1):
        if TaskID.is_valid_id_2(item):
            continue
        raise UserInputError(
            '"%s": invalid task ID (argument %d)' % (item, position))

    prompt('Insert %s in %s' % (items, suite), options.force)

    pclient = SuiteRuntimeClient(
        suite, options.owner, options.host, options.port)
    request = {
        'items': items,
        'no_check': options.no_check,
        'stop_point_string': options.stop_point_string,
    }
    pclient('insert_tasks', request, timeout=options.comms_timeout)
def insert_job(self, job_conf):
    """Insert job into pool.

    Args:
        job_conf (dict): Job configuration; the keys read are visible in
            the body below. The new job element is stored in
            ``self.updates`` and registered against its task proxy ID in
            ``self.task_jobs``.
    """
    def _as_pairs(section):
        # Flatten one dict-valued section into "key=value" strings.
        return [f'{key}={val}' for key, val in job_conf[section].items()]

    update_time = time()
    job_owner = job_conf['owner']
    sub_num = job_conf['submit_num']
    name, point_string = TaskID.split(job_conf['task_id'])
    t_id = f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}'
    j_id = f'{t_id}{ID_DELIM}{sub_num}'

    j_buf = PbJob(
        stamp=f'{j_id}@{update_time}',
        id=j_id,
        submit_num=sub_num,
        state=JOB_STATUSES_ALL[0],
        task_proxy=t_id,
        batch_sys_name=job_conf['batch_system_name'],
        env_script=job_conf['env-script'],
        err_script=job_conf['err-script'],
        exit_script=job_conf['exit-script'],
        execution_time_limit=job_conf['execution_time_limit'],
        host=job_conf['host'],
        init_script=job_conf['init-script'],
        job_log_dir=job_conf['job_log_dir'],
        owner=job_owner,
        post_script=job_conf['post-script'],
        pre_script=job_conf['pre-script'],
        script=job_conf['script'],
        work_sub_dir=job_conf['work_d'],
        name=name,
        cycle_point=point_string,
    )
    j_buf.batch_sys_conf.extend(_as_pairs('batch_system_conf'))
    j_buf.directives.extend(_as_pairs('directives'))
    j_buf.environment.extend(_as_pairs('environment'))
    j_buf.param_env_tmpl.extend(_as_pairs('param_env_tmpl'))
    j_buf.param_var.extend(_as_pairs('param_var'))
    j_buf.extra_logs.extend(job_conf['logfiles'])

    self.updates[j_id] = j_buf
    self.task_jobs.setdefault(t_id, set()).add(j_id)
    self.updates_pending = True
def main(parser, options, suite, shutdown_arg=None):
    """Request a suite shutdown and optionally poll until it has stopped.

    The kind of shutdown is chosen, in order of precedence, from
    ``--wall-clock``, the STOP argument (task ID or cycle point), ``--now``
    (given once or more than once) and finally a clean stop. Each request
    is preceded by a confirmation prompt. Exits with status 1 if polling
    was requested and the poll fails.
    """
    if shutdown_arg is not None and options.kill:
        parser.error("ERROR: --kill is not compatible with [STOP]")
    if options.kill and options.now:
        parser.error("ERROR: --kill is not compatible with --now")

    pclient = SuiteRuntimeClient(
        suite,
        options.owner,
        options.host,
        options.port,
        options.comms_timeout,
    )

    # Only poll when asked to (test to avoid the "nothing to do" warning
    # for --max-polls=0).
    polling = int(options.max_polls) > 0
    if polling:
        spoller = StopPoller(
            pclient, "suite stopped", options.interval, options.max_polls)

    # Select the confirmation message, the command and its arguments.
    if options.wall_clock:
        message = 'Set shutdown at wall clock %s for %s' % (
            options.wall_clock, suite)
        command = 'set_stop_after_clock_time'
        command_args = ({'datetime_string': options.wall_clock},)
    elif shutdown_arg is not None:
        if TaskID.is_valid_id(shutdown_arg):
            # STOP argument detected
            message = 'Set shutdown after task %s for %s' % (
                shutdown_arg, suite)
            command = 'set_stop_after_task'
            command_args = ({'task_id': shutdown_arg},)
        else:
            # not a task ID, may be a cycle point
            message = 'Set shutdown at cycle point %s for %s' % (
                shutdown_arg, suite)
            command = 'set_stop_after_point'
            command_args = ({'point_string': shutdown_arg},)
    elif options.now > 1:
        message = 'Shut down and terminate %s now' % suite
        command = 'stop_now'
        command_args = ({'terminate': True},)
    elif options.now:
        message = 'Shut down %s now' % suite
        command = 'stop_now'
        command_args = ()
    else:
        message = 'Shut down %s' % suite
        command = 'set_stop_cleanly'
        command_args = ({'kill_active_tasks': options.kill},)

    prompt(message, options.force)
    pclient(command, *command_args)

    if polling and not spoller.poll():
        sys.exit(1)
def update_family_proxies(self, cycle_points=None):
    """Update state of family proxies.

    Also recomputes ``self.all_states`` (every non-None task state seen)
    and ``self.state_count_cycles`` (state counts per cycle point).

    Args:
        cycle_points: Optional collection of cycle point strings; when
            given and non-empty, only those points are processed.
    """
    update_time = time()

    # Compute state_counts (total, and per cycle).
    all_states = []
    state_count_cycles = {}
    for point_string, point_states in self.cycle_states.items():
        if cycle_points and point_string not in cycle_points:
            continue

        # For each cycle point, construct a family state tree
        # based on the first-parent single-inheritance tree
        family_states = {}
        tally = {}
        for task_name, task_state in point_states.items():
            if task_state is None:
                continue
            tally[task_state] = tally.get(task_state, 0) + 1
            all_states.append(task_state)
            for ancestor in self.ancestors.get(task_name, []):
                if ancestor != task_name:
                    family_states.setdefault(
                        ancestor, set()).add(task_state)
        state_count_cycles[point_string] = tally

        for fam, child_states in family_states.items():
            fam_state = extract_group_state(child_states)
            int_id = TaskID.get(fam, point_string)
            if fam_state is not None and int_id in self.family_proxies:
                fproxy = self.family_proxies[int_id]
                fproxy.checksum = f"{int_id}@{update_time}"
                fproxy.state = fam_state

    self.all_states = all_states
    self.state_count_cycles = state_count_cycles
def insert_job(self, job_conf):
    """Insert job into pool.

    Args:
        job_conf (dict): Job configuration; the keys read are visible in
            the body below. The new job element is stored in ``self.pool``
            under ``job_conf['job_d']``.
    """
    def _as_pairs(section):
        # Flatten one dict-valued section into "key=value" strings.
        return [f"{key}={val}" for key, val in job_conf[section].items()]

    update_time = time()
    int_id = job_conf['job_d']
    job_owner = job_conf['owner']
    name, point_string = TaskID.split(job_conf['task_id'])
    t_id = f"{self.owner}/{self.suite}/{point_string}/{name}"
    j_id = f"{self.owner}/{self.suite}/{int_id}"

    j_buf = PbJob(
        checksum=f"{int_id}@{update_time}",
        id=j_id,
        submit_num=job_conf['submit_num'],
        state=JOB_STATUSES_ALL[0],
        task_proxy=t_id,
        batch_sys_name=job_conf['batch_system_name'],
        env_script=job_conf['env-script'],
        err_script=job_conf['err-script'],
        exit_script=job_conf['exit-script'],
        execution_time_limit=job_conf['execution_time_limit'],
        host=job_conf['host'],
        init_script=job_conf['init-script'],
        job_log_dir=job_conf['job_log_dir'],
        owner=job_owner,
        post_script=job_conf['post-script'],
        pre_script=job_conf['pre-script'],
        script=job_conf['script'],
        work_sub_dir=job_conf['work_d'],
    )
    j_buf.batch_sys_conf.extend(_as_pairs('batch_system_conf'))
    j_buf.directives.extend(_as_pairs('directives'))
    j_buf.environment.extend(_as_pairs('environment'))
    j_buf.param_env_tmpl.extend(_as_pairs('param_env_tmpl'))
    j_buf.param_var.extend(_as_pairs('param_var'))
    j_buf.extra_logs.extend(job_conf['logfiles'])

    self.pool[int_id] = j_buf
def generate_ghost_task(self, task_id):
    """Create task-point element populated with static data.

    Args:
        task_id (str):
            valid TaskID string.

    Returns:
        object: cylc.flow.data_messages_pb2.PbTaskProxy
            Populated task proxy data element.

    """
    update_time = time()
    name, point_string = TaskID.split(task_id)

    def _node_id(node_name):
        # ID of the node called ``node_name`` at this task's cycle point.
        return (
            f'{self.workflow_id}{ID_DELIM}{point_string}'
            f'{ID_DELIM}{node_name}')

    self.cycle_states.setdefault(point_string, {})[name] = (None, False)

    t_id = f'{self.workflow_id}{ID_DELIM}{name}'
    tp_id = _node_id(name)

    # Look the definition up in the stored data first, then in the pending
    # updates, and finally fall back to MESSAGE_MAP[TASKS].
    known_tasks = self.data[self.workflow_id][TASKS]
    fallback = self.updates[TASKS].get(t_id, MESSAGE_MAP[TASKS])
    taskdef = known_tasks.get(t_id, fallback)

    tproxy = PbTaskProxy(
        stamp=f'{tp_id}@{update_time}',
        id=tp_id,
        task=taskdef.id,
        cycle_point=point_string,
        depth=taskdef.depth,
        name=name,
    )
    tproxy.namespace[:] = taskdef.namespace
    tproxy.parents[:] = [_node_id(p_name) for p_name in self.parents[name]]
    # The task itself is excluded from its own ancestor list.
    tproxy.ancestors[:] = [
        _node_id(a_name)
        for a_name in self.ancestors[name]
        if a_name != name
    ]
    tproxy.first_parent = tproxy.ancestors[0]
    return tproxy
def insert_job(self, job_conf):
    """Insert job into pool.

    Args:
        job_conf (dict): Job configuration; the keys read are visible in
            the body below. Dict-valued sections are stored as JSON
            strings. The new job element is stored in ``self.added`` and
            registered against its task proxy ID in ``self.task_jobs``.
    """
    job_owner = job_conf['owner']
    sub_num = job_conf['submit_num']
    name, point_string = TaskID.split(job_conf['task_id'])
    t_id = f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}'
    j_id = f'{t_id}{ID_DELIM}{sub_num}'

    job_fields = dict(
        stamp=f'{j_id}@{time()}',
        id=j_id,
        submit_num=sub_num,
        state=JOB_STATUSES_ALL[0],
        task_proxy=t_id,
        batch_sys_name=job_conf['batch_system_name'],
        env_script=job_conf['env-script'],
        err_script=job_conf['err-script'],
        exit_script=job_conf['exit-script'],
        execution_time_limit=job_conf['execution_time_limit'],
        host=job_conf['platform']['name'],
        init_script=job_conf['init-script'],
        owner=job_owner,
        post_script=job_conf['post-script'],
        pre_script=job_conf['pre-script'],
        script=job_conf['script'],
        work_sub_dir=job_conf['work_d'],
        name=name,
        cycle_point=point_string,
        batch_sys_conf=json.dumps(job_conf['batch_system_conf']),
        directives=json.dumps(job_conf['directives']),
        environment=json.dumps(job_conf['environment']),
        param_var=json.dumps(job_conf['param_var']),
    )
    j_buf = PbJob(**job_fields)

    # Add in log files.
    j_buf.job_log_dir = get_task_job_log(
        self.schd.suite, point_string, name, sub_num)
    j_buf.extra_logs.extend(job_conf['logfiles'])

    self.added[j_id] = j_buf
    self.task_jobs.setdefault(t_id, set()).add(j_id)
    self.updates_pending = True
def _generate_ghost_task(self, task_id):
    """Create task instances populated with static data fields.

    Args:
        task_id: Task identity, split by TaskID.split into name and
            cycle point.

    Returns:
        The new PbTaskProxy. The task's entry in ``self.cycle_states`` is
        reset to None as a side effect.
    """
    update_time = time()
    name, point_string = TaskID.split(task_id)
    self.cycle_states.setdefault(point_string, {})[name] = None

    taskdef = self.tasks[name]
    tproxy = PbTaskProxy(
        checksum=f"{task_id}@{update_time}",
        id=f"{self.workflow_id}/{point_string}/{name}",
        task=taskdef.id,
        cycle_point=point_string,
        depth=taskdef.depth,
    )
    tproxy.namespace[:] = taskdef.namespace

    parent_names = self.parents[name]
    tproxy.parents[:] = [
        f"{self.workflow_id}/{point_string}/{p_name}"
        for p_name in parent_names]
    p1_name = parent_names[0]
    tproxy.first_parent = f"{self.workflow_id}/{point_string}/{p1_name}"
    return tproxy
def get_broadcast(self, task_id=None):
    """Retrieve all broadcast variables that target a given task ID.

    Args:
        task_id: Task identity. If empty, None or the string "None", the
            whole broadcast mapping is returned instead.

    Returns:
        dict: Either ``self.broadcasts`` itself, or a new dict merged from
        every broadcast that applies to the task.

    Raises:
        Exception: If ``task_id`` cannot be split into name and point.
    """
    if not task_id or task_id == "None":
        # all broadcasts requested
        return self.broadcasts

    try:
        name, point_string = TaskID.split(task_id)
    except ValueError:
        raise Exception("Can't split task_id %s" % task_id)

    merged = {}
    # The order is:
    #    all:root -> all:FAM -> ... -> all:task
    # -> tag:root -> tag:FAM -> ... -> tag:task
    for cycle in self.ALL_CYCLE_POINTS_STRS + [point_string]:
        if cycle in self.broadcasts:
            by_namespace = self.broadcasts[cycle]
            for namespace in reversed(self.linearized_ancestors[name]):
                if namespace in by_namespace:
                    self._addict(merged, by_namespace[namespace])
    return merged
def get_broadcast(self, task_id=None):
    """Retrieve all broadcast variables that target a given task ID.

    Args:
        task_id: Task identity. If empty, None or the string "None", the
            whole broadcast mapping is returned instead.

    Returns:
        dict: Either ``self.broadcasts`` itself, or a new dict merged from
        every broadcast that applies to the task.

    Raises:
        Exception: If ``task_id`` cannot be split into name and point.
    """
    if not task_id or task_id == "None":
        # all broadcasts requested
        return self.broadcasts

    try:
        name, point_string = TaskID.split(task_id)
    except ValueError:
        raise Exception("Can't split task_id %s" % task_id)

    merged = {}
    # The order is:
    #    all:root -> all:FAM -> ... -> all:task
    # -> tag:root -> tag:FAM -> ... -> tag:task
    for cycle in ALL_CYCLE_POINTS_STRS + [point_string]:
        if cycle in self.broadcasts:
            by_namespace = self.broadcasts[cycle]
            for namespace in reversed(self.linearized_ancestors[name]):
                if namespace in by_namespace:
                    addict(merged, by_namespace[namespace])
    return merged
def update_task_proxies(self, updated_tasks=None):
    """Update dynamic fields of task nodes/proxies.

    Args:
        updated_tasks (list): [cylc.flow.task_proxy.TaskProxy]
            Update task-node from corresponding given list of
            task proxy objects from the workflow task pool.

    """
    if not updated_tasks:
        return

    workflow_data = self.data[self.workflow_id]
    tasks = workflow_data[TASKS]
    task_proxies = workflow_data[TASK_PROXIES]
    update_time = time()
    task_defs = {}

    # update task instance
    for itask in updated_tasks:
        name, point_string = TaskID.split(itask.identity)
        tp_id = (
            f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}')
        in_store = tp_id in task_proxies
        if not in_store and tp_id not in self.added[TASK_PROXIES]:
            continue

        # Gather task definitions for elapsed time recalculation.
        task_defs.setdefault(name, itask.tdef)

        # Create new message and copy existing message content.
        tp_delta = self.updated[TASK_PROXIES].setdefault(
            tp_id, PbTaskProxy(id=tp_id))
        tp_delta.stamp = f'{tp_id}@{update_time}'
        tp_delta.state = itask.state.status

        # The proxy is either already stored or newly added in this batch.
        known = task_proxies if in_store else self.added[TASK_PROXIES]
        self.state_update_families.add(known[tp_id].first_parent)

        tp_delta.is_held = itask.state.is_held
        tp_delta.flow_label = itask.flow_label
        tp_delta.job_submits = itask.submit_num
        tp_delta.latest_message = itask.summary['latest_message']

        # Only report jobs that the stored proxy does not list yet.
        stored_jobs = task_proxies.get(tp_id, PbTaskProxy()).jobs
        tp_delta.jobs[:] = [
            j_id
            for j_id in self.schd.job_pool.task_jobs.get(tp_id, [])
            if j_id not in stored_jobs
        ]

        # Protobuf messages populated within api_dump; empty results are
        # dropped.
        prereq_list = list(filter(None, (
            prereq.api_dump(self.workflow_id)
            for prereq in itask.state.prerequisites
        )))
        tp_delta.prerequisites.extend(prereq_list)

        tp_delta.outputs = json.dumps({
            trigger: is_completed
            for trigger, _, is_completed in itask.state.outputs.get_all()
        })

        extras = {}
        if itask.tdef.clocktrigger_offset is not None:
            extras['Clock trigger time reached'] = (
                itask.is_waiting_clock_done())
            extras['Triggers at'] = time2str(itask.clock_trigger_time)
        for trig, satisfied in itask.state.external_triggers.items():
            extras[f'External trigger "{trig}"'] = (
                'satisfied' if satisfied else 'NOT satisfied')
        for label, satisfied in itask.state.xtriggers.items():
            sig = self.schd.xtrigger_mgr.get_xtrig_ctx(
                itask, label).get_signature()
            extras[f'xtrigger "{label} = {sig}"'] = (
                'satisfied' if satisfied else 'NOT satisfied')
        tp_delta.extras = json.dumps(extras)

    # Recalculate effected task def elements elapsed time.
    for name, tdef in task_defs.items():
        elapsed_time = task_mean_elapsed_time(tdef)
        if not elapsed_time:
            continue
        t_id = f'{self.workflow_id}{ID_DELIM}{name}'
        t_delta = PbTask(
            stamp=f'{t_id}@{update_time}',
            mean_elapsed_time=elapsed_time)
        self.updated[TASKS].setdefault(
            t_id, PbTask(id=t_id)).MergeFrom(t_delta)
        tasks[t_id].MergeFrom(t_delta)
def generate_graph_elements(self, start_point=None, stop_point=None):
    """Generate edges and [ghost] nodes (family and task proxy elements).

    Walks the graph edges between the two points and, for every edge
    whose source or target cycle point is in ``self.pool_points``,
    creates ghost task nodes for valid source/target IDs and an edge
    element for every valid target. Source/target cycle points are also
    recorded in ``self.edge_points`` for later pruning.

    Args:
        start_point (cylc.flow.cycling.PointBase):
            Edge generation start point. Defaults to the minimum of the
            pool points.
        stop_point (cylc.flow.cycling.PointBase):
            Edge generation stop point. Defaults to the maximum of the
            pool points.

    """
    if not self.pool_points:
        return
    config = self.schd.config
    if start_point is None:
        start_point = min(self.pool_points)
    if stop_point is None:
        stop_point = max(self.pool_points)

    # Reference set for workflow relations
    new_edges = set()

    # Generate ungrouped edges
    for edge in config.get_graph_edges(start_point, stop_point):
        # Reference or create edge source & target nodes/proxies
        s_node, t_node = edge[0], edge[1]
        if s_node is None:
            continue

        # Is the source cycle point in the task pool?
        # The source is split even when it is not a valid ID because
        # its name/point are still used to build the edge's source ID.
        s_name, s_point = TaskID.split(s_node)
        s_point_cls = get_point(s_point)
        s_valid = TaskID.is_valid_id(s_node)
        s_pool_point = s_valid and s_point_cls in self.pool_points

        # Is the target cycle point in the task pool?
        t_pool_point = False
        t_valid = t_node and TaskID.is_valid_id(t_node)
        if t_valid:
            t_name, t_point = TaskID.split(t_node)
            t_point_cls = get_point(t_point)
            # Reuse the point object instead of building it twice.
            t_pool_point = t_point_cls in self.pool_points

        # Proceed if either source or target cycle points
        # are in the task pool.
        if not s_pool_point and not t_pool_point:
            continue

        # If source/target is valid add/create the corresponding items.
        # TODO: if xtrigger is suite_state create remote ID
        source_id = (
            f'{self.workflow_id}{ID_DELIM}{s_point}{ID_DELIM}{s_name}')

        # Add valid source before checking for no target,
        # as source may be an isolate (hence no edges).
        if s_valid:
            s_task_id = f'{self.workflow_id}{ID_DELIM}{s_name}'
            # Add source points for pruning.
            self.edge_points.setdefault(s_point_cls, set())
            self.generate_ghost_task(s_task_id, source_id, s_point)

        # Edges are only created for valid targets.
        # At present targets can't be xtriggers.
        if not t_valid:
            continue

        target_id = (
            f'{self.workflow_id}{ID_DELIM}{t_point}{ID_DELIM}{t_name}')
        t_task_id = f'{self.workflow_id}{ID_DELIM}{t_name}'
        # Add target points to associated source points for pruning.
        self.edge_points.setdefault(s_point_cls, set()).add(t_point_cls)
        self.generate_ghost_task(t_task_id, target_id, t_point)

        # Initiate edge element.
        e_id = (
            f'{self.workflow_id}{ID_DELIM}{s_node}{ID_DELIM}{t_node}')
        self.added[EDGES][e_id] = PbEdge(
            id=e_id,
            suicide=edge[3],
            cond=edge[4],
            source=source_id,
            target=target_id,
        )
        new_edges.add(e_id)

        # Add edge id to node field for resolver reference
        self.updated[TASK_PROXIES].setdefault(
            target_id, PbTaskProxy(id=target_id)).edges.append(e_id)
        if s_valid:
            self.updated[TASK_PROXIES].setdefault(
                source_id, PbTaskProxy(id=source_id)).edges.append(e_id)

    if new_edges:
        getattr(self.updated[WORKFLOW], EDGES).edges.extend(new_edges)
def test_split(self):
    """TaskID.split breaks an ID string into a [name, point] list."""
    cases = (
        ("a.1", ["a", '1']),
        ("a._1", ["a", '_1']),
        ("WTAS.20101010T101010", ["WTAS", '20101010T101010']),
    )
    for task_id, expected in cases:
        self.assertEqual(expected, TaskID.split(task_id))
def main(_, options, suite, *task_args):
    """Implement "cylc show" CLI.

    With no task arguments the workflow metadata is queried and shown.
    Arguments that are valid task names have their task metadata shown;
    arguments that are valid task IDs have their prerequisites, outputs
    and extra information shown. With ``options.json`` the results are
    collected and printed as one JSON document at the end instead.

    Exits with status 1 if task IDs were given but no task proxies
    matched.
    """

    def _print_meta(meta):
        """Print flattened metadata items in reverse-sorted key order."""
        for field, text in sorted(meta.items(), reverse=True):
            ansiprint(
                f'<bold>{field}:</bold> {text or "<m>(not given)</m>"}')

    pclient = SuiteRuntimeClient(suite, timeout=options.comms_timeout)
    json_filter = {}

    if not task_args:
        # Print suite info.
        results = pclient('graphql', {
            'request_string': WORKFLOW_META_QUERY,
            'variables': {'wFlows': [suite]},
        })
        for workflow in results['workflows']:
            flat_data = flatten_data(workflow)
            if options.json:
                json_filter.update(flat_data)
                continue
            _print_meta(flat_data)

    task_names = [arg for arg in task_args if TaskID.is_valid_name(arg)]
    task_ids = [arg for arg in task_args if TaskID.is_valid_id_2(arg)]

    if task_names:
        # Print task definition info.
        results = pclient('graphql', {
            'request_string': TASK_META_QUERY,
            'variables': {'wFlows': [suite], 'taskIds': task_names},
        })
        multi = len(results['tasks']) > 1
        for task in results['tasks']:
            flat_data = flatten_data(task['meta'])
            if options.json:
                json_filter[task['name']] = flat_data
                continue
            if multi:
                print(f'----\nTASK NAME: {task["name"]}')
            _print_meta(flat_data)

    if task_ids:
        # IDs valid for is_valid_id are split with TaskID.split into
        # (name, point); the remainder are split on TaskID.DELIM2 into
        # (point, name). Both are rewritten as point + ID_DELIM + name.
        delim1_ids = [
            f'{point}{ID_DELIM}{name}'
            for name, point in (
                TaskID.split(t_id)
                for t_id in task_ids
                if TaskID.is_valid_id(t_id)
            )
        ]
        delim2_ids = [
            f'{point}{ID_DELIM}{name}'
            for point, name in (
                t_id.rsplit(TaskID.DELIM2, 1)
                for t_id in task_ids
                if not TaskID.is_valid_id(t_id)
            )
        ]
        results = pclient('graphql', {
            'request_string': TASK_PREREQS_QUERY,
            'variables': {
                'wFlows': [suite],
                'taskIds': delim1_ids + delim2_ids,
            },
        })
        multi = len(results['taskProxies']) > 1
        for t_proxy in results['taskProxies']:
            task_id = TaskID.get(t_proxy['name'], t_proxy['cyclePoint'])
            if options.json:
                json_filter[task_id] = t_proxy
                continue
            if multi:
                print(f'----\nTASK ID: {task_id}')

            # Rows of [is_composite, prefix, message, satisfied].
            prereqs = []
            for item in t_proxy['prerequisites']:
                prefix = ''
                multi_cond = len(item['conditions']) > 1
                if multi_cond:
                    # Header row holding the whole conditional expression.
                    prereqs.append([
                        True,
                        '',
                        item['expression'].replace('c', ''),
                        item['satisfied'],
                    ])
                for cond in item['conditions']:
                    if multi_cond and not options.list_prereqs:
                        prefix = f'\t{cond["exprAlias"].strip("c")} = '
                    _, _, point, name = cond['taskId'].split(ID_DELIM)
                    cond_id = TaskID.get(name, point)
                    prereqs.append([
                        False,
                        prefix,
                        f'{cond_id} {cond["reqState"]}',
                        cond['satisfied'],
                    ])

            if options.list_prereqs:
                # Bare listing: individual conditions only.
                for composite, _, msg, _ in prereqs:
                    if not composite:
                        print(msg)
                continue

            _print_meta(flatten_data(t_proxy['task']['meta']))

            ansiprint(
                '\n<bold>prerequisites</bold>'
                ' (<red>- => not satisfied</red>):')
            if not prereqs:
                print(' (None)')
            for _, prefix, msg, state in prereqs:
                print_msg_state(f'{prefix}{msg}', state)

            ansiprint(
                '\n<bold>outputs</bold>'
                ' (<red>- => not completed</red>):')
            if not t_proxy['outputs']:
                print(' (None)')
            for key, val in t_proxy['outputs'].items():
                print_msg_state(f'{task_id} {key}', val)

            if t_proxy['extras']:
                print('\nother:')
                for key, value in t_proxy['extras'].items():
                    print(' o %s ... %s' % (key, value))

        if not results['taskProxies']:
            ansiprint(
                f"<red>No matching tasks found: {task_ids}",
                file=sys.stderr)
            sys.exit(1)

    if options.json:
        print(json.dumps(json_filter, indent=4))
def test_split(self):
    """Check the [name, point] list TaskID.split gives for sample IDs."""
    expected_by_id = {
        "a.1": ["a", '1'],
        "a._1": ["a", '_1'],
        "WTAS.20101010T101010": ["WTAS", '20101010T101010'],
    }
    for task_id, expected in expected_by_id.items():
        self.assertEqual(expected, TaskID.split(task_id))
def update(self, schd):
    """Rebuild the task, family and global summaries from the scheduler.

    Computes per-cycle and total task state counts, derives a group
    state for each family at each cycle point, assembles the global
    summary (cycle point bounds, time zone info, run mode, URLs and a
    status string) and finally replaces the summary attributes on self.

    Args:
        schd: The scheduler object; its ``config``, ``pool`` and
            stop/hold attributes are read.

    """
    self.update_time = time()
    global_summary = {}
    family_summary = {}
    task_summary, task_states = self._get_tasks_info(schd)
    all_states = []
    ancestors_dict = schd.config.get_first_parent_ancestors()

    # Compute state counts per cycle, and family states per cycle.
    state_count_cycles = {}
    for point_string, c_task_states in task_states.items():
        # For each cycle point, construct a family state tree
        # based on the first-parent single-inheritance tree
        c_fam_task_states = {}
        count = {}
        for key, state in c_task_states.items():
            if state is None:
                continue
            count[state] = count.get(state, 0) + 1
            all_states.append(state)
            for parent in ancestors_dict.get(key, []):
                if parent != key:
                    c_fam_task_states.setdefault(parent, set()).add(state)
        state_count_cycles[point_string] = count

        for fam, child_states in c_fam_task_states.items():
            f_id = TaskID.get(fam, point_string)
            state = extract_group_state(child_states)
            if state is None:
                continue
            try:
                famcfg = schd.config.cfg['runtime'][fam]['meta']
            except KeyError:
                famcfg = {}
            family_summary[f_id] = {
                'name': fam,
                'description': famcfg.get('description'),
                'title': famcfg.get('title'),
                'label': point_string,
                'state': state,
            }

    # Sum the per-cycle counts into overall totals.
    state_count_totals = {}
    for cycle_counts in state_count_cycles.values():
        for state, state_count in cycle_counts.items():
            state_count_totals[state] = (
                state_count_totals.get(state, 0) + state_count)

    all_states.sort()

    point_items = (
        ('oldest cycle point string', schd.pool.get_min_point()),
        ('newest cycle point string', schd.pool.get_max_point()),
        ('newest runahead cycle point string',
         schd.pool.get_max_point_runahead()),
    )
    for key, value in point_items:
        global_summary[key] = str(value) if value else None

    global_summary['time zone info'] = (
        TIME_ZONE_UTC_INFO if get_utc_mode() else TIME_ZONE_LOCAL_INFO)
    global_summary['last_updated'] = self.update_time
    global_summary['run_mode'] = schd.run_mode
    global_summary['states'] = all_states
    global_summary['namespace definition order'] = (
        schd.config.ns_defn_order)
    global_summary['reloading'] = schd.pool.do_reload
    global_summary['state totals'] = state_count_totals

    # Extract suite and task URLs from config.
    suite_urls = {
        namespace: settings['meta']['URL']
        for namespace, settings in schd.config.cfg['runtime'].items()
    }
    suite_urls['suite'] = schd.config.cfg['meta']['URL']
    global_summary['suite_urls'] = suite_urls

    # Construct a suite status string for use by monitoring clients.
    # The first matching condition wins.
    if schd.pool.is_held:
        status_string = SUITE_STATUS_HELD
    elif schd.stop_mode is not None:
        status_string = SUITE_STATUS_STOPPING
    elif schd.pool.hold_point:
        status_string = SUITE_STATUS_RUNNING_TO_HOLD % schd.pool.hold_point
    elif schd.stop_point:
        status_string = SUITE_STATUS_RUNNING_TO_STOP % schd.stop_point
    elif schd.stop_clock_time is not None:
        status_string = (
            SUITE_STATUS_RUNNING_TO_STOP % schd.stop_clock_time_string)
    elif schd.stop_task:
        status_string = SUITE_STATUS_RUNNING_TO_STOP % schd.stop_task
    elif schd.final_point:
        status_string = SUITE_STATUS_RUNNING_TO_STOP % schd.final_point
    else:
        status_string = SUITE_STATUS_RUNNING
    global_summary['status_string'] = status_string

    # Replace the originals (atomic update, for access from other threads).
    self.task_summary = task_summary
    self.global_summary = global_summary
    self.family_summary = family_summary
    self.state_count_totals = state_count_totals
    self.state_count_cycles = state_count_cycles
def test_get(self):
    """TaskID.get joins a task name and a cycle point into an ID."""
    cases = (
        (("a", 1), "a.1"),
        (("a", "_1"), "a._1"),
        (("WTASK", "20101010T101010"), "WTASK.20101010T101010"),
    )
    for (name, point), expected in cases:
        self.assertEqual(expected, TaskID.get(name, point))
def __init__(
        self, tdef, start_point, status=TASK_STATUS_WAITING,
        hold_swap=None, has_spawned=False, stop_point=None,
        is_startup=False, submit_num=0, is_late=False):
    """Initialise a task proxy for a task definition at a cycle point.

    Args:
        tdef: Task definition; its name, sequences and ``sequential``
            flag are read here.
        start_point: Cycle point for this proxy. With ``is_startup`` it
            is adjusted up to the first on-sequence point.
        status: Initial status passed to the task state.
        hold_swap: Passed through to the task state.
        has_spawned: Stored as ``self.has_spawned``.
        stop_point: Optional final cycle point (manually inserted
            tasks may have one set).
        is_startup: If true, use the earliest first-point that any of
            the task's sequences gives for ``start_point``.
        submit_num: Submit number; None is treated as 0.
        is_late: Stored as ``self.is_late``.

    Raises:
        TaskProxySequenceBoundsError: If ``is_startup`` is set and no
            sequence yields a point for ``start_point``.

    """
    self.tdef = tdef
    self.submit_num = 0 if submit_num is None else submit_num

    if is_startup:
        # adjust up to the first on-sequence cycle point
        # (get_first_point may be None if out of sequence bounds)
        first_points = [
            point
            for point in (
                seq.get_first_point(start_point)
                for seq in self.tdef.sequences
            )
            if point
        ]
        if not first_points:
            # This task is out of sequence bounds
            raise TaskProxySequenceBoundsError(self.tdef.name)
        self.point = min(first_points)
        self.late_time = None
    else:
        self.point = start_point

    self.cleanup_cutoff = self.tdef.get_cleanup_cutoff_point(self.point)
    self.identity = TaskID.get(self.tdef.name, self.point)

    self.has_spawned = has_spawned
    self.reload_successor = None
    self.point_as_seconds = None

    # Manually inserted tasks may have a final cycle point set.
    self.stop_point = stop_point

    self.manual_trigger = False
    self.is_manual_submit = False
    self.summary = {
        'latest_message': '',
        'submitted_time': None,
        'submitted_time_string': None,
        'started_time': None,
        'started_time_string': None,
        'finished_time': None,
        'finished_time_string': None,
        'logfiles': [],
        'job_hosts': {},
        'execution_time_limit': None,
        'batch_sys_name': None,
        'submit_method_id': None,
    }

    self.local_job_file_path = None

    self.task_host = 'localhost'
    self.task_owner = None

    self.job_vacated = False
    self.poll_timer = None
    self.timeout = None
    self.try_timers = {}
    # Use dict here for Python 2.6 compat.
    # Should use collections.Counter in Python 2.7+
    self.non_unique_events = {}

    self.clock_trigger_time = None
    self.expire_time = None
    self.late_time = None
    self.is_late = is_late

    self.state = TaskState(tdef, self.point, status, hold_swap)

    if tdef.sequential:
        # Adjust clean-up cutoff.
        # (get_next_point may be None if beyond the sequence bounds)
        next_points = [
            point
            for point in (
                seq.get_next_point(self.point) for seq in tdef.sequences
            )
            if point
        ]
        if next_points:
            p_next = min(next_points)
            # Only ever move an existing cutoff later, never earlier.
            if (self.cleanup_cutoff is not None
                    and self.cleanup_cutoff < p_next):
                self.cleanup_cutoff = p_next