def __init__(
        self, suite, owner=None, host=None, port=None, timeout=None,
        my_uuid=None, print_uuid=False, auth=None):
    """Record the target suite and the settings used to contact it.

    Args:
        suite: suite name, stored as given.
        owner: suite owner; a false value is replaced by get_user().
        host: suite host. If its first dot-separated component is
            'localhost' it is replaced by get_host(); any other value
            without a dot is passed through get_fqdn_by_host().
        port: suite port, stored as given.
        timeout: converted with float() unless it is None.
        my_uuid: client identifier; a false value is replaced by uuid4().
        print_uuid: if true, write the identifier to sys.stderr.
        auth: stored as given.
    """
    self.suite = suite
    self.owner = owner or get_user()
    self.host = host
    if host:
        if host.split('.')[0] == 'localhost':
            self.host = get_host()
        elif '.' not in host:
            # Neither an IP address nor a domain-qualified name.
            self.host = get_fqdn_by_host(host)
    self.port = port
    self.srv_files_mgr = SuiteSrvFilesManager()
    self.timeout = timeout if timeout is None else float(timeout)
    self.my_uuid = my_uuid or uuid4()
    if print_uuid:
        sys.stderr.write('%s\n' % self.my_uuid)
    self.prog_name = os.path.basename(sys.argv[0])
    self.auth = auth
    self.session = None
    # Content of the primary contact file.
    self.comms1 = {}
    # Content of the extra contact file, e.g. contact via ssh.
    self.comms2 = {}
def _setup_event_mail(self, itask, event):
    """Register a mail notification timer for a task event.

    Nothing is registered if a timer with the same key already exists or
    if the event is not listed in the task's "mail events" setting.
    """
    if event in self.NON_UNIQUE_EVENTS:
        # Non-unique events get a counter suffix so each occurrence has
        # its own timer key.
        event_label = '%s-%d' % (
            event, itask.non_unique_events.get(event, 1))
    else:
        event_label = event
    id_key = (
        (self.HANDLER_MAIL, event_label),
        str(itask.point),
        itask.tdef.name,
        itask.submit_num,
    )
    if id_key in self.event_timers:
        return
    if event not in self._get_events_conf(itask, "mail events", []):
        return
    delays = self._get_events_conf(itask, "mail retry delays") or [0]
    mail_from = self._get_events_conf(
        itask, "mail from", "notifications@" + get_host())
    mail_to = self._get_events_conf(itask, "mail to", get_user())
    mail_smtp = self._get_events_conf(itask, "mail smtp")
    mail_ctx = TaskEventMailContext(
        self.HANDLER_MAIL,  # key
        self.HANDLER_MAIL,  # ctx_type
        mail_from,
        mail_to,
        mail_smtp,
    )
    self.event_timers[id_key] = TaskActionTimer(mail_ctx, delays)
def _add_image_box(self, suite_host_info_tuples):
    """Add one status icon, with tooltip, for a group of suites.

    Args:
        suite_host_info_tuples: iterable of
            (suite, host, status, state_counts, is_stopped) tuples, where
            state_counts is a list of (state_name, number) pairs. Each
            state_counts list is sorted in place, largest number first.
    """
    image_eb = gtk.EventBox()
    image_eb.show()
    running_status_list = []
    status_list = []
    suite_keys = []
    for info_tuple in suite_host_info_tuples:
        suite, host, status, _, is_stopped = info_tuple
        suite_keys.append((host, get_user(), suite))
        if not is_stopped:
            running_status_list.append(status)
        status_list.append(status)
    # The group icon reflects running suites if there are any, otherwise
    # it reflects the stopped ones.
    if running_status_list:
        status = extract_group_state(running_status_list, is_stopped=False)
        image = self.dots.get_image(status, is_stopped=False)
    else:
        status = extract_group_state(status_list, is_stopped=True)
        image = self.dots.get_image(status, is_stopped=True)
    image.show()
    image_eb.add(image)
    image_eb._connect_args = suite_keys
    image_eb.connect("button-press-event", self._on_button_press_event)

    text_format = "%s - %s - %s"
    long_text_format = text_format + "\n %s\n"
    text = ""
    tip_vbox = gtk.VBox()  # Only used in PyGTK 2.12+
    tip_vbox.show()
    for info_tuple in suite_host_info_tuples:
        suite, host, status, state_counts, is_stopped = info_tuple
        # Largest count first. A key function replaces the Python 2-only
        # cmp function; reverse=True keeps the sort stable, so ties stay
        # in their original order exactly as before.
        state_counts.sort(key=lambda item: item[1], reverse=True)
        tip_hbox = gtk.HBox()
        tip_hbox.show()
        state_info = []
        for state_name, number in state_counts:
            state_info.append("%d %s" % (number, state_name))
            image = self.dots.get_image(state_name, is_stopped=is_stopped)
            image.show()
            tip_hbox.pack_start(image, expand=False, fill=False)
        states_text = ", ".join(state_info)
        if status is None:
            suite_summary = "?"
        else:
            suite_summary = status
        if is_stopped:
            suite_summary = "stopped with " + suite_summary
        tip_label = gtk.Label(text_format % (suite, suite_summary, host))
        tip_label.show()
        tip_hbox.pack_start(tip_label, expand=False, fill=False, padding=5)
        tip_vbox.pack_start(tip_hbox, expand=False, fill=False)
        text += long_text_format % (suite, suite_summary, host, states_text)
    text = text.rstrip()
    if hasattr(gtk, "Tooltip"):
        # Rich tooltip built from the widgets packed above.
        image_eb.set_has_tooltip(True)
        image_eb.connect(
            "query-tooltip", self._on_img_tooltip_query, tip_vbox)
    else:
        # Plain-text fallback.
        self._set_tooltip(image_eb, text)
    self.dot_hbox.pack_start(image_eb, expand=False, fill=False, padding=1)
def __init__(self, suite, owner=None, host=None, port=None, timeout=None,
             my_uuid=None, print_uuid=False, comms_protocol=None,
             auth=None):
    """Record the target suite and the settings used to contact it.

    Args:
        suite: suite name, stored as given.
        owner: suite owner; a false value is replaced by get_user().
        host: suite host. If its first dot-separated component is
            'localhost' it is replaced by get_host(); any other value
            without a dot is passed through get_fqdn_by_host().
        port: suite port, stored as given.
        timeout: converted with float() unless it is None.
        my_uuid: client identifier; a false value is replaced by uuid4().
        print_uuid: if true, write the identifier to sys.stderr.
        comms_protocol: stored as given.
        auth: stored as given.
    """
    self.suite = suite
    if not owner:
        owner = get_user()
    self.owner = owner
    self.host = host
    if self.host and self.host.split('.')[0] == 'localhost':
        self.host = get_host()
    elif self.host and '.' not in self.host:
        # Not IP and no domain
        self.host = get_fqdn_by_host(self.host)
    self.port = port
    self.srv_files_mgr = SuiteSrvFilesManager()
    self.comms_protocol = comms_protocol
    if timeout is not None:
        timeout = float(timeout)
    self.timeout = timeout
    self.my_uuid = my_uuid or uuid4()
    if print_uuid:
        # sys.stderr.write() emits the same bytes as the old
        # "print >> sys.stderr" statement, but also works on Python 3,
        # where that statement raises TypeError.
        sys.stderr.write('%s\n' % self.my_uuid)
    self.prog_name = os.path.basename(sys.argv[0])
    self.auth = auth
def _setup_job_logs_retrieval(self, itask, event):
    """Register a timer to retrieve job logs from a remote job host.

    Applies only to the failed, retry and succeeded events. Nothing is
    registered for a job on localhost, when "retrieve job logs" is not set
    for the host, or when a timer with the same key already exists.
    """
    id_key = (
        (self.HANDLER_JOB_LOGS_RETRIEVE, event),
        str(itask.point),
        itask.tdef.name,
        itask.submit_num,
    )
    user_at_host = itask.task_host
    if itask.task_owner:
        user_at_host = itask.task_owner + "@" + itask.task_host
    completion_events = (
        self.EVENT_FAILED, self.EVENT_RETRY, self.EVENT_SUCCEEDED)
    if event not in completion_events:
        return
    if user_at_host in [get_user() + '@localhost', 'localhost']:
        return
    if not self.get_host_conf(itask, "retrieve job logs"):
        return
    if id_key in self.event_timers:
        return
    delays = self.get_host_conf(
        itask, "retrieve job logs retry delays") or [0]
    retrieve_ctx = TaskJobLogsRetrieveContext(
        self.HANDLER_JOB_LOGS_RETRIEVE,  # key
        self.HANDLER_JOB_LOGS_RETRIEVE,  # ctx_type
        user_at_host,
        self.get_host_conf(itask, "retrieve job logs max size"),
    )
    self.event_timers[id_key] = TaskActionTimer(retrieve_ctx, delays)
def __init__(
        self, suite, owner=None, host=None, port=None, timeout=None,
        my_uuid=None, print_uuid=False, auth=None):
    """Set up the client-side state needed to contact a suite.

    Args:
        suite: suite name, stored as given.
        owner: suite owner; a false value is replaced by get_user().
        host: suite host. If its first dot-separated component is
            'localhost' it is replaced by get_host(); any other value
            without a dot is passed through get_fqdn_by_host().
        port: suite port, stored as given.
        timeout: converted with float() unless it is None.
        my_uuid: client identifier; a false value is replaced by uuid4().
        print_uuid: if true, write the identifier to sys.stderr.
        auth: stored as given.
    """
    self.suite = suite
    self.owner = owner if owner else get_user()
    self.host = host
    if host:
        first_part = host.split('.')[0]
        if first_part == 'localhost':
            self.host = get_host()
        elif '.' not in host:
            # Neither an IP address nor a domain-qualified name.
            self.host = get_fqdn_by_host(host)
    self.port = port
    self.srv_files_mgr = SuiteSrvFilesManager()
    self.timeout = None if timeout is None else float(timeout)
    self.my_uuid = my_uuid or uuid4()
    if print_uuid:
        sys.stderr.write('%s\n' % self.my_uuid)
    self.prog_name = os.path.basename(sys.argv[0])
    self.auth = auth
    self.session = None
    # Content of the primary contact file.
    self.comms1 = {}
    # Content of the extra contact file, e.g. contact via ssh.
    self.comms2 = {}
def _setup_job_logs_retrieval(self, itask, event):
    """Queue retrieval of job logs from a remote job host.

    Only the failed, retry and succeeded events qualify. The call is a
    no-op for a job on localhost, when "retrieve job logs" is not set for
    the host, or when the timer key is already registered.
    """
    handler = self.HANDLER_JOB_LOGS_RETRIEVE
    id_key = (
        (handler, event),
        str(itask.point), itask.tdef.name, itask.submit_num)
    if itask.task_owner:
        user_at_host = itask.task_owner + "@" + itask.task_host
    else:
        user_at_host = itask.task_host
    wanted_events = (
        self.EVENT_FAILED, self.EVENT_RETRY, self.EVENT_SUCCEEDED)
    is_wanted = (
        event in wanted_events
        and user_at_host not in [get_user() + '@localhost', 'localhost']
        and self.get_host_conf(itask, "retrieve job logs")
        and id_key not in self.event_timers)
    if not is_wanted:
        return
    delays = self.get_host_conf(itask, "retrieve job logs retry delays")
    if not delays:
        delays = [0]
    max_size = self.get_host_conf(itask, "retrieve job logs max size")
    self.event_timers[id_key] = TaskActionTimer(
        TaskJobLogsRetrieveContext(
            self.HANDLER_JOB_LOGS_RETRIEVE,  # key
            self.HANDLER_JOB_LOGS_RETRIEVE,  # ctx_type
            user_at_host,
            max_size,
        ),
        delays)
def _setup_event_mail(self, itask, event):
    """Queue an email notification for a task event.

    The call is a no-op if the timer key is already registered or the
    event is not in the task's "mail events" setting.
    """
    handler = self.HANDLER_MAIL
    if event in self.NON_UNIQUE_EVENTS:
        # Append the occurrence counter so repeated events do not collide.
        count = itask.non_unique_events.get(event, 1)
        key1 = (handler, '%s-%d' % (event, count))
    else:
        key1 = (handler, event)
    id_key = (key1, str(itask.point), itask.tdef.name, itask.submit_num)
    is_wanted = (
        id_key not in self.event_timers
        and event in self._get_events_conf(itask, "mail events", []))
    if not is_wanted:
        return
    delays = self._get_events_conf(itask, "mail retry delays")
    if not delays:
        delays = [0]
    default_sender = "notifications@" + get_host()
    self.event_timers[id_key] = TaskActionTimer(
        TaskEventMailContext(
            self.HANDLER_MAIL,  # key
            self.HANDLER_MAIL,  # ctx_type
            self._get_events_conf(itask, "mail from", default_sender),
            self._get_events_conf(itask, "mail to", get_user()),
            self._get_events_conf(itask, "mail smtp"),
        ),
        delays)
def get_suite_srv_dir(self, reg, suite_owner=None):
    """Return the path of the service directory of suite `reg`.

    $CYLC_SUITE_RUN_DIR is used as the run directory when it is set and
    $CYLC_SUITE_NAME / $CYLC_SUITE_OWNER match `reg` and the owner
    (get_user() when `suite_owner` is false). Otherwise the run directory
    comes from the global configuration.
    """
    owner = suite_owner or get_user()
    run_dir = os.getenv("CYLC_SUITE_RUN_DIR")
    env_is_for_this_suite = (
        run_dir
        and os.getenv("CYLC_SUITE_NAME") == reg
        and os.getenv("CYLC_SUITE_OWNER") == owner)
    if not env_is_for_this_suite:
        run_dir = glbl_cfg().get_derived_host_item(
            reg, 'suite run directory')
    return os.path.join(run_dir, self.DIR_BASE_SRV)
def _get_headers(self):
    """Build the HTTP headers that identify this client.

    Returns:
        dict: "User-Agent" (cylc version, program name and client UUID)
        and "From" (user@host of this process).
    """
    agent_fields = (CYLC_VERSION, self.prog_name, self.my_uuid)
    headers = {"User-Agent": "cylc/%s prog_name/%s uuid/%s" % agent_fields}
    headers["From"] = "%s@%s" % (get_user(), get_host())
    return headers
def _get_headers(self):
    """Return the identifying HTTP headers sent with each request.

    Returns:
        dict: "User-Agent" built from the cylc version, the program name
        and the client UUID; "From" set to user@host of this process.
    """
    user_agent = "cylc/%s prog_name/%s uuid/%s" % (
        CYLC_VERSION, self.prog_name, self.my_uuid)
    sender = "%s@%s" % (get_user(), get_host())
    return {"User-Agent": user_agent, "From": sender}
def get_suite_srv_dir(self, reg, suite_owner=None):
    """Locate the service directory of suite `reg`.

    The run directory is taken from $CYLC_SUITE_RUN_DIR only if that
    variable is set and the environment describes this very suite
    ($CYLC_SUITE_NAME equals `reg`, $CYLC_SUITE_OWNER equals the owner,
    which defaults to get_user()). In every other case it is looked up in
    the global configuration.
    """
    if not suite_owner:
        suite_owner = get_user()
    env_run_d = os.getenv("CYLC_SUITE_RUN_DIR")
    if (env_run_d
            and os.getenv("CYLC_SUITE_NAME") == reg
            and os.getenv("CYLC_SUITE_OWNER") == suite_owner):
        return os.path.join(env_run_d, self.DIR_BASE_SRV)
    cfg_run_d = glbl_cfg().get_derived_host_item(reg, 'suite run directory')
    return os.path.join(cfg_run_d, self.DIR_BASE_SRV)
def get_suite_srv_dir(self, reg, suite_owner=None):
    """Return the path of the service directory of suite `reg`.

    $CYLC_SUITE_RUN_DIR is used when it is set and $CYLC_SUITE_NAME /
    $CYLC_SUITE_OWNER match `reg` and the owner (get_user() when
    `suite_owner` is false). Otherwise the run directory is read from
    GLOBAL_CFG, which is imported only when needed.
    """
    owner = suite_owner or get_user()
    run_dir = os.getenv("CYLC_SUITE_RUN_DIR")
    env_is_for_this_suite = (
        run_dir
        and os.getenv("CYLC_SUITE_NAME") == reg
        and os.getenv("CYLC_SUITE_OWNER") == owner)
    if not env_is_for_this_suite:
        from cylc.cfgspec.globalcfg import GLOBAL_CFG
        run_dir = GLOBAL_CFG.get_derived_host_item(
            reg, 'suite run directory')
    return os.path.join(run_dir, self.DIR_BASE_SRV)
def _run_event_mail(self, config, ctx):
    """Send the mail notification for a suite event, if configured.

    Helper for "run_event_handlers". Does nothing unless ctx.event is
    listed in the "mail events" setting.
    """
    if ctx.event not in self.get_events_conf(config, 'mail events', []):
        return
    # Environment for the mail command; "smtp" carries the SMTP server.
    env = dict(os.environ)
    smtp_server = self.get_events_conf(config, 'mail smtp')
    if smtp_server:
        env['smtp'] = smtp_server
    subject = '[suite %(event)s] %(suite)s' % {
        'suite': ctx.suite, 'event': ctx.event}
    # Message body: one "name: value" line per non-empty field.
    fields = (
        ('suite event', ctx.event),
        ('reason', ctx.reason),
        ('suite', ctx.suite),
        ('host', ctx.host),
        ('port', ctx.port),
        ('owner', ctx.owner),
    )
    body = ''.join(
        '%s: %s\n' % (label, text) for label, text in fields if text)
    footer_tmpl = self.get_events_conf(config, 'mail footer')
    if footer_tmpl:
        body += (footer_tmpl + '\n') % {
            'host': ctx.host,
            'port': ctx.port,
            'owner': ctx.owner,
            'suite': ctx.suite}
    proc_key = (self.SUITE_EVENT_HANDLER, ctx.event)
    mail_from = self.get_events_conf(
        config, 'mail from', 'notifications@' + get_host())
    mail_to = self.get_events_conf(config, 'mail to', get_user())
    proc_ctx = SuiteProcContext(
        proc_key,
        ['mail', '-s', subject, '-r', mail_from, mail_to],
        env=env,
        stdin_str=body)
    if self.proc_pool.closed:
        # Process pool is closed: run the command in the foreground.
        self.proc_pool.run_command(proc_ctx)
        self._run_event_handlers_callback(proc_ctx)
    else:
        # Otherwise hand the command to the process pool.
        self.proc_pool.put_command(proc_ctx, self._run_event_mail_callback)
def _run_event_mail(self, config, ctx):
    """Send the mail notification for a suite event, if configured.

    Helper for "run_event_handlers". Does nothing unless ctx.event is
    listed in the "mail events" setting.
    """
    if ctx.event not in self.get_events_conf(config, 'mail events', []):
        return
    # Environment for the mail command; "smtp" carries the SMTP server.
    env = dict(os.environ)
    smtp_server = self.get_events_conf(config, 'mail smtp')
    if smtp_server:
        env['smtp'] = smtp_server
    subject = '[suite %(event)s] %(suite)s' % {
        'suite': ctx.suite,
        'event': ctx.event
    }
    # Message body: one "name: value" line per non-empty field.
    fields = (
        ('suite event', ctx.event),
        ('reason', ctx.reason),
        ('suite', ctx.suite),
        ('host', ctx.host),
        ('port', ctx.port),
        ('owner', ctx.owner),
    )
    body = ''.join(
        '%s: %s\n' % (label, text) for label, text in fields if text)
    footer_tmpl = self.get_events_conf(config, 'mail footer')
    if footer_tmpl:
        body += (footer_tmpl + '\n') % {
            'host': ctx.host,
            'port': ctx.port,
            'owner': ctx.owner,
            'suite': ctx.suite
        }
    proc_key = (self.SUITE_EVENT_HANDLER, ctx.event)
    mail_from = self.get_events_conf(
        config, 'mail from', 'notifications@' + get_host())
    mail_to = self.get_events_conf(config, 'mail to', get_user())
    proc_ctx = SuiteProcContext(
        proc_key,
        ['mail', '-s', subject, '-r', mail_from, mail_to],
        env=env,
        stdin_str=body)
    if self.proc_pool.is_closed():
        # Process pool is closed: run the command in the foreground.
        self.proc_pool.run_command(proc_ctx)
        self._run_event_handlers_callback(proc_ctx)
    else:
        # Otherwise hand the command to the process pool.
        self.proc_pool.put_command(proc_ctx, self._run_event_mail_callback)
def cache_passphrase(self, reg, owner, host, value):
    """Remember a suite passphrase in memory and, if allowed, on disk.

    The value is always stored in the in-memory cache under
    (reg, owner, host), with owner/host defaulting to get_user() and
    get_host(). It is written to the cache directory (documented upstream
    as ~/.cylc/auth/owner@host/reg) only when
    self.can_disk_cache_passphrases is set for that key. Normally called
    after a successful authentication.
    """
    owner = get_user() if owner is None else owner
    host = get_host() if host is None else host
    cache_key = (reg, owner, host)
    cache_dir = self._get_cache_dir(reg, owner, host)
    self.cache[self.FILE_BASE_PASSPHRASE][cache_key] = value
    # Only remote suites loaded via SSH are flagged for dumping to file.
    if not self.can_disk_cache_passphrases.get(cache_key):
        return
    try:
        self._dump_item(cache_dir, self.FILE_BASE_PASSPHRASE, value)
    except (IOError, OSError):
        # Undesirable but not disastrous: report in debug mode only.
        if cylc.flags.debug:
            import traceback
            traceback.print_exc()
def cache_passphrase(self, reg, owner, host, value):
    """Cache a suite passphrase in memory and optionally on disk.

    `owner` and `host` default to get_user() and get_host(). The value is
    always put in the in-memory cache; it is dumped under the cache
    directory (documented upstream as ~/.cylc/auth/owner@host/reg) only
    if self.can_disk_cache_passphrases holds a true value for
    (reg, owner, host). Normally called after a successful
    authentication.
    """
    if owner is None:
        owner = get_user()
    if host is None:
        host = get_host()
    key = (reg, owner, host)
    dump_dir = self._get_cache_dir(reg, owner, host)
    passphrases = self.cache[self.FILE_BASE_PASSPHRASE]
    passphrases[key] = value
    # Dump to a file only for remote suites loaded via SSH.
    may_dump = self.can_disk_cache_passphrases.get(key)
    if may_dump:
        try:
            self._dump_item(dump_dir, self.FILE_BASE_PASSPHRASE, value)
        except (IOError, OSError):
            # A failed dump is tolerated; show the traceback in debug mode.
            if cylc.flags.debug:
                import traceback
                traceback.print_exc()
def _is_local_auth_ok(self, reg, owner, host):
    """Return True if it is OK to use local passphrase file.

    For a non-remote owner/host the answer is always True. For a remote
    one, the contact file in the local suite service directory is parsed
    as "KEY=VALUE" lines and the answer is True only if its name, owner
    and host (with or without domain) match the arguments.

    Results are cached in self.can_use_load_auths.

    NOTE: when `owner` or `host` is None on the remote path, the result is
    stored under the resolved user/host names, so a later call with the
    same None arguments will not hit the cache and re-reads the file.
    """
    if (reg, owner, host) not in self.can_use_load_auths:
        if is_remote(host, owner):
            fname = os.path.join(
                self.get_suite_srv_dir(reg), self.FILE_BASE_CONTACT)
            data = {}
            try:
                # "with" closes the contact file even on a parse error;
                # previously the handle was left to the garbage collector.
                with open(fname) as handle:
                    for line in handle:
                        key, value = (
                            [item.strip() for item in line.split("=", 1)])
                        data[key] = value
            except (IOError, ValueError):
                # No contact file, or a line without "=".
                self.can_use_load_auths[(reg, owner, host)] = False
            else:
                # Contact file exists, check values match
                if owner is None:
                    owner = get_user()
                if host is None:
                    host = get_host()
                host_value = data.get(self.KEY_HOST, "")
                self.can_use_load_auths[(reg, owner, host)] = (
                    reg == data.get(self.KEY_NAME) and
                    owner == data.get(self.KEY_OWNER) and
                    (
                        host == host_value or
                        host == host_value.split(".", 1)[0]  # no domain
                    )
                )
        else:
            self.can_use_load_auths[(reg, owner, host)] = True
    return self.can_use_load_auths[(reg, owner, host)]
def _is_local_auth_ok(self, reg, owner, host):
    """Return True if it is OK to use local passphrase, ssl.* files.

    For a non-remote owner/host the answer is always True. For a remote
    one, the contact file in the local suite service directory is parsed
    as "KEY=VALUE" lines and the answer is True only if its name, owner
    and host (with or without domain) match the arguments.

    Results are cached in self.can_use_load_auths.

    NOTE: when `owner` or `host` is None on the remote path, the result is
    stored under the resolved user/host names, so a later call with the
    same None arguments will not hit the cache and re-reads the file.
    """
    if (reg, owner, host) not in self.can_use_load_auths:
        if is_remote(host, owner):
            fname = os.path.join(
                self.get_suite_srv_dir(reg), self.FILE_BASE_CONTACT)
            data = {}
            try:
                # "with" closes the contact file even on a parse error;
                # previously the handle was left to the garbage collector.
                with open(fname) as handle:
                    for line in handle:
                        key, value = (
                            [item.strip() for item in line.split("=", 1)])
                        data[key] = value
            except (IOError, ValueError):
                # No contact file, or a line without "=".
                self.can_use_load_auths[(reg, owner, host)] = False
            else:
                # Contact file exists, check values match
                if owner is None:
                    owner = get_user()
                if host is None:
                    host = get_host()
                host_value = data.get(self.KEY_HOST, "")
                self.can_use_load_auths[(reg, owner, host)] = (
                    reg == data.get(self.KEY_NAME) and
                    owner == data.get(self.KEY_OWNER) and
                    (
                        host == host_value or
                        host == host_value.split(".", 1)[0]  # no domain
                    )
                )
        else:
            self.can_use_load_auths[(reg, owner, host)] = True
    return self.can_use_load_auths[(reg, owner, host)]
def update_suites_info(updater, full_mode=False):
    """Return mapping of suite info by host, owner and suite name.

    Args:
        updater (object): gscan or gpanel updater:
            Compulsory attributes from updater:
                hosts: hosts to scan
                owner_pattern: compiled re to filter results by owners,
                    or None
                suite_info_map: previous results returned by this function
                quit: if true while results are being processed, the
                    update is abandoned
            Optional attributes from updater:
                comms_timeout: communication timeout
                name_pattern: compiled re to filter results by suite names
        full_mode (boolean): update in full mode?

    Return:
        dict: {(host, owner, name): suite_info, ...}
        where each "suite_info" is a dict with keys:
            KEY_GROUP: group name of suite
            KEY_META: suite metadata (new in 7.6)
            KEY_OWNER: suite owner name
            KEY_PORT: suite port, for running suites only
            KEY_STATES: suite state
            KEY_TASKS_BY_STATE: tasks by state
            KEY_TITLE: suite title
            KEY_UPDATE_TIME: last update time of suite
        None is returned instead if updater.quit becomes true part-way.
    """
    owner_pattern = updater.owner_pattern
    timeout = getattr(updater, "comms_timeout", None)
    name_pattern = getattr(updater, "name_pattern", None)
    # Determine items to scan
    results = {}
    items = []
    if full_mode and updater.hosts:
        # Scan full port range on all hosts
        items.extend(updater.hosts)
        if owner_pattern is None:
            # Default to the current user only. The name is escaped so
            # that regex metacharacters in it (e.g. ".") match literally.
            owner_pattern = re.compile(
                r"\A" + re.escape(get_user()) + r"\Z")
    elif full_mode:
        # Get (host, port) list from file system
        items.extend(get_scan_items_from_fs(owner_pattern, updater))
    else:
        # Scan suites in previous results only
        for (host, owner, name), prev_result in (
                updater.suite_info_map.items()):
            port = prev_result.get(KEY_PORT)
            if port:
                items.append((host, port))
            else:
                # No port recorded: carry the previous result over as is.
                results[(host, owner, name)] = prev_result
    if not items:
        return results
    if cylc.flags.debug:
        sys.stderr.write('Scan items:%s%s\n' % (
            DEBUG_DELIM, DEBUG_DELIM.join(str(item) for item in items)))
    # Scan
    for host, port, result in scan_many(
            items, timeout=timeout, updater=updater):
        if updater.quit:
            return
        if (name_pattern and not name_pattern.match(result[KEY_NAME]) or
                owner_pattern and
                not owner_pattern.match(result[KEY_OWNER])):
            continue
        try:
            result[KEY_PORT] = port
            results[(host, result[KEY_OWNER], result[KEY_NAME])] = result
            result[KEY_UPDATE_TIME] = int(float(result[KEY_UPDATE_TIME]))
        except (KeyError, TypeError, ValueError):
            # Best effort: a result with a missing or malformed update
            # time stays in the results without the integer conversion.
            pass
    expire_threshold = time() - DURATION_EXPIRE_STOPPED
    for (host, owner, name), prev_result in updater.suite_info_map.items():
        if updater.quit:
            return
        if ((host, owner, name) in results or
                owner_pattern and not owner_pattern.match(owner) or
                name_pattern and not name_pattern.match(name)):
            # OK if suite already in current results set.
            # Don't bother if:
            # * previous owner does not match current owner pattern
            # * previous suite name does not match current name pattern
            continue
        if prev_result.get(KEY_PORT):
            # A previously running suite is no longer running.
            # Get suite info with "ls-checkpoints", if possible, and include
            # in the results set.
            try:
                prev_result.update(
                    _update_stopped_suite_info((host, owner, name)))
                del prev_result[KEY_PORT]
            except (IndexError, TypeError, ValueError):
                continue
        # Keep a stopped suite only while its last update is recent enough.
        if prev_result.get(KEY_UPDATE_TIME, 0) > expire_threshold:
            results[(host, owner, name)] = prev_result
    return results
def _add_image_box(self, suite_host_info_tuples):
    """Add one status icon, with tooltip, for a group of suites.

    Args:
        suite_host_info_tuples: iterable of
            (suite, host, status, state_counts, is_stopped) tuples, where
            state_counts is a list of (state_name, number) pairs. Each
            state_counts list is sorted in place, largest number first.
    """
    image_eb = gtk.EventBox()
    image_eb.show()
    running_status_list = []
    status_list = []
    suite_keys = []
    for info_tuple in suite_host_info_tuples:
        suite, host, status, _, is_stopped = info_tuple
        suite_keys.append((host, get_user(), suite))
        if not is_stopped:
            running_status_list.append(status)
        status_list.append(status)
    # The group icon reflects running suites if there are any, otherwise
    # it reflects the stopped ones.
    if running_status_list:
        status = extract_group_state(running_status_list, is_stopped=False)
        image = self.dots.get_image(status, is_stopped=False)
    else:
        status = extract_group_state(status_list, is_stopped=True)
        image = self.dots.get_image(status, is_stopped=True)
    image.show()
    image_eb.add(image)
    image_eb._connect_args = suite_keys
    image_eb.connect("button-press-event", self._on_button_press_event)

    text_format = "%s - %s - %s"
    long_text_format = text_format + "\n %s\n"
    text = ""
    tip_vbox = gtk.VBox()  # Only used in PyGTK 2.12+
    tip_vbox.show()
    for info_tuple in suite_host_info_tuples:
        suite, host, status, state_counts, is_stopped = info_tuple
        # Largest count first. A key function replaces the Python 2-only
        # cmp function; reverse=True keeps the sort stable, so ties stay
        # in their original order exactly as before.
        state_counts.sort(key=lambda item: item[1], reverse=True)
        tip_hbox = gtk.HBox()
        tip_hbox.show()
        state_info = []
        for state_name, number in state_counts:
            state_info.append("%d %s" % (number, state_name))
            image = self.dots.get_image(state_name, is_stopped=is_stopped)
            image.show()
            tip_hbox.pack_start(image, expand=False, fill=False)
        states_text = ", ".join(state_info)
        if status is None:
            suite_summary = "?"
        else:
            suite_summary = status
        if is_stopped:
            suite_summary = "stopped with " + suite_summary
        tip_label = gtk.Label(text_format % (suite, suite_summary, host))
        tip_label.show()
        tip_hbox.pack_start(tip_label, expand=False, fill=False, padding=5)
        tip_vbox.pack_start(tip_hbox, expand=False, fill=False)
        text += long_text_format % (
            suite, suite_summary, host, states_text)
    text = text.rstrip()
    if hasattr(gtk, "Tooltip"):
        # Rich tooltip built from the widgets packed above.
        image_eb.set_has_tooltip(True)
        image_eb.connect(
            "query-tooltip", self._on_img_tooltip_query, tip_vbox)
    else:
        # Plain-text fallback.
        self._set_tooltip(image_eb, text)
    self.dot_hbox.pack_start(image_eb, expand=False, fill=False, padding=1)
def _setup_custom_event_handlers(self, itask, event, message):
    """Set up custom task event handlers.

    Handlers come from the "<event> handler" setting, or from "handlers"
    when the event is listed in "handler events". Each handler is either a
    command template (filled with %(name)s substitutions) or, if nothing
    was substituted, a classic command called with four arguments: event,
    suite, task identity and message. One timer is registered per handler
    that does not already have one.

    Args:
        itask: the task proxy the event belongs to.
        event: event name.
        message: event message passed on to every handler.
    """
    handlers = self._get_events_conf(itask, event + ' handler')
    if (handlers is None and
            event in self._get_events_conf(itask, 'handler events', [])):
        handlers = self._get_events_conf(itask, 'handlers')
    if handlers is None:
        return
    retry_delays = self._get_events_conf(
        itask,
        'handler retry delays',
        self.get_host_conf(itask, "task event handler retry delays"))
    if not retry_delays:
        retry_delays = [0]
    # There can be multiple custom event handlers
    for i, handler in enumerate(handlers):
        key1 = ("%s-%02d" % (self.HANDLER_CUSTOM, i), event)
        id_key = (
            key1, str(itask.point), itask.tdef.name, itask.submit_num)
        if id_key in self.event_timers:
            continue
        # Note: user@host may not always be set for a submit number, e.g.
        # on late event or if host select command fails. Use null string to
        # prevent issues in this case.
        user_at_host = itask.summary['job_hosts'].get(itask.submit_num, '')
        if user_at_host and '@' not in user_at_host:
            # (only has 'user@' on the front if user is not suite owner).
            user_at_host = '%s@%s' % (get_user(), user_at_host)
        # Custom event handler can be a command template string
        # or a command that takes 4 arguments (classic interface)
        # Note quote() fails on None, need str(None).
        try:
            handler_data = {
                "event": quote(event),
                "suite": quote(self.suite),
                "point": quote(str(itask.point)),
                "name": quote(itask.tdef.name),
                "submit_num": itask.submit_num,
                "id": quote(itask.identity),
                "message": quote(message),
                "batch_sys_name": quote(
                    str(itask.summary['batch_sys_name'])),
                "batch_sys_job_id": quote(
                    str(itask.summary['submit_method_id'])),
                "submit_time": quote(
                    str(itask.summary['submitted_time_string'])),
                "start_time": quote(
                    str(itask.summary['started_time_string'])),
                "finish_time": quote(
                    str(itask.summary['finished_time_string'])),
                "user@host": quote(user_at_host)
            }

            if self.suite_cfg:
                for key, value in self.suite_cfg.items():
                    if key == "URL":
                        handler_data["suite_url"] = quote(value)
                    else:
                        handler_data["suite_" + key] = quote(value)

            if itask.tdef.rtconfig['meta']:
                for key, value in itask.tdef.rtconfig['meta'].items():
                    if key == "URL":
                        handler_data["task_url"] = quote(value)
                    handler_data[key] = quote(value)

            cmd = handler % (handler_data)
        except KeyError as exc:
            # Use a separate name for the error text. Rebinding "message"
            # here would feed this error text to every later handler in
            # the loop in place of the real event message.
            err_msg = "%s/%s/%02d %s bad template: %s" % (
                itask.point, itask.tdef.name, itask.submit_num, key1, exc)
            LOG.error(err_msg)
            continue

        if cmd == handler:
            # Nothing substituted, assume classic interface
            cmd = "%s '%s' '%s' '%s' '%s'" % (
                handler, event, self.suite, itask.identity, message)
        LOG.debug("Queueing %s handler: %s" % (event, cmd), itask=itask)
        self.event_timers[id_key] = (
            TaskActionTimer(
                CustomTaskEventHandlerContext(
                    key1,
                    self.HANDLER_CUSTOM,
                    cmd,
                ),
                retry_delays))
def get_auth_item(self, item, reg, owner=None, host=None, content=False):
    """Find a suite service item; return its file name or its content.

    With content=True the content of the file is returned, otherwise its
    file name. These locations are tried in order and the first true
    value wins:

    1/ For running task jobs (when $CYLC_SUITE_NAME equals `reg`), the
       service directory under:
       a/ $CYLC_SUITE_RUN_DIR for remote jobs.
       b/ $CYLC_SUITE_RUN_DIR_ON_SUITE_HOST for local jobs or remote jobs
          with SSH messaging.
    2/ The in-memory cache (used for remote suite passphrases).
       NOTE(review): the cache is consulted even when content=False, in
       which case the cached value rather than a file name is returned.
    3/ The local suite service directory, if self._is_local_auth_ok().
    4/ The disk cache for remote suite control, documented upstream as
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/ (tried with the
       full host name, then with the host name without domain).
    5/ The suite service directory on remote owner@host via SSH (never
       for the "contact2" item). With content=False the loaded value is
       dumped under the disk cache directory and that path is returned.

    Raises:
        ValueError: if `item` is not a recognised service item.
        SuiteServiceFileError: if the item cannot be found anywhere.
    """
    known_items = [
        self.FILE_BASE_PASSPHRASE,
        self.FILE_BASE_CONTACT,
        self.FILE_BASE_CONTACT2]
    if item not in known_items:
        raise ValueError("%s: item not recognised" % item)
    is_passphrase = item == self.FILE_BASE_PASSPHRASE
    if is_passphrase:
        self.can_disk_cache_passphrases[(reg, owner, host)] = False
    # content=True reads the file; content=False only locates it.
    fetch = self._load_local_item if content else self._locate_item

    if reg == os.getenv('CYLC_SUITE_NAME'):
        env_key = None
        if 'CYLC_SUITE_RUN_DIR' in os.environ:
            # 1(a)/ Task messaging call.
            env_key = 'CYLC_SUITE_RUN_DIR'
        elif self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST in os.environ:
            # 1(b)/ Task messaging call via ssh messaging.
            env_key = self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST
        if env_key is not None:
            found = fetch(
                item, os.path.join(os.environ[env_key], self.DIR_BASE_SRV))
            if found:
                return found

    # 2/ From memory cache
    if item in self.cache:
        cache_owner = get_user() if owner is None else owner
        cache_host = get_host() if host is None else host
        try:
            return self.cache[item][(reg, cache_owner, cache_host)]
        except KeyError:
            pass

    # 3/ Local suite service directory
    if self._is_local_auth_ok(reg, owner, host):
        found = fetch(item, self.get_suite_srv_dir(reg))
        if found:
            return found

    # 4/ Disk cache for remote suites
    if owner is not None and host is not None:
        cache_dirs = [self._get_cache_dir(reg, owner, host)]
        short_host = host.split('.', 1)[0]
        if short_host != host:
            cache_dirs.append(self._get_cache_dir(reg, owner, short_host))
        for cache_dir in cache_dirs:
            found = fetch(item, cache_dir)
            if found:
                return found

    # 5/ Use SSH to load content from remote owner@host
    # Note: It is not possible to find ".service/contact2" on the suite
    # host, because it is installed on task host by "cylc remote-init" on
    # demand.
    if item != self.FILE_BASE_CONTACT2:
        remote_value = self._load_remote_item(item, reg, owner, host)
        if remote_value:
            if is_passphrase:
                self.can_disk_cache_passphrases[(reg, owner, host)] = True
            if not content:
                dump_dir = self._get_cache_dir(reg, owner, host)
                self._dump_item(dump_dir, item, remote_value)
                remote_value = os.path.join(dump_dir, item)
            return remote_value

    raise SuiteServiceFileError("Couldn't get %s" % item)
def _load_remote_item(self, item, reg, owner, host):
    """Load content of service item from remote [owner@]host via SSH.

    Returns:
        The content of the item as a string, or None if owner/host is not
        remote, the ssh command cannot be started, it exits with a
        non-zero code, or no content follows the expected prefix line.
    """
    if not is_remote(host, owner):
        return
    if host is None:
        host = 'localhost'
    if owner is None:
        owner = get_user()
    if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
        # Attempt to read suite contact file via the local filesystem.
        path = r'%(run_d)s/%(srv_base)s' % {
            'run_d': glbl_cfg().get_derived_host_item(
                reg, 'suite run directory', 'localhost', owner,
                replace_home=False),
            'srv_base': self.DIR_BASE_SRV,
        }
        content = self._load_local_item(item, path)
        if content is not None:
            return content
        # Else drop through and attempt via ssh to the suite account.
    # Prefix STDOUT to ensure returned content is relevant
    prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
    # Attempt to cat passphrase file under suite service directory
    script = (
        r"""echo '%(prefix)s'; """
        r'''cat "%(run_d)s/%(srv_base)s/%(item)s"'''
    ) % {
        'prefix': prefix,
        'run_d': glbl_cfg().get_derived_host_item(
            reg, 'suite run directory', host, owner),
        'srv_base': self.DIR_BASE_SRV,
        'item': item
    }
    import shlex
    command = shlex.split(
        glbl_cfg().get_host_item('ssh command', host, owner))
    command += ['-n', owner + '@' + host, script]
    from subprocess import Popen, PIPE
    try:
        # The child gets its own copy of the descriptor, so the parent's
        # handle on os.devnull can be closed as soon as Popen returns
        # (it used to be leaked).
        with open(os.devnull) as devnull:
            proc = Popen(
                command, stdin=devnull, stdout=PIPE, stderr=PIPE)
    except OSError:
        if cylc.flags.debug:
            import traceback
            traceback.print_exc()
        return
    out, err = (f.decode() for f in proc.communicate())
    ret_code = proc.wait()
    # Extract passphrase from STDOUT
    # It should live in the line with the correct prefix
    content = ""
    can_read = False
    for line in out.splitlines(True):
        if can_read:
            content += line
        elif line.strip() == prefix:
            can_read = True
    if not content or ret_code:
        LOG.debug(
            '$ %(command)s # code=%(ret_code)s\n%(err)s',
            {
                'command': command,
                # STDOUT may contain passphrase, so not safe to print
                # 'out': out,
                'err': err,
                'ret_code': ret_code,
            })
        return
    return content
def _load_remote_item(self, item, reg, owner, host):
    """Load content of service item from remote [owner@]host via SSH.

    Returns:
        The content of the item, or None if owner/host is not remote, the
        ssh command cannot be started, it exits with a non-zero code, or
        no content follows the expected prefix line.
    """
    if not is_remote(host, owner):
        return
    if host is None:
        host = 'localhost'
    if owner is None:
        owner = get_user()
    if item == self.FILE_BASE_CONTACT and not is_remote_host(host):
        # Attempt to read suite contact file via the local filesystem.
        path = r'%(run_d)s/%(srv_base)s' % {
            'run_d': glbl_cfg().get_derived_host_item(
                reg, 'suite run directory', 'localhost', owner,
                replace_home=False),
            'srv_base': self.DIR_BASE_SRV,
        }
        content = self._load_local_item(item, path)
        if content is not None:
            return content
        # Else drop through and attempt via ssh to the suite account.
    # Prefix STDOUT to ensure returned content is relevant
    prefix = r'[CYLC-AUTH] %(suite)s' % {'suite': reg}
    # Attempt to cat passphrase file under suite service directory
    script = (
        r"""echo '%(prefix)s'; """
        r'''cat "%(run_d)s/%(srv_base)s/%(item)s"'''
    ) % {
        'prefix': prefix,
        'run_d': glbl_cfg().get_derived_host_item(
            reg, 'suite run directory', host, owner),
        'srv_base': self.DIR_BASE_SRV,
        'item': item
    }
    import shlex
    command = shlex.split(
        glbl_cfg().get_host_item('ssh command', host, owner))
    command += ['-n', owner + '@' + host, script]
    from subprocess import Popen, PIPE
    try:
        # The child gets its own copy of the descriptor, so the parent's
        # handle on os.devnull can be closed as soon as Popen returns
        # (it used to be leaked).
        with open(os.devnull) as devnull:
            proc = Popen(
                command, stdin=devnull, stdout=PIPE, stderr=PIPE)
    except OSError:
        if cylc.flags.debug:
            import traceback
            traceback.print_exc()
        return
    out, err = proc.communicate()
    ret_code = proc.wait()
    # Extract passphrase from STDOUT
    # It should live in the line with the correct prefix
    content = ""
    can_read = False
    for line in out.splitlines(True):
        if can_read:
            content += line
        elif line.strip() == prefix:
            can_read = True
    if not content or ret_code:
        LOG.debug(
            '$ %(command)s # code=%(ret_code)s\n%(err)s',
            {
                'command': command,
                # STDOUT may contain passphrase, so not safe to print
                # 'out': out,
                'err': err,
                'ret_code': ret_code,
            })
        return
    return content
def test_get_user(self):
    """get_user() should agree with the USER environment variable."""
    expected_user = os.getenv('USER')
    self.assertEqual(expected_user, get_user())
def get_auth_item(self, item, reg, owner=None, host=None, content=False):
    """Find or load a suite service item (passphrase, SSL files, contact).

    Return the name of the located file, or the content of the file if
    content=True is set. The following locations are tried in order and
    the first one that yields a value wins:

    1/ For running task jobs (reg is $CYLC_SUITE_NAME), the service
       directory under:
       a/ $CYLC_SUITE_RUN_DIR for remote jobs.
       b/ $CYLC_SUITE_RUN_DIR_ON_SUITE_HOST for local jobs or remote jobs
          with SSH messaging.

    2/ The memory cache, keyed by (reg, owner, host), with owner and host
       defaulting to the current user and host.

    3/ The suite service directory, for a suite on local user@host.

    4/ The disk cache, for remote suite control from accounts that do not
       need the suite definition directory to be installed:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/
       Both the given host name and its short form are tried.

    5/ For remote suites, the suite service directory on the remote
       owner@host via SSH. If content=False, the loaded value is dumped
       under the disk cache directory and the dumped file name returned.

    Raise:
        ValueError: if item is not a recognised service item.
        SuiteServiceFileError: if the item cannot be obtained.
    """
    known_items = (
        self.FILE_BASE_SSL_CERT,
        self.FILE_BASE_SSL_PEM,
        self.FILE_BASE_PASSPHRASE,
        self.FILE_BASE_CONTACT,
        self.FILE_BASE_CONTACT2,
    )
    if item not in known_items:
        raise ValueError("%s: item not recognised" % item)
    is_passphrase = item == self.FILE_BASE_PASSPHRASE
    if is_passphrase:
        self.can_disk_cache_passphrases[(reg, owner, host)] = False

    def fetch(srv_dir):
        """Load the item's content from, or locate it under, srv_dir."""
        if content:
            return self._load_local_item(item, srv_dir)
        return self._locate_item(item, srv_dir)

    # 1/ Task messaging call, for the suite of the running job.
    if reg == os.getenv('CYLC_SUITE_NAME'):
        if 'CYLC_SUITE_RUN_DIR' in os.environ:
            # 1(a)/ Task messaging call.
            env_key = 'CYLC_SUITE_RUN_DIR'
        elif self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST in os.environ:
            # 1(b)/ Task messaging call via ssh messaging.
            env_key = self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST
        else:
            env_key = None
        if env_key is not None:
            found = fetch(
                os.path.join(os.environ[env_key], self.DIR_BASE_SRV))
            if found:
                return found

    # 2/ From memory cache
    if item in self.cache:
        cached_owner = get_user() if owner is None else owner
        cached_host = get_host() if host is None else host
        try:
            return self.cache[item][(reg, cached_owner, cached_host)]
        except KeyError:
            pass

    # 3/ Local suite service directory
    if self._is_local_auth_ok(reg, owner, host):
        found = fetch(self.get_suite_srv_dir(reg))
        if found:
            return found

    # 4/ Disk cache for remote suites
    if owner is not None and host is not None:
        cache_dirs = [self._get_cache_dir(reg, owner, host)]
        short_host = host.split('.', 1)[0]
        if short_host != host:
            cache_dirs.append(self._get_cache_dir(reg, owner, short_host))
        for cache_dir in cache_dirs:
            found = fetch(cache_dir)
            if found:
                return found

    # 5/ Use SSH to load content from remote owner@host
    # Note: It is not possible to find ".service/contact2" on the suite
    # host, because it is installed on task host by "cylc remote-init" on
    # demand.
    if item != self.FILE_BASE_CONTACT2:
        remote_value = self._load_remote_item(item, reg, owner, host)
        if remote_value:
            if is_passphrase:
                self.can_disk_cache_passphrases[(reg, owner, host)] = True
            if content:
                return remote_value
            dump_dir = self._get_cache_dir(reg, owner, host)
            self._dump_item(dump_dir, item, remote_value)
            return os.path.join(dump_dir, item)

    raise SuiteServiceFileError("Couldn't get %s" % item)
def _setup_custom_event_handlers(self, itask, event, message):
    """Set up custom task event handlers.

    Handlers come from the "<event> handler" setting or, if that is unset
    and the event is listed in "handler events", from the "handlers"
    setting. One timer is added to self.event_timers per handler, unless
    a timer with the same key is already there.

    Args:
        itask: task proxy the event belongs to.
        event: name of the event.
        message: event message passed on to the handlers.
    """
    handlers = self._get_events_conf(itask, event + ' handler')
    if (handlers is None and
            event in self._get_events_conf(itask, 'handler events', [])):
        handlers = self._get_events_conf(itask, 'handlers')
    if handlers is None:
        return
    retry_delays = self._get_events_conf(
        itask,
        'handler retry delays',
        self.get_host_conf(itask, "task event handler retry delays"))
    if not retry_delays:
        retry_delays = [0]
    # There can be multiple custom event handlers
    for i, handler in enumerate(handlers):
        if event in self.NON_UNIQUE_EVENTS:
            key1 = (
                '%s-%02d' % (self.HANDLER_CUSTOM, i),
                '%s-%d' % (event, itask.non_unique_events.get(event, 1)))
        else:
            key1 = ('%s-%02d' % (self.HANDLER_CUSTOM, i), event)
        id_key = (
            key1, str(itask.point), itask.tdef.name, itask.submit_num)
        if id_key in self.event_timers:
            continue
        # Note: user@host may not always be set for a submit number, e.g.
        # on late event or if host select command fails. Use null string to
        # prevent issues in this case.
        user_at_host = itask.summary['job_hosts'].get(itask.submit_num, '')
        if user_at_host and '@' not in user_at_host:
            # (only has 'user@' on the front if user is not suite owner).
            user_at_host = '%s@%s' % (get_user(), user_at_host)
        # Custom event handler can be a command template string
        # or a command that takes 4 arguments (classic interface)
        # Note quote() fails on None, need str(None).
        try:
            handler_data = {
                "event": quote(event),
                "suite": quote(self.suite),
                'suite_uuid': quote(str(self.uuid_str)),
                "point": quote(str(itask.point)),
                "name": quote(itask.tdef.name),
                "submit_num": itask.submit_num,
                "try_num": itask.get_try_num(),
                "id": quote(itask.identity),
                "message": quote(message),
                "batch_sys_name": quote(
                    str(itask.summary['batch_sys_name'])),
                "batch_sys_job_id": quote(
                    str(itask.summary['submit_method_id'])),
                "submit_time": quote(
                    str(itask.summary['submitted_time_string'])),
                "start_time": quote(
                    str(itask.summary['started_time_string'])),
                "finish_time": quote(
                    str(itask.summary['finished_time_string'])),
                "user@host": quote(user_at_host)
            }

            if self.suite_cfg:
                for key, value in self.suite_cfg.items():
                    if key == "URL":
                        handler_data["suite_url"] = quote(value)
                    else:
                        handler_data["suite_" + key] = quote(value)

            if itask.tdef.rtconfig['meta']:
                for key, value in itask.tdef.rtconfig['meta'].items():
                    if key == "URL":
                        handler_data["task_url"] = quote(value)
                    handler_data[key] = quote(value)

            cmd = handler % (handler_data)
        except KeyError as exc:
            # Keep the error text in its own name: rebinding "message"
            # here would hand the error text to every later handler in
            # this loop in place of the real event message.
            error_text = "%s/%s/%02d %s bad template: %s" % (
                itask.point, itask.tdef.name, itask.submit_num, key1, exc)
            LOG.error(error_text)
            continue

        if cmd == handler:
            # Nothing substituted, assume classic interface
            # NOTE(review): these arguments are wrapped in single quotes
            # but not escaped - confirm this is safe for all messages.
            cmd = "%s '%s' '%s' '%s' '%s'" % (
                handler, event, self.suite, itask.identity, message)
        LOG.debug("[%s] -Queueing %s handler: %s", itask, event, cmd)
        self.event_timers[id_key] = (
            TaskActionTimer(
                CustomTaskEventHandlerContext(
                    key1,
                    self.HANDLER_CUSTOM,
                    cmd,
                ),
                retry_delays))
def test_get_user(self):
    """get_user() should agree with the USER environment variable."""
    expected_user = os.getenv('USER')
    self.assertEqual(expected_user, get_user())
def update_suites_info(updater, full_mode=False):
    """Return mapping of suite info by host, owner and suite name.

    Args:
        updater (object): gscan or gpanel updater:
            Compulsory attributes from updater:
                hosts: hosts to scan
                owner_pattern: compiled re to filter results by owners
                suite_info_map: previous results returned by this function
                quit: if true, the update is abandoned
            Optional attributes from updater:
                comms_timeout: communication timeout
                name_pattern: compiled re to filter results by suite names
        full_mode (boolean): update in full mode?

    Return:
        dict: {(host, owner, name): suite_info, ...}
        where each "suite_info" is a dict with keys:
            KEY_GROUP: group name of suite
            KEY_META: suite metadata (new in 7.6)
            KEY_OWNER: suite owner name
            KEY_PORT: suite port, for running suites only
            KEY_STATES: suite state
            KEY_TASKS_BY_STATE: tasks by state
            KEY_TITLE: suite title
            KEY_UPDATE_TIME: last update time of suite
        None is returned instead if updater.quit is found to be set while
        results are being processed.
    """
    # owner_pattern - return only suites with owners matching this compiled re
    owner_pattern = updater.owner_pattern
    # comms_timeout - communication timeout (optional)
    timeout = getattr(updater, "comms_timeout", None)
    # name_pattern - return only suites with names matching this compiled re
    name_pattern = getattr(updater, "name_pattern", None)
    # Determine items to scan
    results = {}
    items = []
    if full_mode and updater.hosts:
        # Scan full port range on all hosts
        items.extend(updater.hosts)
        if owner_pattern is None:
            # Match the current user literally: the name must be escaped,
            # or a character such as "." in it would act as a wildcard.
            owner_pattern = re.compile(
                r"\A" + re.escape(get_user()) + r"\Z")
    elif full_mode:
        # Get (host, port) list from file system
        items.extend(get_scan_items_from_fs(owner_pattern, updater))
    else:
        # Scan suites in previous results only
        for (host, owner, name), prev_result in (
                updater.suite_info_map.items()):
            port = prev_result.get(KEY_PORT)
            if port:
                items.append((host, port))
            else:
                results[(host, owner, name)] = prev_result
    if not items:
        return results
    if cylc.flags.debug:
        sys.stderr.write('Scan items:%s%s\n' % (
            DEBUG_DELIM, DEBUG_DELIM.join(str(item) for item in items)))
    # Scan
    for host, port, result in scan_many(
            items, timeout=timeout, updater=updater):
        if updater.quit:
            return
        if (name_pattern and not name_pattern.match(result[KEY_NAME]) or
                owner_pattern and
                not owner_pattern.match(result[KEY_OWNER])):
            continue
        try:
            result[KEY_PORT] = port
            results[(host, result[KEY_OWNER], result[KEY_NAME])] = result
            result[KEY_UPDATE_TIME] = int(float(result[KEY_UPDATE_TIME]))
        except (KeyError, TypeError, ValueError):
            # Best effort: an incomplete or malformed result is left as is.
            pass
    expire_threshold = time() - DURATION_EXPIRE_STOPPED
    for (host, owner, name), prev_result in updater.suite_info_map.items():
        if updater.quit:
            return
        if ((host, owner, name) in results or
                owner_pattern and not owner_pattern.match(owner) or
                name_pattern and not name_pattern.match(name)):
            # OK if suite already in current results set.
            # Don't bother if:
            # * previous owner does not match current owner pattern
            # * previous suite name does not match current name pattern
            continue
        if prev_result.get(KEY_PORT):
            # A previously running suite is no longer running.
            # Get suite info with "ls-checkpoints", if possible, and include
            # in the results set.
            try:
                prev_result.update(
                    _update_stopped_suite_info((host, owner, name)))
                del prev_result[KEY_PORT]
            except (IndexError, TypeError, ValueError):
                continue
        if prev_result.get(KEY_UPDATE_TIME, 0) > expire_threshold:
            results[(host, owner, name)] = prev_result
    return results