def check_suite_db_compatibility(self):
    """Check that the existing suite database can be used by this Cylc.

    Reads the Cylc version stored under ``self.KEY_CYLC_VERSION`` in the
    ``self.TABLE_SUITE_PARAMS`` table of the private database and compares
    it with ``CylcSuiteDAO.RESTART_INCOMPAT_VERSION``.

    Raises:
        FileNotFoundError: If no file exists at ``self.pri_path``.
        SuiteServiceFileError: If the version cannot be read from the
            database (no matching row, or the query itself fails), or if
            the recorded version is less than or equal to the
            restart-incompatible version.
    """
    # Local import so this block does not depend on the module's imports.
    # assumes pri_dao.connect() returns a sqlite3 connection - TODO confirm
    import sqlite3

    if not os.path.isfile(self.pri_path):
        raise FileNotFoundError(self.pri_path)
    incompat_msg = (
        f"Workflow database is incompatible with Cylc {CYLC_VERSION}")
    pri_dao = self.get_pri_dao()
    try:
        # The table name is a code constant. The key is bound as a
        # parameter instead of being interpolated as a double-quoted
        # token, which SQLite only treats as a string literal by fallback.
        last_run_ver = pri_dao.connect().execute(
            f'SELECT value FROM {self.TABLE_SUITE_PARAMS} WHERE key == ?',
            [self.KEY_CYLC_VERSION]
        ).fetchone()[0]
    except (TypeError, sqlite3.OperationalError) as exc:
        # TypeError: fetchone() returned None, i.e. no version row.
        # OperationalError: the query could not run (e.g. missing table).
        raise SuiteServiceFileError(
            f"{incompat_msg}, or is corrupted") from exc
    finally:
        pri_dao.close()
    try:
        last_run_ver = packaging.version.Version(last_run_ver)
    except packaging.version.InvalidVersion:
        # Fall back to the lenient parser for non-PEP 440 version strings.
        last_run_ver = packaging.version.LegacyVersion(last_run_ver)
    restart_incompat_ver = packaging.version.Version(
        CylcSuiteDAO.RESTART_INCOMPAT_VERSION)
    if last_run_ver <= restart_incompat_ver:
        raise SuiteServiceFileError(
            f"{incompat_msg} (workflow last run with Cylc {last_run_ver})")
def _socket_bind(self, min_port, max_port, srv_prv_key_loc=None):
    """Create the server socket, load its CURVE keys and bind it.

    Args:
        min_port: Lowest port that may be bound.
        max_port: Highest port that may be bound. When equal to
            ``min_port`` exactly that port is bound, otherwise a random
            port within the range is selected.
        srv_prv_key_loc: Optional full path of the server private key
            file. When omitted, the key location is derived from the
            service directory of ``self.suite``.

    Raises:
        SuiteServiceFileError: If the key certificate cannot be loaded or
            yields no private key.
        CylcError: If binding the socket fails.
    """
    if srv_prv_key_loc is not None:
        srv_prv_key_info = KeyInfo(
            KeyType.PRIVATE,
            KeyOwner.SERVER,
            full_key_path=srv_prv_key_loc)
    else:
        # No explicit location: build the key info from the suite's
        # service directory.
        srv_prv_key_info = KeyInfo(
            KeyType.PRIVATE,
            KeyOwner.SERVER,
            suite_srv_dir=get_suite_srv_dir(self.suite))

    # Create and configure the socket before attaching any keys.
    self.socket = self.context.socket(self.pattern)
    self._socket_options()

    try:
        server_public_key, server_private_key = zmq.auth.load_certificate(
            srv_prv_key_info.full_key_path)
    except ValueError:
        raise SuiteServiceFileError(f"Failed to find server's public "
                                    f"key in "
                                    f"{srv_prv_key_info.full_key_path}.")
    except OSError:
        raise SuiteServiceFileError(f"IO error opening server's private "
                                    f"key from "
                                    f"{srv_prv_key_info.full_key_path}.")

    # A missing private key is reported as None rather than an exception.
    if server_private_key is None:
        raise SuiteServiceFileError(f"Failed to find server's private "
                                    f"key in "
                                    f"{srv_prv_key_info.full_key_path}.")

    self.socket.curve_publickey = server_public_key
    self.socket.curve_secretkey = server_private_key
    self.socket.curve_server = True

    try:
        if min_port != max_port:
            self.port = self.socket.bind_to_random_port(
                'tcp://*', min_port, max_port)
        else:
            self.port = min_port
            self.socket.bind(f'tcp://*:{min_port}')
    except (zmq.error.ZMQError, zmq.error.ZMQBindError) as exc:
        raise CylcError(f'could not start Cylc ZMQ server: {exc}')

    if self.barrier is None:
        return
    self.barrier.wait()
def _check_child_dirs(path, depth_count=1):
    """Recursively look for a valid run directory below ``path``.

    Only real sub-directories are inspected (symlinks are skipped), and
    the recursion stops descending once ``depth_count`` reaches
    ``MAX_SCAN_DEPTH``.

    Args:
        path: Directory whose entries are scanned.
        depth_count: Current recursion depth, starting at 1.

    Raises:
        SuiteServiceFileError: If any scanned sub-directory is a valid run
            directory.
    """
    # NOTE(review): ``exc_msg`` and ``reg`` are not defined in this block;
    # presumably they come from an enclosing scope - confirm.
    # The context manager closes the scandir iterator even when the raise
    # below aborts the iteration part-way through.
    with os.scandir(path) as entries:
        for result in entries:
            if result.is_dir() and not result.is_symlink():
                if is_valid_run_dir(result.path):
                    raise SuiteServiceFileError(
                        exc_msg % (reg, result.path))
                if depth_count < MAX_SCAN_DEPTH:
                    _check_child_dirs(result.path, depth_count + 1)
def check_nested_run_dirs(reg):
    """Disallow nested run dirs.

    For example, registering ``foo/bar`` is refused when ``foo`` is
    already a valid suite directory. Both directions are checked: every
    parent of ``reg``, and (up to ``MAX_SCAN_DEPTH`` levels) every real
    sub-directory of the run directory for ``reg`` if it exists.

    Args:
        reg (str): suite name

    Raises:
        SuiteServiceFileError: If a parent or child of ``reg`` is a valid
            run directory.
    """
    exc_msg = (
        'Nested run directories not allowed - cannot register suite name '
        '"%s" as "%s" is already a valid run directory.')

    def _check_child_dirs(path, depth_count=1):
        """Raise if any real sub-directory of ``path`` is a run dir."""
        # The context manager closes the scandir iterator even when the
        # raise below aborts the iteration part-way through.
        with os.scandir(path) as entries:
            for result in entries:
                if result.is_dir() and not result.is_symlink():
                    if is_valid_run_dir(result.path):
                        raise SuiteServiceFileError(
                            exc_msg % (reg, result.path))
                    if depth_count < MAX_SCAN_DEPTH:
                        _check_child_dirs(result.path, depth_count + 1)

    reg_path = os.path.normpath(reg)
    parent_dir = os.path.dirname(reg_path)
    while parent_dir != '':
        if is_valid_run_dir(parent_dir):
            raise SuiteServiceFileError(
                exc_msg % (reg, get_cylc_run_abs_path(parent_dir)))
        next_parent = os.path.dirname(parent_dir)
        if next_parent == parent_dir:
            # dirname() of a filesystem root is the root itself; stop here
            # rather than looping forever on an absolute path.
            break
        parent_dir = next_parent

    reg_path = get_cylc_run_abs_path(reg_path)
    if os.path.isdir(reg_path):
        _check_child_dirs(reg_path)
def parse_suite_arg(options, arg):
    """Resolve the CLI argument "SUITE" to a (name, flow file path) pair.

    The argument is first looked up as a registered suite; if that fails
    it is treated as a filesystem path:

    * registered suite: the name is the argument itself;
    * directory: the name is the base name of the directory;
    * file: the name is the base name of its containing directory.

    An argument of "." is replaced by the current working directory.

    Raises:
        SuiteServiceFileError: If a directory is given that contains
            neither ``SuiteFiles.FLOW_FILE`` nor ``SuiteFiles.SUITE_RC``.
    """
    if arg == '.':
        arg = os.getcwd()
    try:
        flow_path = get_flow_file(arg, options.suite_owner)
        suite_name = arg
    except SuiteServiceFileError:
        # Not a registered suite: interpret the argument as a path.
        arg = os.path.abspath(arg)
        if not os.path.isdir(arg):
            flow_path = arg
            suite_name = os.path.basename(os.path.dirname(arg))
        else:
            flow_path = os.path.join(arg, SuiteFiles.FLOW_FILE)
            suite_name = os.path.basename(arg)
            if not os.path.exists(flow_path):
                # Probably using deprecated suite.rc
                flow_path = os.path.join(arg, SuiteFiles.SUITE_RC)
                if os.path.exists(flow_path):
                    LOG.warning(
                        f'The filename "{SuiteFiles.SUITE_RC}" is deprecated '
                        f'in favor of "{SuiteFiles.FLOW_FILE}".')
                else:
                    raise SuiteServiceFileError(
                        f'no flow.cylc or suite.rc in {arg}')
    return suite_name, flow_path
def init_clean(reg, opts):
    """Start removing a stopped workflow locally and on remote platforms.

    The workflow is first checked for cleanability; if its run directory
    does not exist the reason is logged and nothing else happens. The
    platforms recorded in the workflow database decide whether a remote
    clean is performed before the local clean.

    Args:
        reg (str): Workflow name.
        opts (optparse.Values): CLI options object for cylc clean; only
            ``remote_timeout`` is read here.

    Raises:
        SuiteServiceFileError: If the platforms cannot be read from the
            workflow database.
    """
    run_dir = Path(get_workflow_run_dir(reg))
    try:
        _clean_check(reg, run_dir)
    except FileNotFoundError as exc:
        LOG.info(str(exc))
        return

    platform_names = None
    try:
        platform_names = get_platforms_from_db(run_dir)
    except FileNotFoundError:
        LOG.info("No workflow database - will only clean locally")
    except SuiteServiceFileError as exc:
        raise SuiteServiceFileError(f"Cannot clean - {exc}")

    # Remote cleaning is only needed when something other than localhost
    # was used.
    only_local = not platform_names or platform_names == {'localhost'}
    if not only_local:
        remote_clean(reg, platform_names, opts.remote_timeout)

    LOG.info("Cleaning on local filesystem")
    clean(reg)
def my_load_contact_file(reg, _):
    """Stand-in contact-file loader keyed on the suite name.

    Returns fixed contact data for the name ``'good'``; any other name
    raises SuiteServiceFileError carrying that name. The second argument
    is ignored.
    """
    if reg != 'good':
        raise SuiteServiceFileError(reg)
    contact_data = {
        'host': 'localhost',
        'port': 9999,
        'pub_port': 1234,
        'api': str(API),
    }
    return contact_data
def on_restart(self):
    """Check & vacuum the runtime DB on restart.

    After verifying that the database exists and is compatible, the
    private database is vacuumed and the restart count is incremented
    and stored under ``self.KEY_RESTART_COUNT``.

    Raises:
        SuiteServiceFileError: If no file exists at ``self.pri_path``, or
            if the compatibility check raises it.
    """
    if not os.path.isfile(self.pri_path):
        raise SuiteServiceFileError(
            'Cannot restart as suite database not found')
    self.check_suite_db_compatibility()
    pri_dao = self.get_pri_dao()
    try:
        pri_dao.vacuum()
        self.n_restart = pri_dao.select_suite_params_restart_count() + 1
        self.put_suite_params_1(self.KEY_RESTART_COUNT, self.n_restart)
    finally:
        # Always release the DAO, including when vacuuming or the count
        # query fails.
        pri_dao.close()
def load_contact_file(reg):
    """Read the contact file of suite ``reg`` into a dict.

    Every line is split on its first "=" into a key and a value, both
    stripped of surrounding whitespace.

    Raises:
        SuiteServiceFileError: If the file content is empty or could not
            be loaded.
        ValueError: If a line contains no "=".
    """
    contact_text = _load_local_item(
        SuiteFiles.Service.CONTACT, get_suite_srv_dir(reg))
    if not contact_text:
        raise SuiteServiceFileError("Couldn't load contact file")
    entries = {}
    for raw_line in contact_text.splitlines():
        field, setting = (part.strip() for part in raw_line.split("=", 1))
        entries[field] = setting
    return entries
def check_suite_db_compatibility(self):
    """Raise SuiteServiceFileError if the suite database cannot be reused.

    Reads the Cylc version stored under ``self.KEY_CYLC_VERSION`` in the
    ``self.TABLE_SUITE_PARAMS`` table of the private database and compares
    it with ``CylcSuiteDAO.RESTART_INCOMPAT_VERSION``.

    Raises:
        SuiteServiceFileError: If no version row is found, or if the
            recorded version is less than or equal to the
            restart-incompatible version.
    """
    pri_dao = self.get_pri_dao()
    try:
        last_run_ver = pri_dao.connect().execute(
            f'SELECT value FROM {self.TABLE_SUITE_PARAMS} '
            f'WHERE key == "{self.KEY_CYLC_VERSION}"').fetchone()[0]
    except TypeError:
        # fetchone() returned None: no version recorded in the database.
        raise SuiteServiceFileError(
            'Cannot restart suite as the suite database is incompatible '
            f'with Cylc {CYLC_VERSION}')
    finally:
        # Close on both the success and the failure path so the DAO is
        # not left open when the lookup fails.
        pri_dao.close()
    try:
        last_run_ver = packaging.version.Version(last_run_ver)
    except packaging.version.InvalidVersion:
        # Fall back to the lenient parser for non-PEP 440 version strings.
        last_run_ver = packaging.version.LegacyVersion(last_run_ver)
    restart_incompat_ver = packaging.version.Version(
        CylcSuiteDAO.RESTART_INCOMPAT_VERSION)
    if last_run_ver <= restart_incompat_ver:
        raise SuiteServiceFileError(
            f'Cannot restart suite last run with Cylc {last_run_ver} as '
            f'the suite database is incompatible with Cylc {CYLC_VERSION}')
async def load_contact_file_async(reg, run_dir=None):
    """Asynchronously read a suite contact file into a dict.

    Args:
        reg: Suite name, used to find the service directory when no
            ``run_dir`` is given.
        run_dir: Optional run directory; when truthy the contact file is
            looked up beneath it instead.

    Returns:
        dict: Key/value pairs, one per line, split on the first "=".

    Raises:
        SuiteServiceFileError: If the file cannot be opened or read.
    """
    if run_dir:
        contact_path = Path(
            run_dir,
            SuiteFiles.Service.DIRNAME,
            SuiteFiles.Service.CONTACT)
    else:
        contact_path = Path(
            get_suite_srv_dir(reg), SuiteFiles.Service.CONTACT)
    try:
        async with aiofiles.open(contact_path, mode='r') as handle:
            entries = {}
            async for raw_line in handle:
                field, setting = (
                    part.strip() for part in raw_line.split("=", 1))
                entries[field] = setting
            return entries
    except IOError:
        raise SuiteServiceFileError("Couldn't load contact file")
def restart_check(self):
    """Validate and vacuum the runtime DB ahead of a restart.

    When the database is present and compatible it is vacuumed, and the
    restart count is incremented and stored.

    Returns:
        bool: False if the compatibility check reports that the database
        file does not exist, otherwise True.

    Raises:
        SuiteServiceFileError: If the database is incompatible.
    """
    try:
        self.check_suite_db_compatibility()
    except FileNotFoundError:
        # No database means there is nothing to restart from.
        return False
    except SuiteServiceFileError as exc:
        raise SuiteServiceFileError(f"Cannot restart - {exc}")

    dao = self.get_pri_dao()
    try:
        dao.vacuum()
        previous_restarts = dao.select_suite_params_restart_count()
        self.n_restart = previous_restarts + 1
        self.put_suite_params_1(self.KEY_RESTART_COUNT, self.n_restart)
    finally:
        dao.close()
    return True
def _clean_check(reg, run_dir):
    """Verify that a workflow is in a state where it can be cleaned.

    Args:
        reg (str): Workflow name.
        run_dir: Path object for the workflow run dir on the filesystem
            (``is_dir()`` and ``is_symlink()`` are called on it).

    Raises:
        WorkflowFilesError: If the normalised name starts with ".".
        FileNotFoundError: If the run dir is neither a directory nor a
            symlink.
        SuiteServiceFileError: If the contact-file check reports that the
            workflow is still running.
    """
    validate_flow_name(reg)
    reg = os.path.normpath(reg)
    if reg.startswith('.'):
        raise WorkflowFilesError(
            "Workflow name cannot be a path that points to the cylc-run "
            "directory or above")
    if not (run_dir.is_dir() or run_dir.is_symlink()):
        raise FileNotFoundError(f"No directory to clean at {run_dir}")
    try:
        detect_old_contact_file(reg)
    except SuiteServiceFileError as exc:
        raise SuiteServiceFileError(
            f"Cannot remove running workflow.\n\n{exc}")
def get_suite_source_dir(self, reg, suite_owner=None):
    """Return the source directory path of a suite.

    The path is read from the source symlink in the suite service
    directory. When the link cannot be read but the suite directory
    exists for a non-remote owner, the suite is registered with that
    directory as its source and the directory is returned.

    Raises:
        SuiteServiceFileError: If the link cannot be read and the suite
            directory is missing or the owner is remote.
    """
    srv_d = self.get_suite_srv_dir(reg, suite_owner)
    link_path = os.path.join(srv_d, self.FILE_BASE_SOURCE)
    try:
        link_target = os.readlink(link_path)
    except OSError:
        suite_d = os.path.dirname(srv_d)
        if not os.path.exists(suite_d) or is_remote_user(suite_owner):
            raise SuiteServiceFileError("Suite not found %s" % reg)
        # suite exists but is not yet registered
        self.register(reg=reg, source=suite_d)
        return suite_d
    if os.path.isabs(link_target):
        return link_target
    # Relative links are resolved against the service directory.
    return os.path.normpath(os.path.join(srv_d, link_target))
def get_suite_source_dir(reg, suite_owner=None):
    """Return the source directory path of a suite.

    The path is read from the source symlink in the suite service
    directory. When the link cannot be read but the suite directory
    exists for a non-remote owner, the suite is registered with that
    directory as its source. When the link is readable but the source
    holds no ``SuiteFiles.FLOW_FILE``, the suite is registered again
    with the existing source.

    Raises:
        SuiteServiceFileError: If the link cannot be read and the suite
            directory is missing or the owner is remote.
    """
    srv_d = get_suite_srv_dir(reg, suite_owner)
    link_path = os.path.join(srv_d, SuiteFiles.Service.SOURCE)
    try:
        source = os.readlink(link_path)
    except OSError:
        suite_d = os.path.dirname(srv_d)
        if not os.path.exists(suite_d) or is_remote_user(suite_owner):
            raise SuiteServiceFileError(f"Suite not found: {reg}")
        # suite exists but is not yet registered
        register(reg=reg, source=suite_d)
        return suite_d
    if not os.path.isabs(source):
        # Relative links are resolved against the service directory.
        source = os.path.normpath(os.path.join(srv_d, source))
    if not os.path.exists(os.path.join(source, SuiteFiles.FLOW_FILE)):
        # suite exists but is probably using deprecated suite.rc
        register(reg=reg, source=source)
    return source
def mocked_detect_old_contact_file(reg):
    """Stand-in that fails when ``props['not stopped']`` is truthy.

    ``reg`` is accepted but not used.
    """
    # NOTE(review): ``props`` is not defined in this block; presumably it
    # comes from an enclosing scope - confirm.
    if not props.get('not stopped'):
        return
    raise SuiteServiceFileError('Mocked error')
def detect_old_contact_file(self, reg, check_host_port=None):
    """Detect old suite contact file.

    If an old contact file does not exist, do nothing. If one does exist
    but the suite process is definitely not alive, remove it. If one exists
    and the suite process is still alive, raise SuiteServiceFileError.

    If check_host_port is specified and does not match the (host, port)
    value in the old contact file, raise AssertionError.

    Args:
        reg (str): suite name
        check_host_port (tuple): (host, port) to check against

    Raise:
        AssertionError:
            If old contact file exists but does not have matching
            (host, port) with value of check_host_port.
        SuiteServiceFileError:
            If old contact file exists and the suite process still alive.
    """
    # An old suite of the same name may be running if a contact file exists
    # and can be loaded.
    try:
        data = self.load_contact_file(reg)
        old_host = data[self.KEY_HOST]
        old_port = data[self.KEY_PORT]
        old_proc_str = data[self.KEY_PROCESS]
    except (IOError, ValueError, SuiteServiceFileError):
        # Contact file does not exist or corrupted, should be OK to proceed
        # NOTE(review): a contact file missing one of the keys above raises
        # KeyError, which is not caught here - confirm that is intended.
        return
    if check_host_port and check_host_port != (old_host, int(old_port)):
        raise AssertionError("%s != (%s, %s)" % (
            check_host_port, old_host, old_port))
    # Run the "ps" command to see if the process is still running or not.
    # If the old suite process is still running, it should show up with the
    # same command line as before.
    # Terminate command after 10 seconds to prevent hanging, etc.
    # The PID is the first whitespace-separated field of the process string.
    old_pid_str = old_proc_str.split(None, 1)[0].strip()
    cmd = ["timeout", "10", "ps", self.PS_OPTS, str(old_pid_str)]
    if is_remote_host(old_host):
        # Run the same command on the old host through the configured ssh.
        import shlex
        ssh_str = str(glbl_cfg().get_host_item("ssh command", old_host))
        cmd = shlex.split(ssh_str) + ["-n", old_host] + cmd
    from subprocess import Popen, PIPE, DEVNULL  # nosec
    from time import sleep, time
    proc = Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)  # nosec
    # Terminate command after 10 seconds to prevent hanging SSH, etc.
    timeout = time() + 10.0
    # Poll every 0.1 s; once the deadline has passed, terminate() is
    # requested on each pass until the process has exited.
    while proc.poll() is None:
        if time() > timeout:
            proc.terminate()
        sleep(0.1)
    fname = self.get_contact_file(reg)
    ret_code = proc.wait()
    out, err = (f.decode() for f in proc.communicate())
    if ret_code:
        LOG.debug("$ %s # return %d\n%s", ' '.join(cmd), ret_code, err)
    # Scan the output from the last line upwards.
    for line in reversed(out.splitlines()):
        if line.strip() == old_proc_str:
            # Suite definitely still running
            break
        elif line.split(None, 1)[0].strip() == "PID":
            # Only "ps" header - "ps" has run, but no matching results.
            # Suite not running. Attempt to remove suite contact file.
            try:
                os.unlink(fname)
                return
            except OSError:
                # Could not remove the file: fall through to the error.
                break
    # Reached when the process was found, the contact file could not be
    # removed, or no line of the output matched either test above.
    # NOTE(review): the message literal below is reproduced exactly as it
    # appears in this view of the file - confirm its intended layout.
    raise SuiteServiceFileError(
        (r"""suite contact file exists: %(fname)s Suite "%(suite)s" is already running, and listening at "%(host)s:%(port)s". To start a new run, stop the old one first with one or more of these: * cylc stop %(suite)s # wait for active tasks/event handlers * cylc stop --kill %(suite)s # kill active tasks and wait * cylc stop --now %(suite)s # don't wait for active tasks * cylc stop --now --now %(suite)s # don't wait * ssh -n "%(host)s" kill %(pid)s # final brute force! """) % {
            "host": old_host,
            "port": old_port,
            "pid": old_pid_str,
            "fname": fname,
            "suite": reg,
        })
def register(self, reg=None, source=None, redirect=False, rundir=None):
    """Register a suite, or renew its registration.

    Create suite service directory and symlink to suite source location.

    Args:
        reg (str): suite name, default basename($PWD).
        source (str): directory location of suite.rc file, default $PWD.
        redirect (bool): allow reuse of existing name and run directory.
        rundir (str): optional alternate location; when given, the suite
            run directory is created under it and the default run
            directory path becomes a symlink to it.

    Return:
        str: The registered suite name (which may be computed here).

    Raise:
        SuiteServiceFileError:
            No suite.rc file found in source location.
            Illegal name (can look like a relative path, but not absolute).
            Another suite already has this name (unless --redirect).
            With rundir: the run directory already exists as a real
            directory, or is a symlink pointing somewhere else.
    """
    if reg is None:
        reg = os.path.basename(os.getcwd())
    is_valid, message = SuiteNameValidator.validate(reg)
    if not is_valid:
        raise SuiteServiceFileError(f'invalid suite name - {message}')
    if os.path.isabs(reg):
        raise SuiteServiceFileError(
            "suite name cannot be an absolute path: %s" % reg)
    if source is not None:
        # Accept the path of the suite.rc file itself as the source.
        if os.path.basename(source) == self.FILE_BASE_SUITE_RC:
            source = os.path.dirname(source)
    else:
        source = os.getcwd()
    # suite.rc must exist so we can detect accidentally reversed args.
    source = os.path.abspath(source)
    if not os.path.isfile(os.path.join(source, self.FILE_BASE_SUITE_RC)):
        raise SuiteServiceFileError("no suite.rc in %s" % source)
    # Create service dir if necessary.
    srv_d = self.get_suite_srv_dir(reg)
    if rundir is None:
        os.makedirs(srv_d, exist_ok=True)
    else:
        # Create the real directories under rundir, then make the default
        # run directory path a symlink to them.
        suite_run_d, srv_d_name = os.path.split(srv_d)
        alt_suite_run_d = os.path.join(rundir, reg)
        alt_srv_d = os.path.join(rundir, reg, srv_d_name)
        os.makedirs(alt_srv_d, exist_ok=True)
        os.makedirs(os.path.dirname(suite_run_d), exist_ok=True)
        if os.path.islink(suite_run_d) and not os.path.exists(suite_run_d):
            # Remove a bad symlink.
            os.unlink(suite_run_d)
        if not os.path.exists(suite_run_d):
            os.symlink(alt_suite_run_d, suite_run_d)
        elif not os.path.islink(suite_run_d):
            raise SuiteServiceFileError(
                f"Run directory '{suite_run_d}' already exists.")
        elif alt_suite_run_d != os.readlink(suite_run_d):
            target = os.readlink(suite_run_d)
            raise SuiteServiceFileError(
                f"Symlink '{suite_run_d}' already points to {target}.")
        # (else already the right symlink)
    # See if suite already has a source or not
    try:
        orig_source = os.readlink(
            os.path.join(srv_d, self.FILE_BASE_SOURCE))
    except OSError:
        orig_source = None
    else:
        # A relative link is resolved against the service directory.
        if not os.path.isabs(orig_source):
            orig_source = os.path.normpath(os.path.join(
                srv_d, orig_source))
    if orig_source is not None and source != orig_source:
        if not redirect:
            raise SuiteServiceFileError(
                "the name '%s' already points to %s.\nUse "
                "--redirect to re-use an existing name and run "
                "directory." % (reg, orig_source))
        LOG.warning(
            "the name '%(reg)s' points to %(old)s.\nIt will now"
            " be redirected to %(new)s.\nFiles in the existing %(reg)s run"
            " directory will be overwritten.\n",
            {
                'reg': reg,
                'old': orig_source,
                'new': source
            })
        # Remove symlink to the original suite.
        os.unlink(os.path.join(srv_d, self.FILE_BASE_SOURCE))
    # Create symlink to the suite, if it doesn't already exist.
    if orig_source is None or source != orig_source:
        target = os.path.join(srv_d, self.FILE_BASE_SOURCE)
        if (os.path.abspath(source) == os.path.abspath(
                os.path.dirname(srv_d))):
            # If source happens to be the run directory,
            # create .service/source -> ..
            source_str = ".."
        else:
            source_str = source
        os.symlink(source_str, target)
    print('REGISTERED %s -> %s' % (reg, source))
    return reg
def get_auth_item(self, item, reg, owner=None, host=None, content=False):
    """Locate/load passphrase, SSL private key, SSL certificate, etc.

    Return file name, or content of file if content=True is set.
    Files are searched from these locations in order:

    1/ For running task jobs, service directory under:
       a/ $CYLC_SUITE_RUN_DIR for remote jobs.
       b/ $CYLC_SUITE_RUN_DIR_ON_SUITE_HOST for local jobs or remote jobs
          with SSH messaging.

    2/ (Passphrases only) From memory cache, for remote suite passphrases.
       Don't use if content=False.

    3/ For suite on local user@host. The suite service directory.

    4/ Location under $HOME/.cylc/ for remote suite control from accounts
       that do not actually need the suite definition directory to be
       installed:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    5/ For remote suites, try locating the file from the suite service
       directory on remote owner@host via SSH. If content=False, the value
       of the located file will be dumped under:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    Raise:
        ValueError: If item is not one of the recognised file names.
        SuiteServiceFileError: If the item is not found in any location.
    """
    if item not in [
            self.FILE_BASE_PASSPHRASE, self.FILE_BASE_CONTACT,
            self.FILE_BASE_CONTACT2]:
        raise ValueError("%s: item not recognised" % item)
    if item == self.FILE_BASE_PASSPHRASE:
        # Reset the flag; it is only set to True again when the
        # passphrase is loaded from the remote host in step 5.
        self.can_disk_cache_passphrases[(reg, owner, host)] = False

    if reg == os.getenv('CYLC_SUITE_NAME'):
        env_keys = []
        if 'CYLC_SUITE_RUN_DIR' in os.environ:
            # 1(a)/ Task messaging call.
            env_keys.append('CYLC_SUITE_RUN_DIR')
        elif self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST in os.environ:
            # 1(b)/ Task messaging call via ssh messaging.
            env_keys.append(self.KEY_SUITE_RUN_DIR_ON_SUITE_HOST)
        for key in env_keys:
            path = os.path.join(os.environ[key], self.DIR_BASE_SRV)
            if content:
                value = self._load_local_item(item, path)
            else:
                value = self._locate_item(item, path)
            if value:
                return value

    # 2/ From memory cache
    if item in self.cache:
        # Missing owner/host default to the current user and host.
        my_owner = owner
        my_host = host
        if my_owner is None:
            my_owner = get_user()
        if my_host is None:
            my_host = get_host()
        try:
            return self.cache[item][(reg, my_owner, my_host)]
        except KeyError:
            # Not cached: carry on with the remaining locations.
            pass

    # 3/ Local suite service directory
    if self._is_local_auth_ok(reg, owner, host):
        path = self.get_suite_srv_dir(reg)
        if content:
            value = self._load_local_item(item, path)
        else:
            value = self._locate_item(item, path)
        if value:
            return value

    # 4/ Disk cache for remote suites
    if owner is not None and host is not None:
        paths = [self._get_cache_dir(reg, owner, host)]
        # Also try the host name up to its first ".".
        short_host = host.split('.', 1)[0]
        if short_host != host:
            paths.append(self._get_cache_dir(reg, owner, short_host))
        for path in paths:
            if content:
                value = self._load_local_item(item, path)
            else:
                value = self._locate_item(item, path)
            if value:
                return value

    # 5/ Use SSH to load content from remote owner@host
    # Note: It is not possible to find ".service/contact2" on the suite
    # host, because it is installed on task host by "cylc remote-init" on
    # demand.
    if item != self.FILE_BASE_CONTACT2:
        value = self._load_remote_item(item, reg, owner, host)
        if value:
            if item == self.FILE_BASE_PASSPHRASE:
                self.can_disk_cache_passphrases[(reg, owner, host)] = True
            if not content:
                # A file name was requested: dump the loaded content into
                # the cache directory and return the resulting path.
                path = self._get_cache_dir(reg, owner, host)
                self._dump_item(path, item, value)
                value = os.path.join(path, item)
            return value

    raise SuiteServiceFileError("Couldn't get %s" % item)
def mocked_load_contact_file(reg, _):
    """Stand-in contact-file loader keyed on the suite name.

    Returns a fixed host/port mapping for the name ``'good'``; any other
    name raises SuiteServiceFileError carrying that name. The second
    argument is ignored.
    """
    if reg != 'good':
        raise SuiteServiceFileError(reg)
    contact_data = {'host': 'localhost', 'port': 9999}
    return contact_data
def get_auth_item(item, reg, owner=None, host=None, content=False):
    """Locate/load passphrase, Curve private-key/certificate ...etc.

    Return file name, or content of file if content=True is set.
    Files are searched from these locations in order:

    1/ For running task jobs, service directory under:
       a/ $CYLC_SUITE_RUN_DIR for remote jobs.
       b/ $CYLC_SUITE_RUN_DIR_ON_SUITE_HOST for local jobs or remote jobs
          with SSH messaging.

    2/ For suite on local user@host. The suite service directory.

    3/ Location under $HOME/.cylc/ for remote suite control from accounts
       that do not actually need the suite definition directory to be
       installed:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    4/ For remote suites, try locating the file from the suite service
       directory on remote owner@host via SSH. If content=False, the value
       of the located file will be dumped under:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    Server key certificates are looked up first, in the key directories
    under the suite service directory, before the locations above.

    Raise:
        ValueError: If item is not one of the recognised file names.
        SuiteServiceFileError: If the item is not found in any location.
    """
    if item not in [
            SuiteFiles.Service.PASSPHRASE, SuiteFiles.Service.CONTACT,
            SuiteFiles.Service.CONTACT2,
            UserFiles.Auth.SERVER_PRIVATE_KEY_CERTIFICATE,
            UserFiles.Auth.SERVER_PUBLIC_KEY_CERTIFICATE]:
        raise ValueError("%s: item not recognised" % item)

    # For a UserFiles.Auth.SERVER_..._KEY_CERTIFICATE, only need to check Case
    # '3/' (always ignore content i.e. content=False), so check these first:
    # NOTE(review): if the certificate is not found here, execution still
    # falls through to the searches below - confirm that is intended.
    if item in (UserFiles.Auth.SERVER_PRIVATE_KEY_CERTIFICATE,
                UserFiles.Auth.SERVER_PUBLIC_KEY_CERTIFICATE):
        auth_path = os.path.join(get_suite_srv_dir(reg),
                                 UserFiles.Auth.DIRNAME)
        public_key_dir, private_key_dir = return_key_locations(auth_path)
        if item == UserFiles.Auth.SERVER_PRIVATE_KEY_CERTIFICATE:
            path = private_key_dir
        else:
            path = public_key_dir
        value = _locate_item(item, path)
        if value:
            return value

    if reg == os.getenv('CYLC_SUITE_NAME'):
        env_keys = []
        if 'CYLC_SUITE_RUN_DIR' in os.environ:
            # 1(a)/ Task messaging call.
            env_keys.append('CYLC_SUITE_RUN_DIR')
        elif ContactFileFields.SUITE_RUN_DIR_ON_SUITE_HOST in os.environ:
            # 1(b)/ Task messaging call via ssh messaging.
            env_keys.append(ContactFileFields.SUITE_RUN_DIR_ON_SUITE_HOST)
        for key in env_keys:
            path = os.path.join(os.environ[key], SuiteFiles.Service.DIRNAME)
            if content:
                value = _load_local_item(item, path)
            else:
                value = _locate_item(item, path)
            if value:
                return value

    # 2/ Local suite service directory
    if _is_local_auth_ok(reg, owner, host):
        path = get_suite_srv_dir(reg)
        if content:
            value = _load_local_item(item, path)
        else:
            value = _locate_item(item, path)
        if value:
            return value

    # 3/ Disk cache for remote suites
    if owner is not None and host is not None:
        paths = [_get_cache_dir(reg, owner, host)]
        # Also try the host name up to its first ".".
        short_host = host.split('.', 1)[0]
        if short_host != host:
            paths.append(_get_cache_dir(reg, owner, short_host))
        for path in paths:
            if content:
                value = _load_local_item(item, path)
            else:
                value = _locate_item(item, path)
            if value:
                return value

    # 4/ Use SSH to load content from remote owner@host
    # Note: It is not possible to find ".service/contact2" on the suite
    # host, because it is installed on task host by "cylc remote-init" on
    # demand.
    if item != SuiteFiles.Service.CONTACT2:
        value = _load_remote_item(item, reg, owner, host)
        if value:
            if not content:
                # A file name was requested: dump the loaded content into
                # the cache directory and return the resulting path.
                path = _get_cache_dir(reg, owner, host)
                _dump_item(path, item, value)
                value = os.path.join(path, item)
            return value

    raise SuiteServiceFileError("Couldn't get %s" % item)
def get_auth_item(item, reg, owner=None, host=None, content=False):
    """Locate/load Curve private-key/ ...etc.

    Return file name, or content of file if content=True is set.
    Files are searched from these locations in order:

    1/ Server Curve ZMQ keys located in suite service directory
       Client Curve ZMQ keys located in
       suite service directory (private keys)
       suite service directory/client_public_keys (public keys)

    2/ For running task jobs, service directory under:
       a/ $CYLC_SUITE_RUN_DIR for remote jobs.
       b/ $CYLC_SUITE_RUN_DIR_ON_SUITE_HOST for local jobs or remote jobs
          with SSH messaging.

    3/ For suite on local user@host. The suite service directory.

    4/ Location under $HOME/.cylc/ for remote suite control from accounts
       that do not actually need the suite definition directory to be
       installed:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    5/ For remote suites, try locating the file from the suite service
       directory on remote owner@host via SSH. If content=False, the value
       of the located file will be dumped under:
       $HOME/.cylc/auth/SUITE_OWNER@SUITE_HOST/SUITE_NAME/

    Raise:
        ValueError: If item is neither a contact file name nor a KeyInfo.
        SuiteServiceFileError: If the item is not found in any location.
    """
    if item not in [SuiteFiles.Service.CONTACT, SuiteFiles.Service.CONTACT2
                    ] and not isinstance(item, KeyInfo):
        raise ValueError(f"{item}: item not recognised")

    # 1 (a)
    if isinstance(item, KeyInfo):
        item_location = _locate_item(item.file_name, item.key_path)
        # TODO: separate key file 'get' into own function
        # Additional searches below need a file name, not a complex object
        item = item.file_name
        if item_location:
            return item_location

    if reg == os.getenv('CYLC_SUITE_NAME'):
        env_keys = []
        if 'CYLC_SUITE_RUN_DIR' in os.environ:
            # 2(a)/ Task messaging call.
            env_keys.append('CYLC_SUITE_RUN_DIR')
        elif ContactFileFields.SUITE_RUN_DIR_ON_SUITE_HOST in os.environ:
            # 2(b)/ Task messaging call via ssh messaging.
            env_keys.append(ContactFileFields.SUITE_RUN_DIR_ON_SUITE_HOST)
        for key in env_keys:
            path = os.path.join(os.environ[key], SuiteFiles.Service.DIRNAME)
            if content:
                value = _load_local_item(item, path)
            else:
                value = _locate_item(item, path)
            if value:
                return value

    # 3/ Local suite service directory
    if _is_local_auth_ok(reg, owner, host):
        path = get_suite_srv_dir(reg)
        if content:
            value = _load_local_item(item, path)
        else:
            value = _locate_item(item, path)
        if value:
            return value

    # 4/ Disk cache for remote suites
    if owner is not None and host is not None:
        paths = [_get_cache_dir(reg, owner, host)]
        # Also try the host name up to its first ".".
        short_host = host.split('.', 1)[0]
        if short_host != host:
            paths.append(_get_cache_dir(reg, owner, short_host))
        for path in paths:
            if content:
                value = _load_local_item(item, path)
            else:
                value = _locate_item(item, path)
            if value:
                return value

    # 5/ Use SSH to load content from remote owner@host
    # Note: It is not possible to find ".service/contact2" on the suite
    # host, because it is installed on task host by "cylc remote-init" on
    # demand.
    if item != SuiteFiles.Service.CONTACT2:
        value = _load_remote_item(item, reg, owner, host)
        if value:
            if not content:
                # A file name was requested: dump the loaded content into
                # the cache directory and return the resulting path.
                path = _get_cache_dir(reg, owner, host)
                _dump_item(path, item, value)
                value = os.path.join(path, item)
            return value

    raise SuiteServiceFileError("Couldn't get %s" % item)
def detect_old_contact_file(reg, check_host_port=None):
    """Detect old suite contact file.

    If an old contact file does not exist, do nothing. If one does exist
    but the suite process is definitely not alive, remove it. If one exists
    and the suite process is still alive, raise SuiteServiceFileError.

    If check_host_port is specified and does not match the (host, port)
    value in the old contact file, raise AssertionError.

    Args:
        reg (str): suite name
        check_host_port (tuple): (host, port) to check against

    Raise:
        AssertionError:
            If old contact file exists but does not have matching
            (host, port) with value of check_host_port.
        SuiteServiceFileError:
            If old contact file exists and the suite process still alive.
    """
    # An old suite of the same name may be running if a contact file exists
    # and can be loaded.
    try:
        data = load_contact_file(reg)
        old_host = data[ContactFileFields.HOST]
        old_port = data[ContactFileFields.PORT]
        old_proc_str = data[ContactFileFields.PROCESS]
    except (IOError, ValueError, SuiteServiceFileError):
        # Contact file does not exist or corrupted, should be OK to proceed
        # NOTE(review): a contact file missing one of the keys above raises
        # KeyError, which is not caught here - confirm that is intended.
        return
    if check_host_port and check_host_port != (old_host, int(old_port)):
        raise AssertionError("%s != (%s, %s)" % (
            check_host_port, old_host, old_port))
    # Run the "ps" command to see if the process is still running or not.
    # If the old suite process is still running, it should show up with the
    # same command line as before.
    # Terminate command after 10 seconds to prevent hanging, etc.
    # The PID is the first whitespace-separated field of the process string.
    old_pid_str = old_proc_str.split(None, 1)[0].strip()
    cmd = ["timeout", "10", "ps", PS_OPTS, str(old_pid_str)]
    if is_remote_host(old_host):
        # Run the same command on the old host through the configured ssh.
        import shlex
        ssh_str = get_platform()["ssh command"]
        cmd = shlex.split(ssh_str) + ["-n", old_host] + cmd
    from time import sleep, time
    proc = Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
    # Terminate command after 10 seconds to prevent hanging SSH, etc.
    timeout = time() + 10.0
    # Poll every 0.1 s; once the deadline has passed, terminate() is
    # requested on each pass until the process has exited.
    while proc.poll() is None:
        if time() > timeout:
            proc.terminate()
        sleep(0.1)
    fname = get_contact_file(reg)
    ret_code = proc.wait()
    out, err = (f.decode() for f in proc.communicate())
    if ret_code:
        LOG.debug("$ %s # return %d\n%s", ' '.join(cmd), ret_code, err)
    # Scan the output from the last line upwards.
    for line in reversed(out.splitlines()):
        if line.strip() == old_proc_str:
            # Suite definitely still running
            break
        elif line.split(None, 1)[0].strip() == "PID":
            # Only "ps" header - "ps" has run, but no matching results.
            # Suite not running. Attempt to remove suite contact file.
            try:
                os.unlink(fname)
                return
            except OSError:
                # Could not remove the file: fall through to the error.
                break
    # Reached when the process was found, the contact file could not be
    # removed, or no line of the output matched either test above.
    raise SuiteServiceFileError(
        CONTACT_FILE_EXISTS_MSG % {
            "host": old_host,
            "port": old_port,
            "pid": old_pid_str,
            "fname": fname,
            "suite": reg,
        }
    )
def register(reg=None, source=None, redirect=False, rundir=None):
    """Register a suite, or renew its registration.

    Create suite service directory and symlink to suite source location.

    Args:
        reg (str): suite name, default basename($PWD).
        source (str): directory location of flow.cylc file, default $PWD.
        redirect (bool): allow reuse of existing name and run directory.
        rundir (str): for overriding the default cylc-run directory.

    Return:
        str: The registered suite name (which may be computed here).

    Raise:
        SuiteServiceFileError:
            No flow.cylc file found in source location.
            Illegal name (can look like a relative path, but not absolute).
            Another suite already has this name (unless --redirect).
            Trying to register a suite nested inside of another.
    """
    if reg is None:
        reg = os.path.basename(os.getcwd())
    is_valid, message = SuiteNameValidator.validate(reg)
    if not is_valid:
        raise SuiteServiceFileError(f'invalid suite name - {message}')
    if os.path.isabs(reg):
        raise SuiteServiceFileError(
            f'suite name cannot be an absolute path: {reg}')

    check_nested_run_dirs(reg)

    if source is not None:
        # Accept the path of the flow file itself as the source.
        if os.path.basename(source) == SuiteFiles.FLOW_FILE:
            source = os.path.dirname(source)
    else:
        source = os.getcwd()

    # flow.cylc must exist so we can detect accidentally reversed args.
    source = os.path.abspath(source)
    flow_file_path = os.path.join(source, SuiteFiles.FLOW_FILE)
    if not os.path.isfile(flow_file_path):
        # If using deprecated suite.rc, symlink it into flow.cylc:
        suite_rc_path = os.path.join(source, SuiteFiles.SUITE_RC)
        if os.path.isfile(suite_rc_path):
            # NOTE(review): os.symlink raises if flow_file_path already
            # exists as something isfile() rejects (e.g. a dangling
            # symlink) - confirm whether that case needs handling.
            os.symlink(suite_rc_path, flow_file_path)
            LOG.warning(
                f'The filename "{SuiteFiles.SUITE_RC}" is deprecated in favor '
                f'of "{SuiteFiles.FLOW_FILE}". Symlink created.')
        else:
            raise SuiteServiceFileError(
                f'no flow.cylc or suite.rc in {source}')

    # Create service dir if necessary.
    srv_d = get_suite_srv_dir(reg)
    if rundir is None:
        os.makedirs(srv_d, exist_ok=True)
    else:
        # Create the real directories under rundir, then make the default
        # run directory path a symlink to them.
        suite_run_d, srv_d_name = os.path.split(srv_d)
        alt_suite_run_d = os.path.join(rundir, reg)
        alt_srv_d = os.path.join(rundir, reg, srv_d_name)
        os.makedirs(alt_srv_d, exist_ok=True)
        os.makedirs(os.path.dirname(suite_run_d), exist_ok=True)
        if os.path.islink(suite_run_d) and not os.path.exists(suite_run_d):
            # Remove a bad symlink.
            os.unlink(suite_run_d)
        if not os.path.exists(suite_run_d):
            os.symlink(alt_suite_run_d, suite_run_d)
        elif not os.path.islink(suite_run_d):
            raise SuiteServiceFileError(
                f"Run directory '{suite_run_d}' already exists.")
        elif alt_suite_run_d != os.readlink(suite_run_d):
            target = os.readlink(suite_run_d)
            raise SuiteServiceFileError(
                f"Symlink '{suite_run_d}' already points to {target}.")
        # (else already the right symlink)

    # See if suite already has a source or not
    try:
        orig_source = os.readlink(
            os.path.join(srv_d, SuiteFiles.Service.SOURCE))
    except OSError:
        orig_source = None
    else:
        # A relative link is resolved against the service directory.
        if not os.path.isabs(orig_source):
            orig_source = os.path.normpath(os.path.join(srv_d, orig_source))
    if orig_source is not None and source != orig_source:
        if not redirect:
            raise SuiteServiceFileError(
                f"the name '{reg}' already points to {orig_source}.\nUse "
                "--redirect to re-use an existing name and run directory.")
        LOG.warning(
            f"the name '{reg}' points to {orig_source}.\nIt will now be "
            f"redirected to {source}.\nFiles in the existing {reg} run "
            "directory will be overwritten.\n")
        # Remove symlink to the original suite.
        os.unlink(os.path.join(srv_d, SuiteFiles.Service.SOURCE))

    # Create symlink to the suite, if it doesn't already exist.
    if orig_source is None or source != orig_source:
        target = os.path.join(srv_d, SuiteFiles.Service.SOURCE)
        if (os.path.abspath(source) == os.path.abspath(
                os.path.dirname(srv_d))):
            # If source happens to be the run directory,
            # create .service/source -> ..
            source_str = ".."
        else:
            source_str = source
        os.symlink(source_str, target)

    print(f'REGISTERED {reg} -> {source}')
    return reg
def mocked_detect_old_contact_file(reg):
    """Stand-in that fails when ``not_stopped`` is truthy.

    ``reg`` is accepted but not used.
    """
    # NOTE(review): ``not_stopped`` is not defined in this block;
    # presumably it comes from an enclosing scope - confirm.
    if not not_stopped:
        return
    raise SuiteServiceFileError('Mocked error')