def get_location(workflow: str):
    """Look up where a workflow can be contacted.

    The host and both port numbers are read from the workflow's contact
    file.

    Args:
        workflow (str): workflow name

    Returns:
        Tuple[str, int, int]: the host name (as returned by
        ``get_fqdn_by_host``), the port and the publish port.

    Raises:
        WorkflowStopped: if the contact file cannot be loaded
            (``ServiceFileError``), i.e. the workflow is not running.
        CylcVersionError: if the contact file has no publish port entry,
            which is taken to mean a Cylc 7 (or earlier) workflow.

    """
    try:
        contact_info = load_contact_file(workflow)
    except ServiceFileError:
        raise WorkflowStopped(workflow)

    fqdn = get_fqdn_by_host(contact_info[ContactFileFields.HOST])
    port = int(contact_info[ContactFileFields.PORT])

    if ContactFileFields.PUBLISH_PORT not in contact_info:
        # Report whatever version the contact file declares (None when it
        # does not declare one).
        raise CylcVersionError(version=contact_info.get('CYLC_VERSION'))

    return fqdn, port, int(contact_info[ContactFileFields.PUBLISH_PORT])
async def test_load_contact_file_async(myflow):
    """The async loader returns the same contact data as the sync one."""
    async_contact = await load_contact_file_async(myflow.workflow)
    assert async_contact[CFF.HOST] == myflow.host

    # the sync interface must agree with the async interface
    sync_contact = load_contact_file(myflow.workflow)
    assert async_contact == sync_contact
def _check_contact_file(scheduler):
    """Check the contact file on disk against the scheduler's own copy.

    Args:
        scheduler: object providing ``workflow`` and ``contact_data``.

    Raises:
        CylcError: if the loaded contact data differs from
            ``scheduler.contact_data``, or if loading it fails with
            AssertionError, IOError, ValueError or ServiceFileError.

    """
    try:
        on_disk = workflow_files.load_contact_file(scheduler.workflow)
        if on_disk != scheduler.contact_data:
            raise CylcError('contact file modified')
    except (AssertionError, IOError, ValueError, ServiceFileError):
        # Name the offending file in the error.
        contact_path = workflow_files.get_contact_file(scheduler.workflow)
        raise CylcError(
            '%s: contact file corrupted/modified and may be left'
            % contact_path
        )
def main(parser, options, reg):
    """CLI for "cylc get-workflow-contact".

    Print every entry of the contact file of workflow ``reg`` as
    ``KEY=VALUE``, one per line, in sorted order.

    Raises:
        CylcError: if the contact file cannot be loaded
            (``ServiceFileError``).

    """
    try:
        contact = load_contact_file(reg)
    except ServiceFileError:
        raise CylcError(
            f"{reg}: cannot get contact info, workflow not running?")

    for entry in sorted(contact.items()):
        # entry is a (key, value) pair
        print("%s=%s" % entry)
async def test_scan_cleans_stuck_contact_files( run, scheduler, flow, one_conf, run_dir, test_dir, ): """Ensure scan tidies up contact files from crashed flows.""" # create a flow reg = flow(one_conf, name='-crashed-') schd = scheduler(reg) srv_dir = Path(run_dir, reg, WorkflowFiles.Service.DIRNAME) tmp_dir = test_dir / 'srv' cont = srv_dir / WorkflowFiles.Service.CONTACT # run the flow, copy the contact, stop the flow, copy back the contact async with run(schd): copytree(srv_dir, tmp_dir) rmtree(srv_dir) copytree(tmp_dir, srv_dir) rmtree(tmp_dir) # the old contact file check uses the CLI command that the flow was run # with to check that whether the flow is running. Because this is an # integration test the process is the pytest process and it is still # running so we need to change the command so that Cylc sees the flow as # having crashed contact_info = load_contact_file(reg) contact_info[ContactFileFields.COMMAND] += 'xyz' dump_contact_file(reg, contact_info) # make sure this flow shows for a regular filesystem-only scan opts = ScanOptions(states='running,paused', format='name') flows = [] await main(opts, write=flows.append, scan_dir=test_dir) assert len(flows) == 1 assert '-crashed-' in flows[0] # the contact file should still be there assert cont.exists() # make sure this flow shows for a regular filesystem-only scan opts = ScanOptions(states='running,paused', format='name', ping=True) flows = [] await main(opts, write=flows.append, scan_dir=test_dir) assert len(flows) == 0 # the contact file should have been removed by the scan assert not cont.exists()
def main(parser: COP, options: 'Values', workflow_id: str) -> None:
    """CLI for "cylc get-workflow-contact".

    Resolve ``workflow_id`` with ``parse_id`` (constrained to workflows),
    then print every entry of its contact file as ``KEY=VALUE``, one per
    line, in sorted order.

    Raises:
        CylcError: if the contact file cannot be loaded
            (``ServiceFileError``).

    """
    # only the first element returned by parse_id is used here
    workflow_id, *_ = parse_id(workflow_id, constraint='workflows')

    try:
        contact = load_contact_file(workflow_id)
    except ServiceFileError:
        raise CylcError(
            f"{workflow_id}: cannot get contact info, workflow not running?")

    for name, setting in sorted(contact.items()):
        print("%s=%s" % (name, setting))
def send_request(self, command, args=None, timeout=None):
    """Send a request, using ssh.

    Determines ssh_cmd, cylc_path and login_shell settings from the contact
    file.

    Converts message to JSON and sends this to stdin. Executes the Cylc
    command, then deserialises the output.

    Use ``__call__`` to call this method.

    Args:
        command (str): The name of the endpoint to call.
        args (dict): Arguments to pass to the endpoint function.
            A falsy value is sent as an empty dict.
        timeout (float): Override the default timeout (seconds).
            A falsy value leaves the timeout option off the command line.

    Raises:
        ClientError: if the remote command exits with a non-zero return
            code; carries the command's stderr and the return code.

    Returns:
        object: Deserialized output from function called.

    """
    # Set environment variable to determine the communication for use on
    # the scheduler
    os.environ["CLIENT_COMMS_METH"] = CommsMeth.SSH.value

    cmd = ["client"]
    if timeout:
        # Pass the timeout as a command line option. A bare
        # "comms_timeout=N" word would sit in front of the positional
        # workflow and command arguments that follow.
        cmd += [f'--comms-timeout={timeout}']
    cmd += [self.workflow, command]

    contact = load_contact_file(self.workflow)
    ssh_cmd = contact[ContactFileFields.SCHEDULER_SSH_COMMAND]
    login_shell = contact[ContactFileFields.SCHEDULER_USE_LOGIN_SHELL]
    cylc_path = contact[ContactFileFields.SCHEDULER_CYLC_PATH]
    # The contact file stores an unset path as the literal string 'None'.
    cylc_path = None if cylc_path == 'None' else cylc_path

    if not args:
        args = {}
    message = json.dumps(args)

    proc = _remote_cylc_cmd(
        cmd,
        host=self.host,
        stdin_str=message,
        ssh_cmd=ssh_cmd,
        remote_cylc_path=cylc_path,
        ssh_login_shell=login_shell,
        capture_process=True,
    )
    # communicate() yields bytes; decode both streams before use.
    out, err = (f.decode() for f in proc.communicate())
    return_code = proc.wait()
    if return_code:
        raise ClientError(err, f"return-code={return_code}")
    return json.loads(out)
def prepare_command(
    self,
    command: str,
    args: Dict,
    timeout: Union[float, str],
):
    """Prepare command for submission.

    Sets the ``CLIENT_COMMS_METH`` environment variable to the ssh comms
    method, builds the ``client`` command line and reads the ssh settings
    from the workflow's contact file.

    Args:
        command: The name of the endpoint to call.
        args: Arguments for the endpoint; a falsy value is serialised as
            an empty dict.
        timeout: Value for ``--comms-timeout``; the option is omitted when
            this is falsy.

    Returns:
        tuple: ``(cmd, ssh_cmd, login_shell, cylc_path, message)`` where
        ``cmd`` is the command as a list, ``cylc_path`` is None if the
        contact file holds the string ``'None'``, and ``message`` is
        ``args`` dumped as JSON.

    """
    # Set environment variable to determine the communication for use on
    # the scheduler
    os.environ["CLIENT_COMMS_METH"] = CommsMeth.SSH.value

    timeout_opts = [f'--comms-timeout={timeout}'] if timeout else []
    cmd = ["client", *timeout_opts, self.workflow, command]

    contact = load_contact_file(self.workflow)
    ssh_cmd = contact[ContactFileFields.SCHEDULER_SSH_COMMAND]
    login_shell = contact[ContactFileFields.SCHEDULER_USE_LOGIN_SHELL]
    cylc_path = contact[ContactFileFields.SCHEDULER_CYLC_PATH]
    if cylc_path == 'None':
        cylc_path = None

    message = json.dumps(args if args else {})
    return cmd, ssh_cmd, login_shell, cylc_path, message
def _timeout_handler(workflow: str, host: str, port: Union[int, str]):
    """Handle the eventuality of a communication timeout with the workflow.

    Returns silently when ``workflow`` is None, when the contact file
    cannot be loaded, or when ``detect_old_contact_file`` raises
    AssertionError or ServiceFileError.

    Args:
        workflow (str): workflow name
        host (str): host name the client was talking to
        port (Union[int, str]): port number the client was talking to

    Raises:
        CylcError: if the contact file records a different host or port.
        WorkflowStopped: if ``detect_old_contact_file`` completes without
            raising, i.e. the workflow has stopped.

    """
    if workflow is None:
        return

    try:
        contact: Dict[str, str] = load_contact_file(workflow)
    except (IOError, ValueError, ServiceFileError):
        # Contact file does not exist or corrupted, workflow should be dead
        return

    contact_host: str = contact.get(ContactFileFields.HOST, '?')
    contact_port: str = contact.get(ContactFileFields.PORT, '?')
    same_location = contact_host == host and contact_port == str(port)
    if not same_location:
        raise CylcError(
            f'The workflow is no longer running at {host}:{port}\n'
            f'It has moved to {contact_host}:{contact_port}'
        )

    # Cannot connect, perhaps workflow is no longer running and is leaving
    # behind a contact file?
    try:
        detect_old_contact_file(workflow, contact)
    except (AssertionError, ServiceFileError):
        # old contact file exists and the workflow process still alive
        return

    # the workflow has stopped
    raise WorkflowStopped(workflow)
def get_location(workflow: str):
    """Extract host and ports from a workflow's contact file.

    Args:
        workflow (str): workflow name

    Returns:
        Tuple[str, int, int]: the host name (as returned by
        ``get_fqdn_by_host``), the port and the publish port.

    Raises:
        WorkflowStopped: if the contact file cannot be loaded
            (``ServiceFileError``), i.e. the workflow is not running.

    """
    try:
        contact = load_contact_file(workflow)
    except ServiceFileError:
        raise WorkflowStopped(workflow)

    fields = ContactFileFields
    return (
        get_fqdn_by_host(contact[fields.HOST]),
        int(contact[fields.PORT]),
        int(contact[fields.PUBLISH_PORT]),
    )
def test_load_contact_file(myflow):
    """The loaded contact data records the host the flow runs on."""
    assert load_contact_file(myflow.workflow)[CFF.HOST] == myflow.host