Example 1
def test_detect_old_contact_file_running(workflow):
    """It should raise an error if the workflow is running."""
    # a live workflow means the contact file is current, so probing for a
    # stale one must fail with ServiceFileError
    with pytest.raises(ServiceFileError):
        detect_old_contact_file(workflow.reg)
    # the (valid) contact file must not have been touched
    assert workflow.contact_file.exists()
Example 2
def test_detect_old_contact_file_none(workflow):
    """It should do nothing if there is no contact file."""
    contact = workflow.contact_file
    # start from a state where the contact file is absent
    contact.unlink()
    assert not contact.exists()
    # the call should return quietly (nothing to detect)
    detect_old_contact_file(workflow.reg)
    # and must not recreate the file as a side effect
    assert not contact.exists()
Example 3
def test_detect_old_contact_file_old_run(workflow):
    """It should remove the contact file from an old run."""
    # rewrite the contact file so the recorded COMMAND no longer matches
    # the live process, i.e. it looks like a leftover from an old run
    workflow.dump_contact(**{CFF.COMMAND: 'foo bar baz'})
    # with no matching process, the call must complete without raising
    detect_old_contact_file(workflow.reg)
    # and it should have cleaned up the stale contact file
    assert not workflow.contact_file.exists()
Example 4
def test_detect_old_contact_file_old_run(workflow, caplog, log_filter):
    """It should remove the contact file from an old run."""
    # make the recorded COMMAND differ from the real process so the file
    # appears to be left over from a previous run
    workflow.dump_contact(**{CFF.COMMAND: 'foo bar baz'})
    caplog.set_level(logging.INFO, logger=CYLC_LOG)

    # stale contact data => no error expected from the probe
    detect_old_contact_file(workflow.reg)

    # the stale file should be gone...
    assert not workflow.contact_file.exists()
    # ...and its removal should have been logged
    assert log_filter(caplog, contains='Removed contact file')
Example 5
def test_detect_old_contact_file_network_issue(workflow):
    """It should raise an error if there are network issues."""
    # modify the contact file to make it look like the HOST has changed
    # (NOTE: a previous comment wrongly said "PID" — the code sets HOST)
    workflow.dump_contact(
        **{
            # set the HOST to a non existent host
            CFF.HOST:
            'not-a-host.no-such.domain'
        })
    # detect_old_contact_file should report that it can't tell if the workflow
    # is running or not
    with pytest.raises(CylcError) as exc_ctx:
        detect_old_contact_file(workflow.reg)
    assert ('Cannot determine whether workflow is running'
            in str(exc_ctx.value))
    # the contact file should be left alone
    assert workflow.contact_file.exists()
Example 6
    def _timeout_handler(workflow: str, host: str, port: Union[int, str]):
        """Handle the eventuality of a communication timeout with the workflow.

        Checks the contact file to work out whether the workflow moved
        host/port, is still running, or has stopped.

        Args:
            workflow (str): workflow name
            host (str): host name we tried to contact
            port (Union[int, str]): port number we tried to contact
        Raises:
            CylcError:
                If the contact file records a different host/port, i.e.
                the workflow has moved since this client connected.
            WorkflowStopped:
                If the workflow process is no longer running (its stale
                contact file has been cleaned up).
        """
        # nothing to check without a workflow name
        if workflow is None:
            return

        try:
            contact_data: Dict[str, str] = load_contact_file(workflow)
        except (IOError, ValueError, ServiceFileError):
            # Contact file does not exist or corrupted, workflow should be dead
            return

        # compare the host/port we used against what the contact file records
        contact_host: str = contact_data.get(ContactFileFields.HOST, '?')
        contact_port: str = contact_data.get(ContactFileFields.PORT, '?')
        if (
            contact_host != host
            or contact_port != str(port)
        ):
            raise CylcError(
                f'The workflow is no longer running at {host}:{port}\n'
                f'It has moved to {contact_host}:{contact_port}'
            )

        # Cannot connect, perhaps workflow is no longer running and is leaving
        # behind a contact file?
        try:
            detect_old_contact_file(workflow, contact_data)
        except (AssertionError, ServiceFileError):
            # old contact file exists and the workflow process still alive
            return
        else:
            # the workflow has stopped
            raise WorkflowStopped(workflow)
Example 7
    def _timeout_handler(workflow: str, host: str, port: Union[int, str]):
        """Handle the eventuality of a communication timeout with the workflow.

        Args:
            workflow (str): workflow name
            host (str): host name we tried to contact
            port (Union[int, str]): port number we tried to contact
        Raises:
            WorkflowStopped:
                If the workflow has already stopped (no live process
                behind its contact file).
        """
        # nothing to check without a workflow name
        if workflow is None:
            return
        # Cannot connect, perhaps workflow is no longer running and is leaving
        # behind a contact file?
        try:
            detect_old_contact_file(workflow, (host, port))
        except (AssertionError, ServiceFileError):
            # * contact file not have matching (host, port) to workflow proc
            # * old contact file exists and the workflow process still alive
            return
        else:
            # the workflow has stopped
            raise WorkflowStopped(workflow)
Example 8
def test_detect_old_contact_file_removal_errors(
    workflow,
    monkeypatch,
    caplog,
    log_filter,
    process_running,
    contact_present_after,
    raises_error,
):
    """Test issues with removing the contact file are handled correctly.

    Args:
        process_running:
            If True, the workflow process appears to still be running, so
            detect_old_contact_file must *not* try to remove the contact
            file.
        contact_present_after:
            If False, the contact file vanishes midway through the
            operation (another client, cylc clean, aliens...). This is
            fine and nothing should be logged.
        raises_error:
            If True, removing the contact file raises an OSError (not a
            FileNotFoundError), which should be logged.

    """

    # fake the process liveness check; optionally delete the contact file
    # midway through to simulate a concurrent removal
    def _fake_process_check(*_):
        if not contact_present_after:
            workflow.contact_file.unlink()
        return process_running

    monkeypatch.setattr(
        'cylc.flow.workflow_files._is_process_running',
        _fake_process_check,
    )

    if raises_error:
        # force os.unlink to raise an arbitrary error
        def _broken_unlink(*_):
            raise OSError('mocked-os-error')

        monkeypatch.setattr(
            'cylc.flow.workflow_files.os.unlink',
            _broken_unlink,
        )

    caplog.set_level(logging.INFO, logger=CYLC_LOG)

    # try to remove the contact file
    if process_running:
        # a running process must make the probe error out
        with pytest.raises(ServiceFileError):
            detect_old_contact_file(workflow.reg)
    else:
        detect_old_contact_file(workflow.reg)

    # removal only succeeds when the process is dead, the file survived
    # until removal time, and unlink did not error
    remove_succeeded = (
        not process_running
        and contact_present_after
        and not raises_error
    )
    # a removal failure is only reported when unlink errored on a file
    # that was still present
    remove_failed = (
        not process_running
        and contact_present_after
        and raises_error
    )

    # check the appropriate messages were logged
    assert bool(log_filter(
        caplog,
        contains='Removed contact file',
    )) is remove_succeeded
    assert bool(
        log_filter(
            caplog,
            contains=(f'Failed to remove contact file for {workflow.reg}:'
                      '\nmocked-os-error'),
        )) is remove_failed
Example 9
def scheduler_cli(options: 'Values', workflow_id: str) -> None:
    """Run the workflow.

    This function should contain all of the command line facing
    functionality of the Scheduler, exit codes, logging, etc.

    The Scheduler itself should be a Python object you can import and
    run in a regular Python session so cannot contain this kind of
    functionality.

    Args:
        options: Parsed command line options.
        workflow_id: The workflow to run (validated/normalised below).

    """
    # Parse workflow name but delay Cylc 7 suiter.rc deprecation warning
    # until after the start-up splash is printed.
    # TODO: singleton
    (workflow_id, ), _ = parse_ids(
        workflow_id,
        constraint='workflows',
        max_workflows=1,
        # warn_depr=False,  # TODO
    )
    try:
        detect_old_contact_file(workflow_id)
    except ServiceFileError as exc:
        # the workflow is already running; resume it via GraphQL rather
        # than starting a second scheduler, then exit successfully
        print(f"Resuming already-running workflow\n\n{exc}")
        pclient = WorkflowRuntimeClient(
            workflow_id,
            timeout=options.comms_timeout,
        )
        mutation_kwargs = {
            'request_string': RESUME_MUTATION,
            'variables': {
                'wFlows': [workflow_id]
            }
        }
        pclient('graphql', mutation_kwargs)
        sys.exit(0)

    # re-execute on another host if required
    _distribute(options.host)

    # print the start message
    if (cylc.flow.flags.verbosity > -1
            and (options.no_detach or options.format == 'plain')):
        print(cparse(cylc_header()))

    # warn about Cylc 7 back-compat mode if it is active
    if cylc.flow.flags.cylc7_back_compat:
        LOG.warning(SUITERC_DEPR_MSG)

    # setup the scheduler
    # NOTE: asyncio.run opens an event loop, runs your coro,
    #       then shutdown async generators and closes the event loop
    scheduler = Scheduler(workflow_id, options)
    asyncio.run(_setup(scheduler))

    # daemonize if requested
    # NOTE: asyncio event loops cannot persist across daemonization
    #       ensure you have tidied up all threads etc before daemonizing
    if not options.no_detach:
        from cylc.flow.daemonize import daemonize
        daemonize(scheduler)

    # setup loggers
    _open_logs(workflow_id, options.no_detach)

    # run the workflow
    ret = asyncio.run(_run(scheduler))

    # exit
    # NOTE: we must clean up all asyncio / threading stuff before exiting
    # NOTE: any threads which include sleep statements could cause
    #       sys.exit to hang if not shutdown properly
    LOG.info("DONE")
    close_log(LOG)
    sys.exit(ret)
Example 10
def scheduler_cli(parser, options, reg):
    """Run the workflow.

    This function should contain all of the command line facing
    functionality of the Scheduler, exit codes, logging, etc.

    The Scheduler itself should be a Python object you can import and
    run in a regular Python session so cannot contain this kind of
    functionality.

    Args:
        parser: Command line parser (unused here; kept for the CLI hook).
        options: Parsed command line options.
        reg: The workflow registration name (validated/normalised below).

    """
    workflow_files.validate_flow_name(reg)
    reg = os.path.normpath(reg)
    try:
        workflow_files.detect_old_contact_file(reg)
    except ServiceFileError as exc:
        # the workflow is already running; resume it via GraphQL rather
        # than starting a second scheduler, then exit successfully
        print(f"Resuming already-running workflow\n\n{exc}")
        pclient = WorkflowRuntimeClient(reg, timeout=options.comms_timeout)
        mutation_kwargs = {
            'request_string': RESUME_MUTATION,
            'variables': {
                'wFlows': [reg]
            }
        }
        pclient('graphql', mutation_kwargs)
        sys.exit(0)

    # re-execute on another host if required
    _distribute(options.host)

    # print the start message
    if (cylc.flow.flags.verbosity > -1
            and (options.no_detach or options.format == 'plain')):
        print(cparse(cylc_header()))

    # setup the scheduler
    # NOTE: asyncio.run opens an event loop, runs your coro,
    #       then shutdown async generators and closes the event loop
    scheduler = Scheduler(reg, options)
    asyncio.run(_setup(scheduler))

    # daemonize if requested
    # NOTE: asyncio event loops cannot persist across daemonization
    #       ensure you have tidied up all threads etc before daemonizing
    if not options.no_detach:
        from cylc.flow.daemonize import daemonize
        daemonize(scheduler)

    # setup loggers
    _open_logs(reg, options.no_detach)

    # run the workflow
    ret = asyncio.run(_run(scheduler))

    # exit
    # NOTE: we must clean up all asyncio / threading stuff before exiting
    # NOTE: any threads which include sleep statements could cause
    #       sys.exit to hang if not shutdown properly
    LOG.info("DONE")
    _close_logs()
    sys.exit(ret)