Example #1
def main(parser, options, suite, event_msg, event_id):
    suite = os.path.normpath(suite)
    LOG.info('Send to suite %s: "%s" (%s)', suite, event_msg, event_id)
    pclient = get_client(suite, timeout=options.comms_timeout)

    max_n_tries = int(options.max_n_tries)
    retry_intvl_secs = float(options.retry_intvl_secs)

    mutation_kwargs = {
        'request_string': MUTATION,
        'variables': {
            'wFlows': [suite],
            'eventMsg': event_msg,
            'eventId': event_id,
        }
    }

    for i_try in range(max_n_tries):
        try:
            pclient('graphql', mutation_kwargs)
        except ClientError as exc:
            LOG.exception(exc)
            LOG.info(MSG_SEND_FAILED, i_try + 1, max_n_tries)
            if i_try == max_n_tries - 1:  # final attempt
                raise CylcError('send failed')
            LOG.info(MSG_SEND_RETRY, retry_intvl_secs, options.comms_timeout)
            sleep(retry_intvl_secs)
        else:
            if i_try > 0:
                LOG.info(MSG_SEND_SUCCEED, i_try + 1, max_n_tries)
            break
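A minimal sketch of driving this entry point by hand, assuming hypothetical option values (in practice cylc's option parser builds "options" and passes it in):

from types import SimpleNamespace

# Hypothetical invocation; the option names mirror those read above.
options = SimpleNamespace(
    comms_timeout=10.0,    # client socket timeout (seconds)
    max_n_tries=3,         # attempts before CylcError('send failed')
    retry_intvl_secs=5.0,  # pause between attempts (seconds)
)
main(None, options, 'my_suite', 'file ready', '00001')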
Example #2
async def scan_one(reg, host, port, timeout=None, methods=None):
    if not methods:
        methods = ['identify']

    if is_remote_host(host):
        try:
            host = get_host_ip_by_name(host)  # IP reduces DNS traffic
        except socket.error as exc:
            if cylc.flow.flags.debug:
                raise
            sys.stderr.write("ERROR: %s: %s\n" % (exc, host))
            return (reg, host, port, None)

    # NOTE: Connect to the suite by host:port, this way the
    #       SuiteRuntimeClient will not attempt to check the contact file,
    #       which would be unnecessary as we have already done so.
    # NOTE: This part of the scan *is* IO blocking.
    client = SuiteRuntimeClient(reg, host=host, port=port, timeout=timeout)

    result = {}
    for method in methods:
        # work our way up the chain of identity methods, extract as much
        # information as we can before the suite rejects us
        try:
            msg = await client.async_request(method)
        except ClientTimeout:
            LOG.exception(f"Timeout: name:{reg}, host:{host}, port:{port}")
            return (reg, host, port, MSG_TIMEOUT)
        except ClientError:
            LOG.exception("ClientError")
            return (reg, host, port, result or None)
        else:
            result.update(msg)
    return (reg, host, port, result)
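A minimal sketch of running the scan coroutine, with a hypothetical registration and port; "result" comes back as None, MSG_TIMEOUT, or a dict of identity info:

import asyncio

# Hypothetical target workflow; timeout is the client socket timeout.
reg, host, port, result = asyncio.run(
    scan_one('my_suite', 'localhost', 43001, timeout=5.0))
print(reg, result)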
Example #3
File: server.py Project: cylc/cylc
    def _receiver(self, message):
        """Wrap incoming messages and dispatch them to exposed methods.

        Args:
            message (dict): message contents
        """
        # determine the server method to call
        try:
            method = getattr(self, message['command'])
            args = message['args']
            args.update({'user': message['user']})
            if 'meta' in message:
                args['meta'] = message['meta']
        except KeyError:
            # malformed message
            return {'error': {
                'message': 'Request missing required field(s).'}}
        except AttributeError:
            # no exposed method by that name
            return {'error': {
                'message': 'No method by the name "%s"' % message['command']}}

        # generate response
        try:
            response = method(**args)
        except Exception as exc:
            # includes incorrect arguments (TypeError)
            LOG.exception(exc)  # note the error server side
            import traceback
            return {'error': {
                'message': str(exc), 'traceback': traceback.format_exc()}}

        return {'data': response}
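A minimal sketch of the request shape _receiver expects, assuming a hypothetical server instance and command; unknown commands take the AttributeError branch above:

message = {
    'command': 'identify',  # name of an exposed server method
    'args': {},
    'user': 'alice',
}
response = server._receiver(message)  # {'data': ...} or {'error': {...}}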
Example #4
async def _wrapper(fcn, scheduler, state, timings=None):
    """Wrapper for all plugin functions.

    * Logs the function's execution.
    * Times the function.
    * Catches any exceptions which aren't subclasses of CylcError.

    """
    sig = f'{fcn.__module__}:{fcn.__name__}'
    LOG.debug(f'main_loop [run] {sig}')
    start_time = time()
    try:
        await fcn(scheduler, state)
    except CylcError as exc:
        # allow CylcErrors through (e.g. SchedulerStop)
        # NOTE: the `from None` bit gets rid of this gunk:
        # > During handling of the above exception, another exception occurred
        raise MainLoopPluginException(exc) from None
    except Exception as exc:
        LOG.error(f'Error in main loop plugin {sig}')
        LOG.exception(exc)
    duration = time() - start_time
    LOG.debug(f'main_loop [end] {sig} ({duration:.3f}s)')
    if timings is not None:
        timings.append((start_time, duration))
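A minimal sketch of a plugin coroutine run through the wrapper; the plugin here is hypothetical, and "timings" collects (start_time, duration) pairs:

import asyncio

async def my_plugin(scheduler, state):
    # hypothetical plugin body; mutate the shared state dict
    state['ticks'] = state.get('ticks', 0) + 1

timings = []
asyncio.run(_wrapper(my_plugin, scheduler=None, state={}, timings=timings))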
Example #5
 def _run_command_init(cls,
                       ctx,
                       bad_hosts=None,
                       callback=None,
                       callback_args=None,
                       callback_255=None,
                       callback_255_args=None):
     """Prepare and launch shell command in ctx."""
     try:
         if ctx.cmd_kwargs.get('stdin_files'):
             if len(ctx.cmd_kwargs['stdin_files']) > 1:
                 stdin_file = cls.get_temporary_file()
                 for file_ in ctx.cmd_kwargs['stdin_files']:
                     if hasattr(file_, 'read'):
                         stdin_file.write(file_.read())
                     else:
                         with open(file_, 'rb') as openfile:
                             stdin_file.write(openfile.read())
                 stdin_file.seek(0)
             elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                 stdin_file = ctx.cmd_kwargs['stdin_files'][0]
             else:
                 stdin_file = open(  # noqa: SIM115
                     # (nasty use of file handles, should avoid in future)
                     ctx.cmd_kwargs['stdin_files'][0],
                     'rb')
         elif ctx.cmd_kwargs.get('stdin_str'):
             stdin_file = cls.get_temporary_file()
             stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
             stdin_file.seek(0)
         else:
             stdin_file = DEVNULL
         proc = procopen(
             ctx.cmd,
             stdin=stdin_file,
             stdoutpipe=True,
             stderrpipe=True,
             # Execute command as a process group leader,
             # so we can use "os.killpg" to kill the whole group.
             preexec_fn=os.setpgrp,
             env=ctx.cmd_kwargs.get('env'),
             usesh=ctx.cmd_kwargs.get('shell'))
         # calls to open a shell are aggregated in cylc_subproc.procopen()
         # with logging for what is calling it and the commands given
     except OSError as exc:
         if exc.filename is None:
             exc.filename = ctx.cmd[0]
         LOG.exception(exc)
         ctx.ret_code = 1
         ctx.err = str(exc)
         cls._run_command_exit(ctx,
                               bad_hosts=bad_hosts,
                               callback=callback,
                               callback_args=callback_args,
                               callback_255=callback_255,
                               callback_255_args=callback_255_args)
         return None
     else:
         LOG.debug(ctx.cmd)
         return proc
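A minimal sketch of the context object this classmethod consumes, using a hypothetical SimpleNamespace stand-in for the real context class, and assuming the enclosing class is cylc's SubProcPool:

from types import SimpleNamespace

ctx = SimpleNamespace(
    cmd=['echo', 'hello'],
    cmd_kwargs={'stdin_str': 'unused'},  # or 'stdin_files': [...]
    ret_code=None,
    err=None,
)
proc = SubProcPool._run_command_init(ctx)  # hypothetical call site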
Example #6
def main(parser, options, suite, event_msg, event_id):
    LOG.info('Send to suite %s: "%s" (%s)', suite, event_msg, event_id)

    pclient = SuiteRuntimeClient(suite, options.owner, options.host,
                                 options.port)

    max_n_tries = int(options.max_n_tries)
    retry_intvl_secs = float(options.retry_intvl_secs)

    for i_try in range(max_n_tries):
        try:
            pclient('put_ext_trigger', {
                'message': event_msg,
                'id': event_id
            },
                    timeout=options.comms_timeout)
        except ClientError as exc:
            LOG.exception(exc)
            LOG.info(MSG_SEND_FAILED, i_try + 1, max_n_tries)
            if i_try == max_n_tries - 1:  # final attempt
                raise CylcError('send failed')
            else:
                LOG.info(MSG_SEND_RETRY, retry_intvl_secs,
                         options.comms_timeout)
                sleep(retry_intvl_secs)
        else:
            if i_try > 0:
                LOG.info(MSG_SEND_SUCCEED, i_try + 1, max_n_tries)
            break
Example #7
def ensure_keypair_exists(auth_parent_dir, auth_child_dir, tag):
    """Check if a set of public/private keys exist and if not, create them.

    Args:
        auth_parent_dir (str):
            Parent containing public/private key directories (auth_child_dir).
        auth_child_dir (str):
            Child containing public/private keys.
        tag (str): Filename/Basename of key.

    Returns:
        bool: True if the keypair (now) exists.

    """
    public_key_location, private_key_location = return_key_locations(
        auth_child_dir)  # where the keys should be, else where to create them
    if (os.path.exists(public_key_location)
            and os.path.exists(private_key_location)):
        return True

    # Ensure parent dir exists (child dir will be created if it does not)
    if not os.path.exists(auth_parent_dir):
        os.mkdir(auth_parent_dir)
    try:
        generate_key_store(auth_parent_dir, tag)
        return True
    except Exception:
        # Catch anything so we can otherwise be sure the key store exists.
        LOG.exception("Failed to create %s authentication keys." % tag)
        return False
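A minimal sketch of guarding key creation at startup, with hypothetical paths and tag:

# Hypothetical directories; parent holds the key directory, child holds keys.
if not ensure_keypair_exists('/tmp/auth', '/tmp/auth/server_keys', 'server'):
    raise RuntimeError('could not create server authentication keys')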
Example #8
def main(parser: COP, options: 'Values', workflow: str, event_msg: str,
         event_id: str) -> None:
    workflow, _ = parse_reg(workflow)
    LOG.info('Send to workflow %s: "%s" (%s)', workflow, event_msg, event_id)
    pclient = get_client(workflow, timeout=options.comms_timeout)

    max_n_tries = int(options.max_n_tries)
    retry_intvl_secs = float(options.retry_intvl_secs)

    mutation_kwargs = {
        'request_string': MUTATION,
        'variables': {
            'wFlows': [workflow],
            'eventMsg': event_msg,
            'eventId': event_id,
        }
    }

    for i_try in range(max_n_tries):
        try:
            pclient('graphql', mutation_kwargs)
        except ClientError as exc:
            LOG.exception(exc)
            LOG.info(MSG_SEND_FAILED, i_try + 1, max_n_tries)
            if i_try == max_n_tries - 1:  # final attempt
                raise CylcError('send failed')
            LOG.info(MSG_SEND_RETRY, retry_intvl_secs, options.comms_timeout)
            sleep(retry_intvl_secs)
        else:
            if i_try > 0:
                LOG.info(MSG_SEND_SUCCEED, i_try + 1, max_n_tries)
            break
Example #9
 def create_directory(dir_, name):
     """Create directory. Raise GlobalConfigError on error."""
     try:
         os.makedirs(dir_, exist_ok=True)
     except OSError as exc:
         LOG.exception(exc)
         raise GlobalConfigError(
             'Failed to create directory "' + name + '"')
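A minimal sketch of a call site, with a hypothetical directory and label:

import os

# "name" is the human-readable label used in the error message.
create_directory(os.path.expanduser('~/cylc-run'), 'run directory')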
Example #10
 def insert_db_job(self, row_idx, row):
     """Load job element from DB post restart."""
     if row_idx == 0:
         LOG.info("LOADING job data")
     (point_string, name, status, submit_num, time_submit, time_run,
      time_run_exit, batch_sys_name, batch_sys_job_id, platform_name) = row
     if status not in JOB_STATUS_SET:
         return
     t_id = f'{self.workflow_id}{ID_DELIM}{point_string}{ID_DELIM}{name}'
     j_id = f'{t_id}{ID_DELIM}{submit_num}'
     try:
         tdef = self.schd.config.get_taskdef(name)
         j_owner = self.schd.owner
         if platform_name:
             j_host = get_host_from_platform(get_platform(platform_name))
         else:
             j_host = self.schd.host
         j_buf = PbJob(
             stamp=f'{j_id}@{time()}',
             id=j_id,
             submit_num=submit_num,
             state=status,
             task_proxy=t_id,
             submitted_time=time_submit,
             started_time=time_run,
             finished_time=time_run_exit,
             batch_sys_name=batch_sys_name,
             batch_sys_job_id=batch_sys_job_id,
             host=j_host,
             owner=j_owner,
             name=name,
             cycle_point=point_string,
         )
         # Add in log files.
         j_buf.job_log_dir = get_task_job_log(self.schd.suite, point_string,
                                              name, submit_num)
         overrides = self.schd.task_events_mgr.broadcast_mgr.get_broadcast(
             TaskID.get(name, point_string))
         if overrides:
             rtconfig = pdeepcopy(tdef.rtconfig)
             poverride(rtconfig, overrides, prepend=True)
         else:
             rtconfig = tdef.rtconfig
         j_buf.extra_logs.extend([
             os.path.expanduser(os.path.expandvars(log_file))
             for log_file in rtconfig['extra log files']
         ])
     except SuiteConfigError:
         LOG.exception(
             ('ignoring job %s from the suite run database\n'
              '(its task definition has probably been deleted).') % j_id)
     except Exception:
         LOG.exception('could not load job %s' % j_id)
     else:
         self.added[j_id] = j_buf
         self.task_jobs.setdefault(t_id, set()).add(j_id)
         self.updates_pending = True
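A minimal sketch of the DB row this loader unpacks, with hypothetical values in the order destructured above:

row = (
    '20240101T0000Z',        # point_string
    'foo',                   # name
    'succeeded',             # status
    1,                       # submit_num
    '2024-01-01T00:00:00Z',  # time_submit
    '2024-01-01T00:00:05Z',  # time_run
    '2024-01-01T00:01:00Z',  # time_run_exit
    'background',            # batch_sys_name
    '12345',                 # batch_sys_job_id
    'localhost',             # platform_name
)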
Example #11
 def _manip_task_jobs_callback(self,
                               ctx,
                               suite,
                               itasks,
                               summary_callback,
                               more_callbacks=None):
     """Callback when submit/poll/kill tasks command exits."""
     if ctx.ret_code:
         LOG.error(ctx)
     else:
         LOG.debug(ctx)
     # A dict for easy reference of (CYCLE, NAME, SUBMIT_NUM) -> TaskProxy
     #
     # Note for "reload": A TaskProxy instance may be replaced on reload, so
     # the "itasks" list may not reference the TaskProxy objects that
     # replace the old ones. The .reload_successor attribute provides the
     # link(s) for us to get to the latest replacement.
     #
     # Note for "kill": It is possible for a job to trigger its trap and
     # report back to the suite back this logic is called. If so, the task
     # will no longer be TASK_STATUS_SUBMITTED or TASK_STATUS_RUNNING, and
     # its output line will be ignored here.
     tasks = {}
     for itask in itasks:
         while itask.reload_successor is not None:
             itask = itask.reload_successor
         if itask.point is not None and itask.submit_num:
             submit_num = "%02d" % (itask.submit_num)
             tasks[(str(itask.point), itask.tdef.name, submit_num)] = itask
     handlers = [(self.batch_sys_mgr.OUT_PREFIX_SUMMARY, summary_callback)]
     if more_callbacks:
         for prefix, callback in more_callbacks.items():
             handlers.append((prefix, callback))
     out = ctx.out
     if not out:
         out = ""
     bad_tasks = dict(tasks)
     for line in out.splitlines(True):
         for prefix, callback in handlers:
             if line.startswith(prefix):
                 line = line[len(prefix):].strip()
                 try:
                     path = line.split("|", 2)[1]  # timestamp, path, status
                     point, name, submit_num = path.split(os.sep, 2)
                     if prefix == self.batch_sys_mgr.OUT_PREFIX_SUMMARY:
                         del bad_tasks[(point, name, submit_num)]
                     itask = tasks[(point, name, submit_num)]
                     callback(suite, itask, ctx, line)
                 except (LookupError, ValueError, KeyError) as exc:
                     LOG.warning('Unhandled %s output: %s', ctx.cmd_key,
                                 line)
                     LOG.exception(exc)
     # Task jobs that are in the original command but did not get a status
     # in the output. Handle as failures.
     for key, itask in sorted(bad_tasks.items()):
         line = ("|".join([ctx.timestamp, os.sep.join(key), "1"]) + "\n")
         summary_callback(suite, itask, ctx, line)
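A minimal sketch of a summary line this callback parses, with hypothetical values (format: "<timestamp>|<cycle/name/submit_num>|<status>"):

import os

line = '2024-01-01T00:00:00Z|20240101T0000Z/foo/01|0'
point, name, submit_num = line.split('|', 2)[1].split(os.sep, 2)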
Example #12
async def scan_one(reg, host, port, pub_port, api, timeout=None, methods=None):
    """Connect to and identify workflow server if possible.

    Args:
        reg (str): Registered name of workflow.
        host (str): Workflow host.
        port (int): Workflow server port.
        pub_port (int): Workflow publisher port.
        api (str): Workflow API version.
        timeout (float, optional): Client socket receiver timeout.
        methods (list): List of methods/endpoints to request.

    Returns:
        tuple: (reg, host, port, pub_port, result)

    """
    if not methods:
        methods = ['identify']

    if is_remote_host(host):
        try:
            host = get_host_ip_by_name(host)  # IP reduces DNS traffic
        except socket.error as exc:
            if cylc.flow.flags.debug:
                raise
            sys.stderr.write("ERROR: %s: %s\n" % (exc, host))
            return (reg, host, port, pub_port, api, None)

    # NOTE: Connect to the suite by host:port, this way the
    #       SuiteRuntimeClient will not attempt to check the contact file,
    #       which would be unnecessary as we have already done so.
    # NOTE: This part of the scan *is* IO blocking.
    client = SuiteRuntimeClient(reg, host=host, port=port, timeout=timeout)

    result = {}
    for method in methods:
        # work our way up the chain of identity methods, extract as much
        # information as we can before the suite rejects us
        try:
            msg = await client.async_request(method)
        except ClientTimeout:
            LOG.exception(
                "Timeout: name:%s, host:%s, port:%s", reg, host, port)
            return (reg, host, port, pub_port, api, MSG_TIMEOUT)
        except ClientError:
            LOG.exception("ClientError")
            return (reg, host, port, pub_port, api, result or None)
        else:
            result.update(msg)
    return (reg, host, port, pub_port, api, result)
Example #13
def run_reftest(config, ctx):
    """Run reference test at shutdown."""
    reffilename = config.get_ref_log_name()
    curfilename = get_workflow_test_log_name(ctx.workflow)
    ref = _load_reflog(reffilename)
    cur = _load_reflog(curfilename)
    if ref == cur:
        LOG.info('WORKFLOW REFERENCE TEST PASSED')
    else:
        exc = WorkflowEventError(
            'WORKFLOW REFERENCE TEST FAILED\n'
            'triggering is NOT consistent with the reference log:\n%s\n' %
            '\n'.join(unified_diff(ref, cur, 'reference', 'this run')))
        LOG.exception(exc)
        raise exc
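A minimal sketch of the diff emitted on failure, with hypothetical log lines:

from difflib import unified_diff

ref = ["triggered off ['foo.1']"]
cur = ["triggered off ['bar.1']"]
print('\n'.join(unified_diff(ref, cur, 'reference', 'this run')))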
Example #14
 def _event_email_callback(self, proc_ctx, schd_ctx):
     """Call back when email notification command exits."""
     for id_key in proc_ctx.cmd_kwargs["id_keys"]:
         key1, point, name, submit_num = id_key
         try:
             if proc_ctx.ret_code == 0:
                 del self.event_timers[id_key]
                 log_ctx = SubProcContext((key1, submit_num), None)
                 log_ctx.ret_code = 0
                 log_task_job_activity(log_ctx, schd_ctx.suite, point, name,
                                       submit_num)
             else:
                 self.event_timers[id_key].unset_waiting()
         except KeyError as exc:
             LOG.exception(exc)
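A minimal sketch of an id_key this callback iterates, with hypothetical values in the form ((handler, event), cycle point, task name, submit number):

id_key = (('event-mail', 'failed'), '20240101T0000Z', 'foo', 1)
key1, point, name, submit_num = id_key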
Example #15
    def _listener(self):
        """The server main loop, listen for and serve requests."""
        while True:
            # process any commands passed to the listener by its parent process
            if self.queue.qsize():
                command = self.queue.get()
                if command == 'STOP':
                    break
                raise ValueError('Unknown command "%s"' % command)

            try:
                # wait RECV_TIMEOUT for a message
                msg = self.socket.recv_string()
            except zmq.error.Again:
                # timeout, continue with the loop, this allows the listener
                # thread to stop
                continue
            except zmq.error.ZMQError as exc:
                LOG.exception('unexpected error: %s', exc)
                continue

            # attempt to decode the message, authenticating the user in the
            # process
            try:
                message = decode_(msg)
            except Exception as exc:  # purposefully catch generic exception
                # failed to decode message, possibly resulting from failed
                # authentication
                LOG.exception('failed to decode message: "%s"', exc)
            else:
                # success case - serve the request
                res = self._receiver(message)
                if message['command'] in PB_METHOD_MAP:
                    response = res['data']
                else:
                    response = encode_(res).encode()
                # send back the string to bytes response
                self.socket.send(response)

            # Note: we are using CurveZMQ to secure the messages (see
            # self.curve_auth, self.socket.curve_...key etc.). We have set up
            # public-key cryptography on the ZMQ messaging and sockets, so
            # there is no need to encrypt messages ourselves before sending.

            sleep(0)  # yield control to other threads
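A minimal sketch of stopping the listener from the parent thread; the queue and thread names here are hypothetical, but 'STOP' matches the sentinel checked above:

server.queue.put('STOP')   # hypothetical server/queue wiring
listener_thread.join()     # hypothetical thread handle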
Example #16
 def _run_command_init(cls, ctx, callback=None, callback_args=None):
     """Prepare and launch shell command in ctx."""
     try:
         if ctx.cmd_kwargs.get('stdin_files'):
             if len(ctx.cmd_kwargs['stdin_files']) > 1:
                 stdin_file = cls.get_temporary_file()
                 for file_ in ctx.cmd_kwargs['stdin_files']:
                     if hasattr(file_, 'read'):
                         stdin_file.write(file_.read())
                     else:
                         with open(file_, 'rb') as openfile:
                             stdin_file.write(openfile.read())
                 stdin_file.seek(0)
             elif hasattr(ctx.cmd_kwargs['stdin_files'][0], 'read'):
                 stdin_file = ctx.cmd_kwargs['stdin_files'][0]
             else:
                 stdin_file = open(
                     ctx.cmd_kwargs['stdin_files'][0], 'rb')
         elif ctx.cmd_kwargs.get('stdin_str'):
             stdin_file = cls.get_temporary_file()
             stdin_file.write(ctx.cmd_kwargs.get('stdin_str').encode())
             stdin_file.seek(0)
         else:
             stdin_file = open(os.devnull)
         proc = procopen(
             ctx.cmd, stdin=stdin_file, stdoutpipe=True, stderrpipe=True,
             # Execute command as a process group leader,
             # so we can use "os.killpg" to kill the whole group.
             preexec_fn=os.setpgrp,
             env=ctx.cmd_kwargs.get('env'),
             usesh=ctx.cmd_kwargs.get('shell'))
         # calls to open a shell are aggregated in cylc_subproc.procopen()
         # with logging for what is calling it and the commands given
     except (IOError, OSError) as exc:
         if exc.filename is None:
             exc.filename = ctx.cmd[0]
         LOG.exception(exc)
         ctx.ret_code = 1
         ctx.err = str(exc)
         cls._run_command_exit(ctx, callback, callback_args)
         return None
     else:
         LOG.debug(ctx.cmd)
         return proc
Example #17
def run_reftest(schd: 'Scheduler') -> None:
    """Run reference test at shutdown."""
    reffilename = schd.config.get_ref_log_name()
    curfilename = get_workflow_test_log_name(schd.workflow)
    ref = _load_reflog(reffilename)
    cur = _load_reflog(curfilename)
    if not ref:
        raise WorkflowEventError("No triggering events in reference log.")
    if not cur:
        raise WorkflowEventError("No triggering events in test log.")
    if ref == cur:
        LOG.info('WORKFLOW REFERENCE TEST PASSED')
    else:
        exc = WorkflowEventError(
            'WORKFLOW REFERENCE TEST FAILED\n'
            'triggering is NOT consistent with the reference log:\n%s\n'
            % '\n'.join(unified_diff(ref, cur, 'reference', 'this run'))
        )
        LOG.exception(exc)
        raise exc
Example #18
def log_task_job_activity(ctx, suite, point, name, submit_num=None):
    """Log an activity for a task job."""
    ctx_str = str(ctx)
    if not ctx_str:
        return
    if isinstance(ctx.cmd_key, tuple):  # An event handler
        submit_num = ctx.cmd_key[-1]
    job_activity_log = get_task_job_activity_log(suite, point, name,
                                                 submit_num)
    try:
        with open(job_activity_log, "ab") as handle:
            handle.write((ctx_str + '\n').encode())
    except IOError as exc:
        # This happens when there is no job directory, e.g. if a job host
        # selection command causes a submission failure; in this case, just
        # send the information to the suite log.
        LOG.exception(exc)
        LOG.info(ctx_str)
    if ctx.cmd and ctx.ret_code:
        LOG.error(ctx_str)
    elif ctx.cmd:
        LOG.debug(ctx_str)
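A minimal sketch of logging one activity record, assuming cylc's SubProcContext; a string cmd_key marks a plain command rather than an event handler:

ctx = SubProcContext('poll', ['true'])  # hypothetical command context
ctx.ret_code = 0
log_task_job_activity(ctx, 'my_suite', '20240101T0000Z', 'foo', submit_num=1)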
Example #19
    def _listener(self):
        """The server main loop, listen for and serve requests."""
        while True:
            # process any commands passed to the listener by its parent process
            if self.queue.qsize():
                command = self.queue.get()
                if command == 'STOP':
                    break
                else:
                    raise ValueError('Unknown command "%s"' % command)

            try:
                # wait RECV_TIMEOUT for a message
                msg = self.socket.recv_string()
            except zmq.error.Again:
                # timeout, continue with the loop, this allows the listener
                # thread to stop
                continue

            # attempt to decode the message, authenticating the user in the
            # process
            try:
                message = self.decode(msg, self.secret())
            except Exception as exc:  # purposefully catch generic exception
                # failed to decode message, possibly resulting from failed
                # authentication
                LOG.exception(f'failed to decode message: {str(exc)}')
            else:
                # success case - serve the request
                res = self._receiver(message)
                if message['command'] in PB_METHOD_MAP:
                    response = res['data']
                else:
                    response = self.encode(res, self.secret()).encode()
                # send back the string to bytes response
                self.socket.send(response)

            sleep(0)  # yield control to other threads
Example #20
 def _job_logs_retrieval_callback(self, proc_ctx, schd_ctx):
     """Call back when log job retrieval completes."""
     if proc_ctx.ret_code:
         LOG.error(proc_ctx)
     else:
         LOG.debug(proc_ctx)
     for id_key in proc_ctx.cmd_kwargs["id_keys"]:
         key1, point, name, submit_num = id_key
         try:
             # All completed jobs are expected to have a "job.out".
             fnames = [JOB_LOG_OUT]
             try:
                 if key1[1] not in 'succeeded':
                     fnames.append(JOB_LOG_ERR)
             except TypeError:
                 pass
             fname_oks = {}
             for fname in fnames:
                 fname_oks[fname] = os.path.exists(
                     get_task_job_log(schd_ctx.suite, point, name,
                                      submit_num, fname))
             # All expected paths must exist to record a good attempt
             log_ctx = SubProcContext((key1, submit_num), None)
             if all(fname_oks.values()):
                 log_ctx.ret_code = 0
                 del self.event_timers[id_key]
             else:
                 log_ctx.ret_code = 1
                 log_ctx.err = "File(s) not retrieved:"
                 for fname, exist_ok in sorted(fname_oks.items()):
                     if not exist_ok:
                         log_ctx.err += " %s" % fname
                 self.event_timers[id_key].unset_waiting()
             log_task_job_activity(log_ctx, schd_ctx.suite, point, name,
                                   submit_num)
         except KeyError as exc:
             LOG.exception(exc)
Example #21
async def _run(parser, options, reg, is_restart, scheduler):
    """Run the workflow and handle exceptions."""
    # run cylc run
    ret = 0
    try:
        await scheduler.run()

    # stop cylc stop
    except SchedulerError:
        ret = 1
    except KeyboardInterrupt as exc:
        try:
            await scheduler.shutdown(exc)
        except Exception as exc2:
            # In case of exceptions in the shutdown method itself.
            LOG.exception(exc2)
            raise exc2 from None
        ret = 2
    except Exception:
        ret = 3

    # kthxbye
    finally:
        return ret
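A minimal sketch of wiring the return code to the process exit status (hypothetical call site; 0 ok, 1 scheduler error, 2 interrupt, 3 unexpected error):

import asyncio
import sys

sys.exit(asyncio.run(_run(parser, options, reg, False, scheduler)))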
Example #22
async def graphql_query(flow, fields, filters=None):
    """Obtain information from a GraphQL request to the flow.

    Requires:
        * is_active(True)
        * contact_info

    Args:
        flow (dict):
            Flow information dictionary, provided by scan through the pipe.
        fields (iterable):
            Iterable containing the fields to request e.g::

               ['id', 'name']

            One level of nesting is supported e.g::

               {'name': None, 'meta': ['title']}
        filters (list):
            Filter by the data returned from the query.
            List in the form ``[(key, ...), value]``, e.g::

               # state must be running
               [('state',), 'running']

               # state must be running or paused
               [('state',), ('running', 'paused')]

    """
    query = f'query {{ workflows(ids: ["{flow["name"]}"]) {{ {fields} }} }}'
    try:
        client = WorkflowRuntimeClient(
            flow['name'],
            # use contact_info data if present for efficiency
            host=flow.get('CYLC_WORKFLOW_HOST'),
            port=flow.get('CYLC_WORKFLOW_PORT'))
    except WorkflowStopped:
        LOG.warning(f'Workflow not running: {flow["name"]}')
        return False
    try:
        ret = await client.async_request('graphql', {
            'request_string': query,
            'variables': {}
        })
    except ClientTimeout:
        LOG.exception(f'Timeout: name: {flow["name"]}, '
                      f'host: {client.host}, '
                      f'port: {client.port}')
        return False
    except ClientError as exc:
        LOG.exception(exc)
        return False
    else:
        # stick the result into the flow object
        if 'error' in ret:
            LOG.exception(ret['error']['message'])
            return False
        for workflow in ret.get('workflows', []):
            flow.update(workflow)

        # process filters
        for field, value in filters or []:
            value_ = flow
            for field_ in field:
                value_ = value_[field_]
            if isinstance(value, Iterable):
                if value_ not in value:
                    return False
            else:
                if value_ != value:
                    return False

        return flow
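A minimal sketch of a scan-pipe style call, with a hypothetical flow dict; note the fields argument is interpolated verbatim into the query string above, so a plain string is passed here:

import asyncio

flow = {'name': 'my_flow'}
# Returns the updated flow dict, or False on error/filter mismatch.
flow = asyncio.run(
    graphql_query(flow, 'status', filters=[(('status',), 'running')]))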