def test_on_retry(self):
    tw = TaskRequest(mytask.name, uuid(), [1], {"f": "x"})
    tw.eventer = MockEventDispatcher()
    try:
        raise RetryTaskError("foo", KeyError("moofoobar"))
    except:
        einfo = ExceptionInfo()
        tw.on_failure(einfo)
        self.assertIn("task-retried", tw.eventer.sent)
        tw._does_info = False
        tw.on_failure(einfo)
        einfo.internal = True
        tw.on_failure(einfo)

def test_on_retry(self):
    tw = TaskRequest(mytask.name, uuid(), [1], {'f': 'x'})
    tw.eventer = MockEventDispatcher()
    try:
        raise RetryTaskError('foo', KeyError('moofoobar'))
    except:
        einfo = ExceptionInfo()
        tw.on_failure(einfo)
        self.assertIn('task-retried', tw.eventer.sent)
        prev, module._does_info = module._does_info, False
        try:
            tw.on_failure(einfo)
        finally:
            module._does_info = prev
        einfo.internal = True
        tw.on_failure(einfo)

def handle_failure(self, task, store_errors=True):
    """Handle exception."""
    req = task.request
    type_, _, tb = sys.exc_info()
    try:
        exc = self.retval
        einfo = ExceptionInfo()
        einfo.exception = get_pickleable_exception(einfo.exception)
        einfo.type = get_pickleable_etype(einfo.type)
        if store_errors:
            task.backend.mark_as_failure(req.id, exc, einfo.traceback)
        task.on_failure(exc, req.id, req.args, req.kwargs, einfo)
        signals.task_failure.send(sender=task, task_id=req.id,
                                  exception=exc, args=req.args,
                                  kwargs=req.kwargs, traceback=tb,
                                  einfo=einfo)
        return einfo
    finally:
        del tb

def _join_exited_workers(self, shutdown=False):
    """Cleanup after any worker processes which have exited due to
    reaching their specified lifetime.  Returns True if any workers were
    cleaned up.
    """
    now = None
    # The worker may have published a result before being terminated,
    # but we have no way to accurately tell if it did.  So we wait for
    # _lost_worker_timeout seconds before we mark the job with
    # WorkerLostError.
    for job in [job for job in self._cache.values()
                if not job.ready() and job._worker_lost]:
        now = now or time.time()
        if now - job._worker_lost > job._lost_worker_timeout:
            exc_info = None
            try:
                raise WorkerLostError("Worker exited prematurely.")
            except WorkerLostError:
                exc_info = ExceptionInfo(sys.exc_info())
            job._set(None, (False, exc_info))

    if shutdown and not len(self._pool):
        raise WorkersJoined()

    cleaned = []
    for i in reversed(range(len(self._pool))):
        worker = self._pool[i]
        if worker.exitcode is not None:
            # worker exited
            debug('Supervisor: cleaning up worker %d' % i)
            worker.join()
            debug('Supervisor: worker %d joined' % i)
            cleaned.append(worker.pid)
            del self._pool[i]
            del self._poolctrl[worker.pid]
    if cleaned:
        for job in self._cache.values():
            for worker_pid in job.worker_pids():
                if worker_pid in cleaned and not job.ready():
                    job._worker_lost = time.time()
                    continue
        if self._putlock is not None:
            for worker in cleaned:
                self._putlock.release()
        return True
    return False

def run_instance(instance_id):
    from .models import Instance
    instance = Instance.objects.get(pk=instance_id)
    if not instance.execution.is_active:
        return 'no active'
    instance.state = Instance.STARTED
    instance.task_id = run_instance.request.id
    instance.last_try = now()
    # Make sure feedback fields are clean (the instance may be re-executed)
    instance.exit_code = None
    instance.stderr = ''
    instance.stdout = ''
    instance.traceback = ''
    instance.save()
    try:
        # ssh connection
        ssh = paramiko.SSHClient()
        ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        addr = str(instance.node.mgmt_net.addr)
        try:
            ssh.connect(addr, username='******',
                        key_filename=MAINTENANCE_KEY_PATH)
        except socket.error:
            instance.state = Instance.TIMEOUT
            instance.save()
            return 'socket error'
        channel = ssh.get_transport().open_session()
        channel.exec_command(instance.script.replace('\r', ''))
        while True:
            # Non-blocking is the secret ingredient in the async sauce
            select.select([channel], [], [])
            if channel.recv_ready():
                instance.stdout += channel.recv(1024)
            if channel.recv_stderr_ready():
                instance.stderr += channel.recv_stderr(1024)
            instance.save()
            if channel.exit_status_ready():
                break
        instance.exit_code = exit_code = channel.recv_exit_status()
        instance.state = Instance.SUCCESS if exit_code == 0 else Instance.FAILURE
        channel.close()
        ssh.close()
        instance.save()
    except:
        instance.state = Instance.ERROR
        instance.traceback = ExceptionInfo(sys.exc_info()).traceback
        instance.save()

def test_on_worker_error(self):
    scratch = [None]

    def errback(einfo):
        scratch[0] = einfo

    pool = TaskPool(10)
    exc_info = None
    try:
        raise KeyError("foo")
    except KeyError:
        exc_info = ExceptionInfo(sys.exc_info())
    pool.on_worker_error(errback, exc_info)
    self.assertTrue(scratch[0])
    self.assertIs(scratch[0], exc_info)

def test_exception_info(self):
    try:
        raise LookupError('The quick brown fox jumps...')
    except Exception:
        einfo = ExceptionInfo()
        self.assertEqual(str(einfo), einfo.traceback)
        self.assertIsInstance(einfo.exception, LookupError)
        self.assertTupleEqual(
            einfo.exception.args,
            ('The quick brown fox jumps...', ),
        )
        self.assertTrue(einfo.traceback)

        r = repr(einfo)
        self.assertTrue(r)

def handle_retry(self, exc, type_, tb, strtb):
    """Handle retry exception."""
    # Create a simpler version of the RetryTaskError that stringifies
    # the original exception instead of including the exception instance.
    # This is for reporting the retry in logs, e-mail etc, while
    # guaranteeing pickleability.
    message, orig_exc = exc.args
    expanded_msg = "%s: %s" % (message, str(orig_exc))
    einfo = ExceptionInfo((type_, type_(expanded_msg, None), tb))
    self.task.on_retry(exc, self.task_id, self.args, self.kwargs,
                       einfo=einfo)
    return einfo

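# A hedged illustration of the pickleability concern described in the
# comment above (not code from this project): an exception whose custom
# __init__ does not preserve .args can be pickled but not unpickled,
# which is why the retry handler ships a stringified copy rather than
# the original exception instance. CustomError is a made-up example.
import pickle


class CustomError(Exception):
    def __init__(self, status):
        self.status = status
        Exception.__init__(self)  # .args stays empty, breaking __reduce__


dumped = pickle.dumps(CustomError(503))  # pickling itself succeeds
try:
    pickle.loads(dumped)  # unpickling calls CustomError() with no args
except TypeError as exc:
    print('unpickling failed:', exc)
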
def test_on_failure_Terminated(self):
    einfo = None
    try:
        raise Terminated('9')
    except Terminated:
        einfo = ExceptionInfo()
    self.assertIsNotNone(einfo)
    req = self.get_request(self.add.s(2, 2))
    req.on_failure(einfo)
    req.eventer.send.assert_called_with(
        'task-revoked',
        uuid=req.id, terminated=True, signum='9', expired=False,
    )

def test_exception_info(self):
    try:
        raise LookupError("The quick brown fox jumps...")
    except LookupError:
        exc_info = sys.exc_info()

    einfo = ExceptionInfo(exc_info)
    self.assertEqual(str(einfo), einfo.traceback)
    self.assertIsInstance(einfo.exception, LookupError)
    self.assertTupleEqual(einfo.exception.args,
                          ("The quick brown fox jumps...", ))
    self.assertTrue(einfo.traceback)

    r = repr(einfo)
    self.assertTrue(r)

def test_on_failure_WorkerLostError(self):
    tw = TaskRequest(mytask.name, gen_unique_id(), [1], {"f": "x"})
    try:
        raise WorkerLostError("do re mi")
    except WorkerLostError:
        exc_info = ExceptionInfo(sys.exc_info())
    tw.on_failure(exc_info)
    self.assertEqual(mytask.backend.get_status(tw.task_id),
                     states.FAILURE)

    mytask.ignore_result = True
    try:
        tw = TaskRequest(mytask.name, gen_unique_id(), [1], {"f": "x"})
        tw.on_failure(exc_info)
        self.assertEqual(mytask.backend.get_status(tw.task_id),
                         states.PENDING)
    finally:
        mytask.ignore_result = False

def handle_failure(self, task, store_errors=True):
    """Handle exception."""
    req = task.request
    exc, type_, tb = self.retval, self.exc_type, self.tb
    if store_errors:
        task.backend.mark_as_failure(req.id, exc, self.strtb)
    exc = get_pickleable_exception(exc)
    einfo = ExceptionInfo((type_, exc, tb))
    task.on_failure(exc, req.id, req.args, req.kwargs, einfo)
    signals.task_failure.send(sender=task, task_id=req.id,
                              exception=exc, args=req.args,
                              kwargs=req.kwargs, traceback=tb,
                              einfo=einfo)
    return einfo

def handle_retry(self, task, store_errors=True):
    """Handle retry exception."""
    # The exception raised is the RetryTaskError semi-predicate,
    # and its `exc` attribute is the original exception raised (if any).
    req = task.request
    type_, _, tb = sys.exc_info()
    try:
        reason = self.retval
        einfo = ExceptionInfo((type_, reason, tb))
        if store_errors:
            task.backend.mark_as_retry(req.id, reason.exc, einfo.traceback)
        task.on_retry(reason.exc, req.id, req.args, req.kwargs, einfo)
        signals.task_retry.send(sender=task, request=req,
                                reason=reason, einfo=einfo)
        return einfo
    finally:
        del tb

def trace_task(uuid, args, kwargs, request=None):
    R = I = None
    kwargs = kwdict(kwargs)
    try:
        push_task(task)
        task_request = Context(request or {}, args=args,
                               called_directly=False, kwargs=kwargs)
        push_request(task_request)
        try:
            # -*- PRE -*-
            if prerun_receivers:
                send_prerun(sender=task, task_id=uuid, task=task,
                            args=args, kwargs=kwargs)
            loader_task_init(uuid, task)
            if track_started:
                store_result(uuid, {'pid': pid,
                                    'hostname': hostname}, STARTED)

            # -*- TRACE -*-
            try:
                R = retval = fun(*args, **kwargs)
                state = SUCCESS
            except Ignore, exc:
                I, R = Info(IGNORED, exc), ExceptionInfo(internal=True)
                state, retval = I.state, I.retval
            except RetryTaskError, exc:
                I = Info(RETRY, exc)
                state, retval = I.state, I.retval
                R = I.handle_error_state(task, eager=eager)
            except Exception, exc:
                if propagate:
                    raise
                I = Info(FAILURE, exc)
                state, retval = I.state, I.retval
                R = I.handle_error_state(task, eager=eager)
                [subtask(errback).apply_async((uuid, ))
                    for errback in task_request.errbacks or []]

def _test_on_failure(self, exception, logger):
    app = app_or_default()
    tid = uuid()
    tw = TaskRequest(mytask.name, tid, [4], {'f': 'x'})
    try:
        raise exception
    except Exception:
        exc_info = ExceptionInfo()
        app.conf.CELERY_SEND_TASK_ERROR_EMAILS = True
        try:
            tw.on_failure(exc_info)
            self.assertTrue(logger.log.called)
            context = logger.log.call_args[0][2]
            self.assertEqual(mytask.name, context['name'])
            self.assertIn(tid, context['id'])
        finally:
            app.conf.CELERY_SEND_TASK_ERROR_EMAILS = False

def handle_failure(self, task, store_errors=True):
    """Handle exception."""
    req = task.request
    type_, _, tb = sys.exc_info()
    try:
        exc = self.retval
        einfo = ExceptionInfo((type_, get_pickleable_exception(exc), tb))
        if store_errors:
            task.backend.mark_as_failure(req.id, exc, einfo.traceback)
        task.on_failure(exc, req.id, req.args, req.kwargs, einfo)
        signals.task_failure.send(sender=task, task_id=req.id,
                                  exception=exc, args=req.args,
                                  kwargs=req.kwargs,
                                  traceback=einfo.tb, einfo=einfo)
        return einfo
    finally:
        del tb

def handle_retry(self, task, store_errors=True):
    """Handle retry exception."""
    # Create a simpler version of the RetryTaskError that stringifies
    # the original exception instead of including the exception instance.
    # This is for reporting the retry in logs, email etc, while
    # guaranteeing pickleability.
    req = task.request
    type_, _, tb = sys.exc_info()
    try:
        exc = self.retval
        message, orig_exc = exc.args
        expanded_msg = '%s: %s' % (message, str(orig_exc))
        einfo = ExceptionInfo((type_, type_(expanded_msg, None), tb))
        if store_errors:
            task.backend.mark_as_retry(req.id, orig_exc, einfo.traceback)
        task.on_retry(exc, req.id, req.args, req.kwargs, einfo)
        return einfo
    finally:
        del tb

def simulate_async_error(self, exception):
    """Take this exception and store it as an error in the result backend.

    This unifies the handling of broker-connection errors with any other
    type of error that might occur when running the task.  So the same
    error-handling that might retry a task or display a useful message
    to the user can also handle this error.
    """
    task_id = gen_unique_id()
    async_result = self.AsyncResult(task_id)
    einfo = ExceptionInfo(sys.exc_info())
    async_result.backend.mark_as_failure(
        task_id, exception, traceback=einfo.traceback,
    )
    return async_result

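# A hedged usage sketch for the helper above; apply_async_safely and the
# ConnectionError catch are illustrative, not a published Celery API.
# Because simulate_async_error reads sys.exc_info(), it must run inside
# the except block that is handling the broker failure.
def apply_async_safely(self, *args, **kwargs):
    try:
        return self.apply_async(*args, **kwargs)
    except ConnectionError as exc:  # broker unreachable
        # Callers get an ordinary failed AsyncResult instead of having
        # to special-case connection errors themselves.
        return self.simulate_async_error(exc)
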
def test_send_email(self):
    app = app_or_default()
    old_mail_admins = app.mail_admins
    old_enable_mails = mytask.send_error_emails
    mail_sent = [False]

    def mock_mail_admins(*args, **kwargs):
        mail_sent[0] = True

    app.mail_admins = mock_mail_admins
    mytask.send_error_emails = True
    try:
        tw = TaskRequest(mytask.name, uuid(), [1], {"f": "x"})
        try:
            raise KeyError("moofoobar")
        except:
            einfo = ExceptionInfo(sys.exc_info())

        tw.on_failure(einfo)
        self.assertTrue(mail_sent[0])

        mail_sent[0] = False
        mytask.send_error_emails = False
        tw.on_failure(einfo)
        self.assertFalse(mail_sent[0])

        mail_sent[0] = False
        mytask.send_error_emails = True
        mytask.error_whitelist = [KeyError]
        tw.on_failure(einfo)
        self.assertTrue(mail_sent[0])

        mail_sent[0] = False
        mytask.send_error_emails = True
        mytask.error_whitelist = [SyntaxError]
        tw.on_failure(einfo)
        self.assertFalse(mail_sent[0])
    finally:
        app.mail_admins = old_mail_admins
        mytask.send_error_emails = old_enable_mails
        mytask.error_whitelist = ()

def _on_hard_timeout(job, i, hard_timeout):
    if job.ready():
        return
    debug('hard time limit exceeded for %i', i)
    # Remove from cache and set return value to an exception
    exc_info = None
    try:
        raise TimeLimitExceeded(hard_timeout)
    except TimeLimitExceeded:
        exc_info = sys.exc_info()
    job._set(i, (False, ExceptionInfo(exc_info)))

    # Remove from _pool
    process, _index = _process_by_pid(job._worker_pid)

    # Run timeout callback
    if job._timeout_callback is not None:
        job._timeout_callback(soft=False, timeout=hard_timeout)
    if process:
        process.terminate()

def _test_on_failure(self, exception):
    tid = gen_unique_id()
    tw = TaskRequest(mytask.name, tid, [4], {"f": "x"})
    try:
        raise exception
    except Exception:
        exc_info = ExceptionInfo(sys.exc_info())

        logfh = StringIO()
        tw.logger.handlers = []
        tw.logger = setup_logger(logfile=logfh, loglevel=logging.INFO)

        from celery import conf
        conf.CELERY_SEND_TASK_ERROR_EMAILS = True

        tw.on_failure(exc_info)

        logvalue = logfh.getvalue()
        self.assertIn(mytask.name, logvalue)
        self.assertIn(tid, logvalue)
        self.assertIn("ERROR", logvalue)

        conf.CELERY_SEND_TASK_ERROR_EMAILS = False

def Python(backend, log, server, cmds, run_async=False):
    script = ''
    functions = set()
    for cmd in cmds:
        if cmd.func not in functions:
            functions.add(cmd.func)
            script += textwrap.dedent(''.join(
                inspect.getsourcelines(cmd.func)[0]))
            script += '\n'
    for cmd in cmds:
        script += '# %s %s\n' % (cmd.func.__name__, cmd.args)
    log.state = log.STARTED
    log.script = '\n'.join((log.script, script))
    log.save(update_fields=('script', 'state', 'updated_at'))
    stdout = ''
    try:
        for cmd in cmds:
            with CaptureStdout() as stdout:
                result = cmd(server)
            for line in stdout:
                log.stdout += line + '\n'
            if result:
                log.stdout += '# Result: %s\n' % result
            if run_async:
                log.save(update_fields=('stdout', 'updated_at'))
    except:
        log.exit_code = 1
        log.state = log.FAILURE
        log.stdout += '\n'.join(stdout)
        log.traceback += ExceptionInfo(sys.exc_info()).traceback
        logger.error('Exception while executing %s on %s'
                     % (backend, server))
    else:
        if not log.exit_code:
            log.exit_code = 0
            log.state = log.SUCCESS
    logger.debug('%s execution state on %s is %s'
                 % (backend, server, log.state))
    log.save()

def _test_on_failure(self, exception):
    app = app_or_default()
    tid = uuid()
    tw = TaskRequest(mytask.name, tid, [4], {"f": "x"})
    try:
        raise exception
    except Exception:
        exc_info = ExceptionInfo(sys.exc_info())

        logfh = WhateverIO()
        tw.logger.handlers = []
        tw.logger = setup_logger(logfile=logfh, loglevel=logging.INFO,
                                 root=False)
        app.conf.CELERY_SEND_TASK_ERROR_EMAILS = True

        tw.on_failure(exc_info)

        logvalue = logfh.getvalue()
        self.assertIn(mytask.name, logvalue)
        self.assertIn(tid, logvalue)
        self.assertIn("ERROR", logvalue)

        app.conf.CELERY_SEND_TASK_ERROR_EMAILS = False

def raise_something(i):
    try:
        raise KeyError("FOO EXCEPTION")
    except KeyError:
        return ExceptionInfo(sys.exc_info())

def trace_task(uuid, args, kwargs, request=None):
    R = I = None
    kwargs = kwdict(kwargs)
    try:
        push_task(task)
        task_request = Context(request or {}, args=args,
                               called_directly=False, kwargs=kwargs)
        push_request(task_request)
        try:
            # -*- PRE -*-
            if prerun_receivers:
                send_prerun(sender=task, task_id=uuid, task=task,
                            args=args, kwargs=kwargs)
            loader_task_init(uuid, task)
            if track_started:
                store_result(uuid, {'pid': pid,
                                    'hostname': hostname}, STARTED)

            # -*- TRACE -*-
            try:
                R = retval = fun(*args, **kwargs)
                state = SUCCESS
            except Ignore as exc:
                I, R = Info(IGNORED, exc), ExceptionInfo(internal=True)
            except RetryTaskError as exc:
                I = Info(RETRY, exc)
                state, retval = I.state, I.retval
                R = I.handle_error_state(task, eager=eager)
            except Exception as exc:
                if propagate:
                    raise
                I = Info(FAILURE, exc)
                state, retval = I.state, I.retval
                R = I.handle_error_state(task, eager=eager)
                [subtask(errback).apply_async((uuid, ))
                    for errback in task_request.errbacks or []]
            except BaseException as exc:
                raise
            else:
                # callback tasks must be applied before the result is
                # stored, so that result.children is populated.
                [subtask(callback).apply_async((retval, ))
                    for callback in task_request.callbacks or []]
                if publish_result:
                    store_result(uuid, retval, SUCCESS)
                if task_on_success:
                    task_on_success(retval, uuid, args, kwargs)
                if success_receivers:
                    send_success(sender=task, result=retval)

            # -* POST *-
            if task_request.chord:
                on_chord_part_return(task)
            if task_after_return:
                task_after_return(state, retval, uuid, args, kwargs, None)
            if postrun_receivers:
                send_postrun(sender=task, task_id=uuid, task=task,
                             args=args, kwargs=kwargs,
                             retval=retval, state=state)
        finally:
            pop_task()
            pop_request()
            if not eager:
                try:
                    backend_cleanup()
                    loader_cleanup()
                except (KeyboardInterrupt, SystemExit, MemoryError):
                    raise
                except Exception as exc:
                    _logger.error('Process cleanup failed: %r', exc,
                                  exc_info=True)
    except Exception as exc:
        if eager:
            raise
        R = report_internal_error(task, exc)
    return R, I

def catch_exception(exception):
    try:
        raise exception
    except exception.__class__, exc:
        exc = default_app.backend.prepare_exception(exc)
        return exc, ExceptionInfo(sys.exc_info()).traceback

def get_ei():
    try:
        raise WorkerLostError("do re mi")
    except WorkerLostError:
        return ExceptionInfo(sys.exc_info())

def get_ei():
    try:
        raise KeyError("moofoobar")
    except:
        return ExceptionInfo(sys.exc_info())

def to_excinfo(exc):
    try:
        raise exc
    except:
        return ExceptionInfo(sys.exc_info())

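# A minimal, self-contained sketch of the raise-and-capture pattern the
# helpers above all share, assuming billiard's ExceptionInfo (the class
# Celery's pools use). Raising inside a try block is what gives the
# instance a real traceback; with no arguments it reads sys.exc_info().
from billiard.einfo import ExceptionInfo


def capture(exc):
    try:
        raise exc
    except Exception:
        return ExceptionInfo()  # snapshots the active exception


einfo = capture(KeyError('demo'))
print(einfo.type, einfo.exception)    # <class 'KeyError'> 'demo'
print(str(einfo) == einfo.traceback)  # True: str() is the formatted traceback
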
def worker(inqueue, outqueue, initializer=None, initargs=(),
           maxtasks=None, sentinel=None):
    # Re-init logging system.
    # Workaround for http://bugs.python.org/issue6721#msg140215
    # Python logging module uses RLock() objects which are broken after
    # fork. This can result in a deadlock (Issue #496).
    logger_names = logging.Logger.manager.loggerDict.keys()
    logger_names.append(None)  # for root logger
    for name in logger_names:
        for handler in logging.getLogger(name).handlers:
            handler.createLock()
    logging._lock = threading.RLock()

    pid = os.getpid()
    assert maxtasks is None or (type(maxtasks) == int and maxtasks > 0)
    put = outqueue.put
    get = inqueue.get

    if hasattr(inqueue, '_reader'):

        def poll(timeout):
            if inqueue._reader.poll(timeout):
                return True, get()
            return False, None
    else:

        def poll(timeout):  # noqa
            try:
                return True, get(timeout=timeout)
            except Queue.Empty:
                return False, None

    if hasattr(inqueue, '_writer'):
        inqueue._writer.close()
        outqueue._reader.close()

    if initializer is not None:
        initializer(*initargs)

    if SIG_SOFT_TIMEOUT is not None:
        signal.signal(SIG_SOFT_TIMEOUT, soft_timeout_sighandler)

    completed = 0
    while maxtasks is None or (maxtasks and completed < maxtasks):
        if sentinel is not None and sentinel.is_set():
            debug('worker got sentinel -- exiting')
            break
        try:
            ready, task = poll(1.0)
            if not ready:
                continue
        except (EOFError, IOError):
            debug('worker got EOFError or IOError -- exiting')
            break

        if task is None:
            debug('worker got sentinel -- exiting')
            break

        job, i, func, args, kwds = task
        put((ACK, (job, i, time.time(), pid)))
        try:
            result = (True, func(*args, **kwds))
        except Exception:
            result = (False, ExceptionInfo(sys.exc_info()))
        try:
            put((READY, (job, i, result)))
        except Exception, exc:
            _, _, tb = sys.exc_info()
            wrapped = MaybeEncodingError(exc, result[1])
            einfo = ExceptionInfo((MaybeEncodingError, wrapped, tb))
            put((READY, (job, i, (False, einfo))))

        completed += 1

                if second:
                    break
                second = True
        else:
            log.stdout += channel.makefile('rb', -1).read().decode('utf-8')
            log.stderr += channel.makefile_stderr('rb', -1).read().decode('utf-8')
        log.exit_code = channel.recv_exit_status()
        log.state = log.SUCCESS if log.exit_code == 0 else log.FAILURE
        logger.debug('%s execution state on %s is %s'
                     % (backend, server, log.state))
        log.save()
    except:
        log.state = log.ERROR
        log.traceback = ExceptionInfo(sys.exc_info()).traceback
        logger.error('Exception while executing %s on %s'
                     % (backend, server))
        logger.debug(log.traceback)
        log.save()
    finally:
        if log.state == log.STARTED:
            log.state = log.ABORTED
            log.save(update_fields=('state', 'updated_at'))
        if channel is not None:
            channel.close()


def OpenSSH(backend, log, server, cmds, async=False):
    """
    Executes cmds on a remote server over SSH, reusing the connection
    for maximum performance.
    """

def get_ei():
    try:
        raise KeyError('moofoobar')
    except:
        return ExceptionInfo()

def get_ei():
    try:
        raise WorkerLostError('do re mi')
    except WorkerLostError:
        return ExceptionInfo()

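# A hedged sketch of why the pool snippets wrap sys.exc_info() at all:
# a live traceback object cannot cross a process boundary, while
# billiard's ExceptionInfo replaces it with picklable stand-ins, so a
# child process can put() the failure on the result queue and the
# parent can still report it. Assumes billiard is installed.
import pickle
from billiard.einfo import ExceptionInfo


def failing_job():
    try:
        {}['missing']
    except KeyError:
        return ExceptionInfo()


wire = pickle.dumps(failing_job())  # what a pool child would send
einfo = pickle.loads(wire)          # what the parent would receive
print(einfo.traceback)              # the formatted traceback survives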