def format(self, record):
    """Format a log record, wrapping the message in terminal colors.

    Colors only apply when a color is mapped for the record's level and
    ``use_color`` is enabled; otherwise the formatted text is passed
    through :func:`safe_str`.  ``record.msg`` is mutated in place before
    delegating to the base formatter.
    """
    # Unbound base-class method; called explicitly with ``self`` below.
    sformat = logging.Formatter.format
    color = self.colors.get(record.levelname)
    if color and self.use_color:
        msg = record.msg
        try:
            # safe_str will repr the color object
            # and color will break on non-string objects
            # so need to reorder calls based on type.
            # Issue #427
            try:
                if isinstance(msg, string_t):
                    record.msg = text_t(color(safe_str(msg)))
                else:
                    record.msg = safe_str(color(msg))
            except UnicodeDecodeError:
                record.msg = safe_str(msg)  # skip colors
        except Exception as exc:
            # Last resort: the message itself is unformattable; log a
            # placeholder and attach exception info to the record.
            record.msg = '<Unrepresentable {0!r}: {1!r}>'.format(
                type(msg), exc)
            record.exc_info = True
        return sformat(self, record)
    else:
        return safe_str(sformat(self, record))
def _log_error(self, exc_info):
    """Emit a ``task-failed`` event and log the failure.

    Internal errors (``exc_info.internal``) are escalated to CRITICAL
    with a dedicated message format.
    """
    format = self.error_msg
    description = "raised exception"
    severity = logging.ERROR
    self.send_event("task-failed", uuid=self.id,
                    exception=safe_repr(exc_info.exception),
                    traceback=safe_str(exc_info.traceback))
    if exc_info.internal:
        format = self.internal_error_msg
        description = "INTERNAL ERROR"
        severity = logging.CRITICAL
    context = {"hostname": self.hostname, "id": self.id,
               "name": self.name,
               "exc": safe_repr(exc_info.exception),
               "traceback": safe_str(exc_info.traceback),
               "args": safe_repr(self.args),
               "kwargs": safe_repr(self.kwargs),
               "description": description}
    logger.log(severity, format.strip(), context,
               exc_info=exc_info.exc_info,
               extra={"data": {"id": self.id, "name": self.name,
                               "hostname": self.hostname}})
    # NOTE(review): the fallback default here is the ``object`` class;
    # if the task is unregistered, ``send_error_email`` would raise
    # AttributeError — presumably the task is always registered. Verify.
    task_obj = self.app.tasks.get(self.name, object)
    task_obj.send_error_email(context, exc_info.exception)
def format(self, record):
    """Format a log record with optional terminal colors.

    Unlike older versions, the base formatter runs first and the colored
    text is returned directly; on failure the record is temporarily
    rewritten with a placeholder message and then restored so other
    handlers see the original record.
    """
    msg = logging.Formatter.format(self, record)
    color = self.colors.get(record.levelname)
    # reset exception info later for other handlers...
    einfo = sys.exc_info() if record.exc_info == 1 else record.exc_info
    if color and self.use_color:
        try:
            # safe_str will repr the color object
            # and color will break on non-string objects
            # so need to reorder calls based on type.
            # Issue #427
            try:
                if isinstance(msg, string_t):
                    return text_t(color(safe_str(msg)))
                return safe_str(color(msg))
            except UnicodeDecodeError:  # pragma: no cover
                return safe_str(msg)  # skip colors
        except Exception as exc:  # pylint: disable=broad-except
            # Swap in a placeholder message, format it, then restore the
            # record so downstream handlers are unaffected.
            prev_msg, record.exc_info, record.msg = (
                record.msg, 1, '<Unrepresentable {0!r}: {1!r}>'.format(
                    type(msg), exc
                ),
            )
            try:
                return logging.Formatter.format(self, record)
            finally:
                record.msg, record.exc_info = prev_msg, einfo
    else:
        return safe_str(msg)
def entity_name(self, name, table=CHARS_REPLACE_TABLE):
    """Format AMQP queue name into a legal SQS queue name.

    FIFO queues keep their literal ``.fifo`` suffix; only the part
    before the suffix is sanitized through *table*.
    """
    if name.endswith('.fifo'):
        # BUG FIX: ``name.rstrip('.fifo')`` strips any trailing run of
        # the characters '.', 'f', 'i', 'o' — e.g. 'queue_f.fifo' would
        # lose its trailing 'f'.  Slice the literal suffix off instead.
        partial = name[:-len('.fifo')]
        partial = text_t(safe_str(partial)).translate(table)
        return partial + '.fifo'
    else:
        return text_t(safe_str(name)).translate(table)
def _fold_no_color(self, a, b):
    """Concatenate two (possibly colored) values with colors stripped."""
    def _plain(obj):
        # Prefer the object's own color-stripping hook; plain values
        # fall back to simple string coercion.
        try:
            return obj.no_color()
        except AttributeError:
            return string(obj)
    return safe_str(safe_str(_plain(a)) + safe_str(_plain(b)))
def mail_admins(self, subject, body, fail_silently=False,
                sender=None, to=None, host=None, port=None,
                user=None, password=None, timeout=None,
                use_ssl=False, use_tls=False):
    """Compose a message and deliver it to the site administrators."""
    msg = self.mail.Message(sender=sender, to=to,
                            subject=safe_str(subject),
                            body=safe_str(body))
    transport = self.mail.Mailer(host=host, port=port,
                                 user=user, password=password,
                                 timeout=timeout,
                                 use_ssl=use_ssl, use_tls=use_tls)
    transport.send(msg, fail_silently=fail_silently)
def write(self, data):
    """Write message to logging object."""
    # In a signal handler the logging machinery is not async-signal
    # safe; write straight to the real stderr instead.
    if _in_sighandler:
        return print(safe_str(data), file=sys.__stderr__)
    if getattr(self._thread, 'recurse_protection', False):
        # Logger is logging back to this file, so stop recursing.
        return
    data = data.strip()
    if data and not self.closed:
        # Set the guard flag while logging so a handler that writes back
        # to this stream cannot recurse infinitely.
        self._thread.recurse_protection = True
        try:
            self.logger.log(self.loglevel, safe_str(data))
        finally:
            self._thread.recurse_protection = False
def _log_error(self, task, req, einfo):
    """Log a task failure using the task's log policy.

    The exception is first un-pickled back into a real exception object;
    severity, message format and traceback inclusion all come from
    :func:`get_log_policy`.
    """
    eobj = einfo.exception = get_pickled_exception(einfo.exception)
    exception, traceback, exc_info, sargs, skwargs = (
        safe_repr(eobj),
        safe_str(einfo.traceback),
        einfo.exc_info,
        safe_repr(req.args),
        safe_repr(req.kwargs),
    )
    policy = get_log_policy(task, einfo, eobj)
    context = {
        'hostname': req.hostname,
        'id': req.id,
        'name': task.name,
        'exc': exception,
        'traceback': traceback,
        'args': sargs,
        'kwargs': skwargs,
        'description': policy.description,
        'internal': einfo.internal,
    }
    logger.log(policy.severity, policy.format.strip(), context,
               exc_info=exc_info if policy.traceback else None,
               extra={'data': context})
def _log_error(self, task, req, einfo):
    """Log a task failure according to the task's log policy.

    Mirrors the policy-driven variant: the pickled exception is restored
    first, and the traceback is attached only when the policy asks for it.
    """
    eobj = einfo.exception = get_pickled_exception(einfo.exception)
    exception, traceback, exc_info, sargs, skwargs = (
        safe_repr(eobj),
        safe_str(einfo.traceback),
        einfo.exc_info,
        safe_repr(req.args),
        safe_repr(req.kwargs),
    )
    policy = get_log_policy(task, einfo, eobj)
    context = {
        "hostname": req.hostname,
        "id": req.id,
        "name": task.name,
        "exc": exception,
        "traceback": traceback,
        "args": sargs,
        "kwargs": skwargs,
        "description": policy.description,
        "internal": einfo.internal,
    }
    logger.log(
        policy.severity,
        policy.format.strip(),
        context,
        exc_info=exc_info if policy.traceback else None,
        extra={"data": context},
    )
def on_start(self):
    """Worker start-up hook: set up logging redirection, fire the
    post-setup signal, optionally purge queues, and print the banner."""
    if not self._custom_logging and self.redirect_stdouts:
        self.app.log.redirect_stdouts(self.redirect_stdouts_level)
    WorkController.on_start(self)
    # this signal can be used to e.g. change queues after
    # the -Q option has been applied.
    signals.celeryd_after_setup.send(sender=self.hostname, instance=self,
                                     conf=self.app.conf)
    # Warn when the user has not explicitly chosen serializers, since
    # the pickle default is deprecated.
    if not self.app.conf.value_set_for("accept_content"):
        warnings.warn(CDeprecationWarning(W_PICKLE_DEPRECATED))
    if self.purge:
        self.purge_messages()
    # Dump configuration to screen so we have some basic information
    # for when users sends bug reports.
    print(
        safe_str(
            "".join(
                [
                    string(self.colored.cyan(" \n", self.startup_info())),
                    string(self.colored.reset(self.extra_info() or "")),
                ]
            )
        ),
        file=sys.__stdout__,
    )
    self.set_process_status("-active-")
    self.install_platform_tweaks(self)
def test_when_containing_high_chars(self):
    """Non-ASCII text under an ascii default encoding yields bytes of
    the same length as the original text."""
    with patch('sys.getdefaultencoding') as encoding:
        encoding.return_value = 'ascii'
        text = u'The quiæk fåx jømps øver the lazy dåg'
        result = safe_str(text)
        self.assertIsInstance(result, bytes_t)
        self.assertEqual(len(text), len(result))
def test_when_encoding_utf8(self):
    """With a utf-8 filesystem encoding, text passes through as str."""
    with patch("sys.getfilesystemencoding") as encoding:
        encoding.return_value = "utf-8"
        self.assertEqual(default_encoding(), "utf-8")
        sample = "The quiæk fåx jømps øver the lazy dåg"
        outcome = safe_str(sample)
        self.assertIsInstance(outcome, str)
def on_start(self):
    """Worker start-up hook: fire post-setup signal, optionally purge,
    print the (possibly image-based) banner, then redirect stdouts."""
    app = self.app
    WorkController.on_start(self)
    # this signal can be used to e.g. change queues after
    # the -Q option has been applied.
    signals.celeryd_after_setup.send(
        sender=self.hostname, instance=self, conf=app.conf,
    )
    if self.purge:
        self.purge_messages()
    # Dump configuration to screen so we have some basic information
    # for when users sends bug reports.
    use_image = self._term_supports_images()
    if use_image:
        # Terminal supports inline images: show the logo instead of
        # the ASCII art lines.
        self.termimage(static.logo_as_base64())
    print(safe_str(''.join([
        string(self.colored.cyan(
            ' \n', self.startup_info(artlines=not use_image))),
        string(self.colored.reset(self.extra_info() or '')),
    ])), file=sys.__stdout__)
    self.set_process_status('-active-')
    self.install_platform_tweaks(self)
    if not self._custom_logging and self.redirect_stdouts:
        app.log.redirect_stdouts(self.redirect_stdouts_level)
def formatException(self, ei):
    """Format exception info, normalizing non-tuple values to the
    current exception and coercing the result on Python 2."""
    if ei and not isinstance(ei, tuple):
        # A truthy non-tuple (e.g. ``1``) means "use current exception".
        ei = sys.exc_info()
    formatted = logging.Formatter.formatException(self, ei)
    if not PY3 and isinstance(formatted, str):
        return safe_str(formatted)
    return formatted
def test_when_encoding_utf8(self):
    """With a utf-8 default encoding, text passes through as str."""
    with patch('sys.getdefaultencoding') as encoding:
        encoding.return_value = 'utf-8'
        self.assertEqual(default_encoding(), 'utf-8')
        sample = 'The quiæk fåx jømps øver the lazy dåg'
        outcome = safe_str(sample)
        self.assertIsInstance(outcome, str)
def mail_admins(
    self,
    subject,
    body,
    fail_silently=False,
    sender=None,
    to=None,
    host=None,
    port=None,
    user=None,
    password=None,
    timeout=None,
    use_ssl=False,
    use_tls=False,
):
    """Send a mail to the site admins.

    When *fail_silently* is true, delivery errors are downgraded to a
    :class:`SendmailWarning`; otherwise they propagate.
    """
    try:
        message = self.mail.Message(sender=sender, to=to,
                                    subject=safe_str(subject),
                                    body=safe_str(body))
        mailer = self.mail.Mailer(
            host=host, port=port, user=user, password=password,
            timeout=timeout, use_ssl=use_ssl, use_tls=use_tls
        )
        mailer.send(message)
    # FIX: ``except Exception, exc`` is Python-2-only syntax (a
    # SyntaxError on Python 3); ``as`` works on Python 2.6+ and 3.x.
    except Exception as exc:
        if not fail_silently:
            raise
        warnings.warn(
            self.mail.SendmailWarning(
                "Mail could not be sent: %r %r\n%r"
                % (exc, {"To": to, "Subject": subject},
                   traceback.format_stack())
            )
        )
def test_when_containing_high_chars(self):
    """Under an ascii filesystem encoding the text still round-trips as
    a str of the same length."""
    with patch("sys.getfilesystemencoding") as encoding:
        encoding.return_value = "ascii"
        sample = "The quiæk fåx jømps øver the lazy dåg"
        outcome = safe_str(sample)
        self.assertIsInstance(outcome, str)
        self.assertEqual(len(sample), len(outcome))
def _log_error(self, einfo, send_failed_event=True):
    """Emit failure event (optional) and log the failure.

    Internal errors are specialized: MemoryError is re-raised,
    Reject/Ignore get their own message formats and severities, and
    everything else is treated as an internal error at CRITICAL.
    """
    einfo.exception = get_pickled_exception(einfo.exception)
    exception, traceback, exc_info, internal, sargs, skwargs = (
        safe_repr(einfo.exception),
        safe_str(einfo.traceback),
        einfo.exc_info,
        einfo.internal,
        safe_repr(self.args),
        safe_repr(self.kwargs),
    )
    format = self.error_msg
    description = 'raised exception'
    severity = logging.ERROR
    if send_failed_event:
        self.send_event(
            'task-failed', exception=exception, traceback=traceback,
        )
    if internal:
        if isinstance(einfo.exception, MemoryError):
            # Do not try to log a MemoryError -- escalate immediately.
            raise MemoryError('Process got: %s' % (einfo.exception, ))
        elif isinstance(einfo.exception, Reject):
            format = self.rejected_msg
            description = 'rejected'
            severity = logging.WARN
            exc_info = einfo
            self.reject(requeue=einfo.exception.requeue)
        elif isinstance(einfo.exception, Ignore):
            format = self.ignored_msg
            description = 'ignored'
            severity = logging.INFO
            exc_info = None
            self.acknowledge()
        else:
            format = self.internal_error_msg
            description = 'INTERNAL ERROR'
            severity = logging.CRITICAL
    context = {
        'hostname': self.hostname, 'id': self.id,
        'name': self.name, 'exc': exception,
        'traceback': traceback, 'args': sargs,
        'kwargs': skwargs, 'description': description,
    }
    logger.log(severity, format.strip(), context,
               exc_info=exc_info,
               extra={'data': {'id': self.id,
                               'name': self.name,
                               'args': sargs,
                               'kwargs': skwargs,
                               'hostname': self.hostname,
                               'internal': internal}})
    self.task.send_error_email(context, einfo.exception)
def on_retry(self, exc_info):
    """Handler called if the task should be retried."""
    # Late-ack tasks must be acknowledged before recording the retry.
    if self.task.acks_late:
        self.acknowledge()
    self.send_event(
        'task-retried',
        exception=safe_repr(exc_info.exception.exc),
        traceback=safe_str(exc_info.traceback),
    )
def test_when_unrepresentable(self):
    """safe_str falls back to a placeholder when __repr__ raises."""
    # FIX: renamed the local class from the ambiguous single letter
    # ``O`` (PEP 8 / flake8 E742) to a descriptive name.
    class Unrepresentable(object):
        def __repr__(self):
            raise KeyError('foo')
    self.assertIn('<Unrepresentable', safe_str(Unrepresentable()))
def test_when_unrepresentable(self):
    """safe_str degrades gracefully when repr() itself raises."""
    class BrokenRepr(object):
        def __repr__(self):
            raise KeyError('foo')
    result = safe_str(BrokenRepr())
    assert '<Unrepresentable' in result
def __init__(self, c):
    """Bind a control mailbox node for consumer *c*."""
    self.c = c
    self.hostname = c.hostname
    node_state = AttributeDict(app=c.app, hostname=c.hostname, consumer=c)
    self.node = c.app.control.mailbox.Node(
        safe_str(c.hostname),
        handlers=control.Panel.data,
        state=node_state,
    )
def on_retry(self, exc_info):
    """Handler called if the task should be retried."""
    self.send_event("task-retried", uuid=self.id,
                    exception=safe_repr(exc_info.exception.exc),
                    traceback=safe_str(exc_info.traceback))
    # Skip the info log entirely when INFO is disabled.
    if not _does_info:
        return
    info(self.retry_msg.strip(),
         {"id": self.id,
          "name": self.name,
          "exc": safe_repr(exc_info.exception.exc)},
         exc_info=exc_info)
def on_retry(self, exc_info):
    """Handler called if the task should be retried."""
    self.send_event('task-retried', uuid=self.id,
                    exception=safe_repr(exc_info.exception.exc),
                    traceback=safe_str(exc_info.traceback))
    if not _does_info:
        return
    info(self.retry_msg.strip(),
         {'id': self.id, 'name': self.name, 'exc': exc_info.exception})
def set_process_title(progname, info=None):
    """Set the :command:`ps` name for the currently running process.

    Only works if :pypi:`setproctitle` is installed.
    """
    title = '[{0}]'.format(progname)
    if info:
        title = '{0} {1}'.format(title, info)
    if _setproctitle:
        _setproctitle.setproctitle(safe_str(title))
    return title
def startup_info(self):
    """Build the worker start-up banner text (with ASCII art)."""
    app = self.app
    concurrency = string(self.concurrency)
    appr = '{0}:{1:#x}'.format(app.main or '__main__', id(app))
    if not isinstance(app.loader, AppLoader):
        # Show a short loader name for non-default loaders.
        loader = qualname(app.loader)
        if loader.startswith('celery.loaders'):  # pragma: no cover
            loader = loader[14:]
        appr += ' ({0})'.format(loader)
    if self.autoscale:
        max, min = self.autoscale
        concurrency = '{{min={0}, max={1}}}'.format(min, max)
    pool = self.pool_cls
    if not isinstance(pool, string_t):
        pool = pool.__module__
    concurrency += ' ({0})'.format(pool.split('.')[-1])
    events = 'ON'
    if not self.send_events:
        events = 'OFF (enable -E to monitor this worker)'
    banner = BANNER.format(
        app=appr,
        hostname=safe_str(self.hostname),
        timestamp=datetime.now().replace(microsecond=0),
        version=VERSION_BANNER,
        conninfo=self.app.connection().as_uri(),
        results=maybe_sanitize_url(
            self.app.conf.result_backend or 'disabled',
        ),
        concurrency=concurrency,
        platform=safe_str(_platform.platform()),
        events=events,
        queues=app.amqp.queues.format(indent=0, indent_first=False),
    ).splitlines()
    # integrate the ASCII art.
    for i, x in enumerate(banner):
        try:
            banner[i] = ' '.join([ARTLINES[i], banner[i]])
        except IndexError:
            # Banner is longer than the art: pad with spaces instead.
            banner[i] = ' ' * 16 + banner[i]
    return '\n'.join(banner) + '\n'
def emit_banner(self):
    """Print the start-up banner, preferring an inline image logo."""
    # Dump configuration to screen so we have some basic information
    # for when users sends bug reports.
    use_image = term.supports_images()
    if use_image:
        print(term.imgcat(static.logo()))
    art = string(self.colored.cyan(
        ' \n', self.startup_info(artlines=not use_image)))
    tail = string(self.colored.reset(self.extra_info() or ''))
    print(safe_str(''.join([art, tail])), file=sys.__stdout__)
def on_retry(self, exc_info):
    """Handler called if the task should be retried."""
    if self.task.acks_late:
        # Acknowledge first: late-ack tasks hold the message until now.
        self.acknowledge()
    self.send_event("task-retried",
                    exception=safe_repr(exc_info.exception.exc),
                    traceback=safe_str(exc_info.traceback))
    if _does_info:
        info(self.retry_msg.strip(),
             {"id": self.id,
              "name": self.name,
              "exc": exc_info.exception})
def set_process_title(progname, info=None):
    """Set the ps name for the currently running process.

    Only works if :mod:`setproctitle` is installed.
    """
    proctitle = "[%s]" % progname
    if info:
        proctitle = "%s %s" % (proctitle, info)
    if _setproctitle:
        _setproctitle.setproctitle(safe_str(proctitle))
    return proctitle
def dispatch(self, cmd, arglist):
    """Dispatch and execute the command.

    Look-up order is: :attr:`builtins` -> :attr:`amqp`.
    """
    if isinstance(arglist, string_t):
        arglist = shlex.split(safe_str(arglist))
    try:
        handler = self.builtins[cmd]
    except KeyError:
        fun, args, formatter = self.get_amqp_api_command(cmd, arglist)
        return formatter(fun(*args))
    return getattr(self, handler)(*arglist)
def onecmd(self, line):
    """Parse *line* and execute the resulting command.

    Unknown commands/attributes fall back to :meth:`default`; any other
    error is reported and flags the session for reconnection.
    """
    if isinstance(line, string_t):
        line = shlex.split(safe_str(line))
    cmd, arg, line = self.parseline(line)
    if not line:
        return self.emptyline()
    self.lastcmd = line
    self.counter = next(self.inc_counter)
    try:
        self.respond(self.dispatch(cmd, arg))
    except (AttributeError, KeyError):
        # FIX: removed the unused ``as exc`` binding (flake8 F841).
        self.default(line)
    except Exception as exc:  # pylint: disable=broad-except
        self.say(exc)
        self.needs_reconnect = True
def _deals_output(self, segment, editD, trans_segments, status_tokenizer, status):
    """Append *segment* to the output list in the requested tokenization.

    Moses output must be tokenized; any other output is detokenized.
    Returns ``(trans_segments, status)`` where status is 'break' or
    'continue' for the caller's iteration loop.
    """
    if self.out == 'moses':  # Moses output is tokenizer
        if status_tokenizer == False:  # tokenize output
            segment.source_text = TMUtilsMatching.pre_process(segment.source_text, self.src_lang, 'tokenizer', {})
            segment.target_text = TMUtilsMatching.pre_process(segment.target_text, self.tgt_lang, 'tokenizer', {})
        trans_segments.append((segment, editD))
        return trans_segments, 'break'
    else:
        if status_tokenizer == True:  # TM output is untokenizer
            segment.target_text = TMUtilsMatching.pre_process(segment.target_text.split(' '), self.tgt_lang, 'untokenizer', {})
            segment.source_text = TMUtilsMatching.pre_process(segment.source_text.split(' '), self.src_lang, 'untokenizer', {})
        trans_segments.append((segment, editD))
        if status == 'translate':
            status = 'break'
        else:
            status = 'continue'
        # NOTE(review): `== False`/`== True` comparisons assume
        # status_tokenizer is strictly boolean -- confirm callers never
        # pass 0/1 or other truthy values.
        #if editD == 100: # Add this if to obtain better matching time
        #    status = 'break'
        logging.info("Final Output (Query -- Source -- Target): {} {} {}".format(safe_str(self.query_dic['query'] + ' -- '), safe_str(segment.source_text + ' -- '), safe_str(segment.target_text)))
        return trans_segments, status
def _log_error(self, einfo):
    """Send a ``task-failed`` event and log the failure.

    Internal errors escalate to CRITICAL with a dedicated format;
    finally the task's error e-mail hook is invoked.
    """
    exception, traceback, exc_info, internal, sargs, skwargs = (
        safe_repr(einfo.exception),
        safe_str(einfo.traceback),
        einfo.exc_info,
        einfo.internal,
        safe_repr(self.args),
        safe_repr(self.kwargs),
    )
    format = self.error_msg
    description = 'raised exception'
    severity = logging.ERROR
    self.send_event('task-failed', uuid=self.id,
                    exception=exception,
                    traceback=traceback)
    if internal:
        format = self.internal_error_msg
        description = 'INTERNAL ERROR'
        severity = logging.CRITICAL
    context = {
        'hostname': self.hostname, 'id': self.id,
        'name': self.name, 'exc': exception,
        'traceback': traceback, 'args': sargs,
        'kwargs': skwargs, 'description': description,
    }
    logger.log(severity, format.strip(), context,
               exc_info=exc_info,
               extra={'data': {'id': self.id,
                               'name': self.name,
                               'args': sargs,
                               'kwargs': skwargs,
                               'hostname': self.hostname,
                               'internal': internal}})
    self.task.send_error_email(context, einfo.exception)
def test_on_retry(self):
    """on_failure for a Retry emits a task-retried event, honours the
    _does_info flag, and handles internal errors."""
    job = self.get_request(self.mytask.s(1, f='x'))
    job.eventer = Mock(name='.eventer')
    try:
        raise Retry('foo', KeyError('moofoobar'))
    # FIX: bare ``except:`` (E722) would also swallow SystemExit and
    # KeyboardInterrupt; only the Retry raised above is expected here.
    except Retry:
        einfo = ExceptionInfo()
        job.on_failure(einfo)
        job.eventer.send.assert_called_with(
            'task-retried',
            uuid=job.id,
            exception=safe_repr(einfo.exception.exc),
            traceback=safe_str(einfo.traceback),
        )
        prev, module._does_info = module._does_info, False
        try:
            job.on_failure(einfo)
        finally:
            module._does_info = prev
        einfo.internal = True
        job.on_failure(einfo)
def emit_banner(self):
    """Print the start-up banner to the real stdout.

    When the terminal supports inline images the logo is shown as an
    image and the ASCII art lines are suppressed.
    """
    # Dump configuration to screen so we have some basic information
    # for when users sends bug reports.
    use_image = term.supports_images()
    if use_image:
        print(term.imgcat(static.logo()))
    parts = [
        string(self.colored.cyan(
            " \n", self.startup_info(artlines=not use_image))),
        string(self.colored.reset(self.extra_info() or "")),
    ]
    print(safe_str("".join(parts)), file=sys.__stdout__)
def startup_info(self):
    """Build the worker start-up banner text (Python 2 era, %-formatting)."""
    app = self.app
    concurrency = unicode(self.concurrency)
    appr = '%s:0x%x' % (app.main or '__main__', id(app))
    if not isinstance(app.loader, AppLoader):
        # Show a short loader name for non-default loaders.
        loader = qualname(app.loader)
        if loader.startswith('celery.loaders'):
            loader = loader[14:]
        appr += ' (%s)' % loader
    if self.autoscale:
        max, min = self.autoscale
        concurrency = '{min=%s, max=%s}' % (min, max)
    pool = self.pool_cls
    if not isinstance(pool, basestring):
        pool = pool.__module__
    concurrency += ' (%s)' % pool.split('.')[-1]
    events = 'ON'
    if not self.send_events:
        events = 'OFF (enable -E to monitor this worker)'
    banner = (BANNER % {
        'app': appr,
        'hostname': self.hostname,
        'version': VERSION_BANNER,
        'conninfo': self.app.connection().as_uri(),
        'concurrency': concurrency,
        'platform': safe_str(_platform.platform()),
        'events': events,
        'queues': app.amqp.queues.format(indent=0, indent_first=False),
    }).splitlines()
    # integrate the ASCII art.
    for i, x in enumerate(banner):
        try:
            banner[i] = ' '.join([ARTLINES[i], banner[i]])
        except IndexError:
            # Banner is longer than the art: pad with spaces instead.
            banner[i] = ' ' * 16 + banner[i]
    return '\n'.join(banner) + '\n'
def startup_info(self):
    """Build the worker start-up banner text (str.format era)."""
    app = self.app
    concurrency = string(self.concurrency)
    appr = '{0}:0x{1:x}'.format(app.main or '__main__', id(app))
    if not isinstance(app.loader, AppLoader):
        # Show a short loader name for non-default loaders.
        loader = qualname(app.loader)
        if loader.startswith('celery.loaders'):
            loader = loader[14:]
        appr += ' ({0})'.format(loader)
    if self.autoscale:
        max, min = self.autoscale
        concurrency = '{{min={0}, max={1}}}'.format(min, max)
    pool = self.pool_cls
    if not isinstance(pool, string_t):
        pool = pool.__module__
    concurrency += ' ({0})'.format(pool.split('.')[-1])
    events = 'ON'
    if not self.send_events:
        events = 'OFF (enable -E to monitor this worker)'
    banner = BANNER.format(
        app=appr,
        hostname=self.hostname,
        version=VERSION_BANNER,
        conninfo=self.app.connection().as_uri(),
        concurrency=concurrency,
        platform=safe_str(_platform.platform()),
        events=events,
        queues=app.amqp.queues.format(indent=0, indent_first=False),
    ).splitlines()
    # integrate the ASCII art.
    for i, x in enumerate(banner):
        try:
            banner[i] = ' '.join([ARTLINES[i], banner[i]])
        except IndexError:
            # Banner is longer than the art: pad with spaces instead.
            banner[i] = ' ' * 16 + banner[i]
    return '\n'.join(banner) + '\n'
def satisfies_search_terms(task, search_terms):
    """Return True if *task* matches any of the given search terms.

    With no terms at all every task matches.  The 'any' term is matched
    case-insensitively (regex-escaped) against a joined string of the
    task's visible fields.
    """
    any_value_search_term = search_terms.get('any')
    result_search_term = search_terms.get('result')
    args_search_terms = search_terms.get('args')
    kwargs_search_terms = search_terms.get('kwargs')
    state_search_terms = search_terms.get('state')
    if not any([
        any_value_search_term, result_search_term, args_search_terms,
        kwargs_search_terms, state_search_terms
    ]):
        return True
    if any_value_search_term:
        # Collect the searchable fields, skipping missing values
        # (both None and the literal string 'None').
        regex_term_filtered = []
        for term in [
            task.name, task.uuid, task.state,
            task.worker.hostname if task.worker else None,
            task.args, task.kwargs, safe_str(task.result)
        ]:
            if term in (None, 'None'):
                continue
            regex_term_filtered.append(term)
        search_match = re.search(
            "(?:%s)" % re.escape(any_value_search_term),
            '|'.join(regex_term_filtered), re.I | re.U)
    else:
        search_match = None
    terms = [
        state_search_terms and task.state in state_search_terms,
        any_value_search_term and search_match,
        result_search_term and result_search_term in task.result,
        kwargs_search_terms and all(
            stringified_dict_contains_value(k, v, task.kwargs)
            for k, v in kwargs_search_terms.items()),
        args_search_terms and task_args_contains_search_args(
            task.args, args_search_terms)
    ]
    return any(terms)
def satisfies_search_terms(task, search_terms):
    """Return True if *task* matches any of the given search terms.

    With no terms at all every task matches.  The 'any' term is matched
    by plain substring against a joined string of the task's fields.
    """
    any_value_search_term = search_terms.get('any')
    result_search_term = search_terms.get('result')
    args_search_terms = search_terms.get('args')
    kwargs_search_terms = search_terms.get('kwargs')
    state_search_terms = search_terms.get('state')
    if not any([any_value_search_term, result_search_term,
                args_search_terms, kwargs_search_terms,
                state_search_terms]):
        return True
    terms = [
        state_search_terms and task.state in state_search_terms,
        # filter(None, ...) drops absent fields before joining.
        any_value_search_term and any_value_search_term in '|'.join(
            filter(None, [task.name, task.uuid, task.state,
                          task.worker.hostname if task.worker else None,
                          task.args, task.kwargs,
                          safe_str(task.result)])),
        result_search_term and task.result and
        result_search_term in task.result,
        kwargs_search_terms and all(
            stringified_dict_contains_value(k, v, task.kwargs)
            for k, v in kwargs_search_terms.items()
        ),
        args_search_terms and task_args_contains_search_args(
            task.args, args_search_terms)
    ]
    return any(terms)
def on_start(self):
    """Worker start-up hook: redirect stdouts, fire the post-setup
    signal, optionally purge queues, and print the banner."""
    app = self.app
    if not self._custom_logging and self.redirect_stdouts:
        app.log.redirect_stdouts(self.redirect_stdouts_level)
    WorkController.on_start(self)
    # this signal can be used to e.g. change queues after
    # the -Q option has been applied.
    signals.celeryd_after_setup.send(
        sender=self.hostname, instance=self, conf=app.conf,
    )
    if self.purge:
        self.purge_messages()
    # Dump configuration to screen so we have some basic information
    # for when users sends bug reports.
    print(safe_str(''.join([
        string(self.colored.cyan(' \n', self.startup_info())),
        string(self.colored.reset(self.extra_info() or '')),
    ])), file=sys.__stdout__)
    self.set_process_status('-active-')
    self.install_platform_tweaks(self)
def _log_error(self, einfo, send_failed_event=True):
    """Log a task failure, honouring ``Task.throws`` and control
    exceptions.

    Expected exceptions (listed in ``task.throws``) are logged at INFO
    without traceback or error e-mail.  Internal errors specialize
    further: MemoryError re-raises, Reject/Ignore acknowledge/reject the
    message and suppress the failure event, anything else is CRITICAL.
    """
    einfo.exception = get_pickled_exception(einfo.exception)
    eobj = einfo.exception
    exception, traceback, exc_info, internal, sargs, skwargs = (
        safe_repr(eobj),
        safe_str(einfo.traceback),
        einfo.exc_info,
        einfo.internal,
        safe_repr(self.args),
        safe_repr(self.kwargs),
    )
    task = self.task
    if task.throws and isinstance(eobj, task.throws):
        # Expected failure: quiet logging, no traceback, no e-mail.
        do_send_mail, severity, exc_info, description = (
            False, logging.INFO, None, 'raised expected',
        )
    else:
        do_send_mail, severity, description = (
            True, logging.ERROR, 'raised unexpected',
        )
    format = self.error_msg
    if internal:
        if isinstance(einfo.exception, MemoryError):
            # Never try to log a MemoryError -- escalate immediately.
            raise MemoryError('Process got: %s' % (einfo.exception, ))
        elif isinstance(einfo.exception, Reject):
            format = self.rejected_msg
            description = 'rejected'
            severity = logging.WARN
            send_failed_event = False
            self.reject(requeue=einfo.exception.requeue)
        elif isinstance(einfo.exception, Ignore):
            format = self.ignored_msg
            description = 'ignored'
            severity = logging.INFO
            exc_info = None
            send_failed_event = False
            self.acknowledge()
        else:
            format = self.internal_error_msg
            description = 'INTERNAL ERROR'
            severity = logging.CRITICAL
    if send_failed_event:
        self.send_event(
            'task-failed', exception=exception, traceback=traceback,
        )
    context = {
        'hostname': self.hostname, 'id': self.id,
        'name': self.name, 'exc': exception,
        'traceback': traceback, 'args': sargs,
        'kwargs': skwargs, 'description': description,
    }
    logger.log(severity, format.strip(), context,
               exc_info=exc_info,
               extra={
                   'data': {
                       'id': self.id,
                       'name': self.name,
                       'args': sargs,
                       'kwargs': skwargs,
                       'hostname': self.hostname,
                       'internal': internal
                   }
               })
    if do_send_mail:
        task.send_error_email(context, einfo.exception)
def entity_name(self, name, table=CHARS_REPLACE_TABLE):
    """Format AMQP queue name into a legal SQS queue name."""
    normalized = unicode(safe_str(name))
    return normalized.translate(table)
def entity_name(self, name, table=CHARS_REPLACE_TABLE):
    """Format AMQP queue name into a valid SLQS queue name."""
    normalized = text_t(safe_str(name))
    return normalized.translate(table)
def on_consumer_ready(self, consumer):
    """Announce readiness once the consumer is fully started."""
    signals.worker_ready.send(sender=consumer)
    banner = 'celery@%s ready.' % safe_str(self.hostname)
    print(banner)
def no_color(self):
    """Return the folded segments with all color codes stripped."""
    if not self.s:
        return ''
    return safe_str(reduce(self._fold_no_color, self.s))
def __unicode__(self):
    """Render the embedded text, appending a reset sequence if colored."""
    suffix = RESET_SEQ if self.enabled else ''
    return safe_str(self.embed() + safe_str(suffix))
def test_when_bytes(self):
    """A plain native string passes through unchanged."""
    result = safe_str('foo')
    self.assertEqual(result, 'foo')
def __str__(self):
    """Delegate to the unicode rendering, coerced via safe_str."""
    text = self.__unicode__()
    return safe_str(text)
def on_consumer_ready(self, consumer):
    """Announce readiness once the consumer is fully started."""
    signals.worker_ready.send(sender=consumer)
    hostname = safe_str(self.hostname)
    logger.info('%s ready.', hostname)
def test_when_not_string(self):
    """Arbitrary objects are coerced via repr()."""
    obj = object()
    self.assertEqual(safe_str(obj), repr(obj))
def test_when_containing_high_chars(self):
    """Non-ASCII text is still returned as a native string type."""
    sample = 'The quiæk fåx jømps øver the lazy dåg'
    self.assertIsInstance(safe_str(sample), string_t)
def test_when_unicode(self):
    """A text string stays a native string type."""
    result = safe_str('foo')
    self.assertIsInstance(result, string_t)
def __repr__(self):
    """Debug representation: name, task, call signature and schedule."""
    args = safe_repr(self.args)
    kwargs = safe_repr(self.kwargs)
    return '<ModelEntry: {0} {1}(*{2}, **{3}) {4}>'.format(
        safe_str(self.name), self.task, args, kwargs, self.schedule)
def embed(self):
    """Return the folded segment text, op-prefixed when enabled."""
    prefix = self.op if self.enabled else ''
    return safe_str(prefix) + safe_str(reduce(self._add, self.s))
def attrs(self, d, scheme=None):
    """Render *d* (layered over *scheme* and the instance scheme) as a
    separator-joined attribute string."""
    if scheme:
        # Overlay the caller's values on top of the given scheme first.
        d = dict(scheme, **d or {})
    merged = dict(self.scheme, **d)
    return self._attrsep.join(
        safe_str(self.attr(k, v)) for k, v in items(merged)
    )
def entity_name(self, name: str, table: Optional[Dict[int, int]] = None) -> str:
    """Format AMQP queue name into a valid ServiceBus queue name."""
    # ``or`` (not ``is None``) so an empty mapping also falls back.
    mapping = table or CHARS_REPLACE_TABLE
    return str(safe_str(name)).translate(mapping)
def execute(self, l_best_segments, align_features, concordance):
    """Run the TM matching pipeline over ranked candidate segments.

    Returns a list of ``(segment, edit_distance)`` pairs; in concordance
    mode the ranked list is returned directly without transformations.
    NOTE(review): reconstructed indentation — the break/continue checks
    are assumed to apply to both the identical and transformed branches.
    """
    # show the status of the process
    status = ''  # --> indicated if match or not match
    equal = False  # --> indicated if applied equal sequences or not
    self.timer.start("preprocess")
    self.query_dic = self._preprocess()  # uniform tags on query
    self.timer.stop("preprocess")
    # 1. sort segment list
    rank_segments = self._match_rank(l_best_segments)
    logging.info("SEGMENTS FROM ELASTICSEARCH")
    for seg in rank_segments:
        logging.info(u"{}".format(safe_str(seg[0][0].source_text)))
    if concordance:
        return [(segment[0][0], segment[1]) for segment in rank_segments]
    else:
        # 2. Analised the first segment
        segment = rank_segments[0][0][0]  # Check the best (first) segment
        ini_editD = rank_segments[0][1]
        src_re = rank_segments[0][0][2]  # src after applied regex
        src_re_reduce = rank_segments[0][0][3]  # src after simplified regex
        if self.query_dic['query'] == segment.source_text:  # The strings are identical
            logging.info("Identical Segments (Query -- Source): {} {}".format(safe_str(self.query_dic['query'] + ' -- '), safe_str(segment.source_text)))
            # source and query ar identical, then is not necessary untokenizer or check upper or lower
            equal = True
            editD = ini_editD
            print('---------- ' + str(editD))
            self.trans_segments, status = self._deals_output(segment, editD, self.trans_segments, False, status)  # decide next step
        else:  # The best is not 100% match --> applied transformations
            logging.info("Different Segments (Query -- Source -- Target): {} {} {}".format(safe_str(self.query_dic['query'] + ' -- '), safe_str(segment.source_text + ' -- '), safe_str(segment.target_text)))
            segment, editD, status, equal, status_tokenizer = self.execute_segment(segment, src_re, src_re_reduce, ini_editD, align_features, equal)
            if status == 'find':  # or status == 'translate'
                segment.source_text, segment.target_text, status_tokenizer = self.style_string(segment.source_text, segment.target_text, status_tokenizer)  # Adjust source and tgt text
            self.trans_segments, status = self._deals_output(segment, editD, self.trans_segments, status_tokenizer, status)
        if status == 'break':  # There are not match on ElasticTM
            return self.trans_segments
        # 3. TM output --> Analized the list with the others segments
        if status == 'continue':
            if len(self.trans_segments) == 1:  # The first element was 100% match
                rank_segments = rank_segments[1:]
            # Check if the retrieve segments are 100% match or apply transformations
            for seg_info in rank_segments:
                segment = seg_info[0][0]
                ini_editD = seg_info[1]
                src_re = seg_info[0][2]  # src after applied regex
                src_re_reduce = seg_info[0][3]  # src after simplified regex
                logging.info("(TM output) Need more segment (Query -- Source -- Target): {} {} {}".format(safe_str(self.query_dic['query'] + ' -- '), safe_str(segment.source_text + ' -- '), safe_str(segment.target_text)))
                segment, editD, status, equal, status_tokenizer = self.execute_segment(segment, src_re, src_re_reduce, ini_editD, align_features, equal)
                if status == 'find':  # or status == 'translate'
                    segment.source_text, segment.target_text, status_tokenizer = self.style_string(segment.source_text, segment.target_text, status_tokenizer)  # Adjust source and tgt text
                self.trans_segments, status = self._deals_output(segment, editD, self.trans_segments, status_tokenizer, status)  # decide next step
                if status == 'break':  # Meaning that the last segment has editD less that a threshold
                    break
                if status == 'continue':
                    continue
        return self.trans_segments
def on_consumer_ready(self, consumer):
    """Announce readiness once the consumer is fully started."""
    signals.worker_ready.send(sender=consumer)
    hostname = safe_str(self.hostname)
    print('{0} ready.'.format(hostname))
def execute_segment(self, segment, src_re, src_re_reduce, ini_editD, align_features, equal):
    """Run the matching pipe (regex / tags / posTag / split) on one TM candidate.

    :param segment: candidate TM segment (source/target text and pos annotations)
    :param src_re: candidate source after applying the regex pre-processing
    :param src_re_reduce: candidate source after the simplified regex
    :param ini_editD: initial edit-distance score of the candidate
    :param align_features: alignment features used by the posTag matcher
    :param equal: True when the query was already found identical to a TM source
    :return: (segment, editD, status, equal, status_tokenizer); status is '',
             'find' or 'break'; status_tokenizer says whether texts were tokenized
    """
    logging.info("Applied match PIPE")
    tgt_text = segment.target_text
    src_text = segment.source_text
    status = ''
    editD = ini_editD
    status_tokenizer = False
    if equal:
        if self.query == src_text:
            # Identity still holds for this candidate: nothing to transform.
            return segment, editD, 'find', equal, status_tokenizer
        else:
            equal = False
    if not equal:
        for op in self.pipe:  # transformations are selected by parameters
            if op == 'regex':
                if self.query_dic['query'] != self.query_dic['query_re']:  # query contains regex-able patterns
                    logging.info("Applied Regex")
                    self.timer.start("_regx_match")
                    # Compare query_re with src_re --> simplified forms.
                    match = ini_editD
                    if src_re != src_text:
                        if src_re_reduce.lower() == self.query_dic['query_re_reduce'].lower():
                            # Simplified regular expressions agree (case-insensitive) --> perfect match.
                            match = 100
                        tgt_text, src_text = self._regex_transform(segment.source_text, segment.target_text)
                        ini_editD = self._tm_edit_distance(self.query_dic['query'], src_text, self.query_dic['query_re_reduce'], src_re_reduce)  # match
                        logging.info("After applied Regex Segment: {} {} {}".format(safe_str(src_text + ' -- '), safe_str(tgt_text + ' -- '), safe_str(ini_editD)))
                    if match == 100:
                        status = 'find'
                    self.timer.stop("_regx_match")
            if op == 'tags':
                logging.info("Delete Tags")
                self.timer.start("_tags_match")
                # _reduce is returned by _match_tags but unused here.
                src_text, tgt_text, status, _reduce, ini_editD = self._match_tags(src_text, src_re_reduce, tgt_text, status, ini_editD)
                logging.info("After applied Tags: {} {} {}".format(safe_str(src_text + ' -- '), safe_str(tgt_text + ' -- '), safe_str(ini_editD)))
                self.timer.stop("_tags_match")
            if op == 'posTag':
                self.timer.start("fuzzy_match")
                upper = False
                if segment.source_pos is not None and segment.target_pos is not None:
                    # This step needs pos-tagger annotation on both sides.
                    squery, tok_query, pos_query = self.check_query_parameters()
                    logging.info("Apply posTag matching")
                    self.timer.start("fuzzy_preprocess")
                    if not status_tokenizer:
                        # Tokenize source, target and simplified query once.
                        tgt_text = TMUtilsMatching.pre_process(tgt_text, self.tgt_lang, 'tokenizer', {})  # Pre-process tgt
                        src_text = TMUtilsMatching.pre_process(src_text, self.src_lang, 'tokenizer', {})  # Tokenize tm_src
                        self.query_dic['query_re_reduce_tok'] = TMUtilsMatching.pre_process(self.query_dic['query_re_reduce'], self.src_lang, 'tokenizer', {})  # Tokenize the simplified query
                        status_tokenizer = True
                    if 'universal' not in self.query_dic:
                        self.query_dic['universal'] = TMUtilsMatching.segment_2_universal(tok_query.lower(), pos_query, self.src_lang)
                    src_word_pos = TMUtilsMatching.segment_2_universal(src_text.lower(), segment.source_pos, self.src_lang)  # [word, pos] tm_src segment
                    tgt_word_pos = TMUtilsMatching.segment_2_universal(tgt_text.lower(), segment.target_pos, self.tgt_lang)  # [word, pos] tm_tgt segment
                    self.timer.stop("fuzzy_preprocess")
                    if isinstance(self.query_dic['universal'], list) and isinstance(src_word_pos, list) and isinstance(tgt_word_pos, list):
                        logging.info("Check unmatch word --> PosTag")
                        if TMUtilsMatching.len_compare(pos_query.split(' '), segment.source_pos.split(' ')) is True and (tok_query != src_text):
                            logging.info("Query and source have same length or only one difference")
                            self.timer.start("search unmatch")
                            tgt_un_match, tgt_position, operation, src_un_match, src_position, pos_tag = self._combine_feature_match(tok_query, src_word_pos, tgt_word_pos, align_features)
                            self.timer.stop("search unmatch")
                            # BUG FIX: format string had two placeholders for three
                            # arguments, so ini_editD was silently dropped from the log.
                            logging.info("Unmatch word and operation: {} {} {}".format(safe_str(src_un_match), safe_str(operation), safe_str(ini_editD)))
                            self.timer.start("create target unmatch")
                            if src_un_match is not None:
                                # Create new src, then re-derive regex forms and a
                                # penalized edit distance on the patched source.
                                src_text, upper = self._create_target_expression(src_text, src_position, operation, src_un_match, 'source', upper, pos_tag)
                                src_re = TMUtilsMatching.pre_process(src_text, self.src_lang, 'reg_exp', self.match['regex'].re_pp)
                                src_re_reduce = TMRegexMatch.simplified_name(src_re)
                                penalize_match = self._improve_match(src_un_match, operation)
                                ini_editD = self._tm_edit_distance(tok_query.lower(), src_text.lower(), self.query_dic['query_re_reduce_tok'].lower(), src_re_reduce.lower()) - penalize_match  # match
                            # Create new tgt.
                            if tgt_un_match is not None:
                                tgt_text, upper = self._create_target_expression(tgt_text, tgt_position, operation, tgt_un_match, 'target', upper, pos_tag)
                            self.timer.stop("create target unmatch")
                    # BUG FIX: two placeholders for three arguments; ini_editD was dropped.
                    logging.info("After applied posTag: {} {} {}".format(safe_str(src_text + ' -- '), safe_str(tgt_text + ' -- '), safe_str(ini_editD)))
                self.timer.stop("fuzzy_match")
            # Keep the best score seen so far; stop as soon as an op decides.
            if ini_editD > editD:
                editD = ini_editD
            if status == 'find' or status == 'break':
                segment.source_text = src_text
                segment.target_text = tgt_text
                return segment, editD, status, equal, status_tokenizer
    if editD >= self.min_match:
        segment.source_text = src_text
        segment.target_text = tgt_text
        status = 'find'
    else:
        # Fall back to the split rules (only before any output was produced).
        if 'split' in self.pipe and not self.trans_segments:
            src_text = None
            tgt_text = None
            editSplit = 0
            # Split by sentences first.
            list_sentences = TMUtilsMatching.pre_process(self.query_dic['tokenizer'], self.src_lang, 'split_sentences', {})
            logging.info("split by Sentences : {} ".format(list_sentences))
            if len(list_sentences) > 1:
                split_match = TMSplitMatch([TMUtilsMatching.pre_process(q.split(' '), self.src_lang, 'untokenizer', {}) for q in list_sentences], [], self.src_lang, self.tgt_lang, 'sentence', self.machine_translation, self.domain)
                src_text, tgt_text, editSplit = split_match._match()
                if editSplit >= self.min_match:
                    # Split method returned usable segments from ActivaTM.
                    segment.source_text, segment.target_text, editD = src_text, tgt_text, editSplit
            else:
                # Split into smaller phrases when rules exist for this language pair.
                lang_class = G_CONFIG.get_split_rules(self.src_lang, self.tgt_lang)
                if lang_class:
                    logging.info("Split Query by Phrase")
                    all_split, all_marks = self._splitByPhrase(lang_class, list_sentences)
                    if len(all_split) > 1:  # at least one split rule was applied
                        split_match = TMSplitMatch(all_split, all_marks, self.src_lang, self.tgt_lang, 'phrase', self.machine_translation, self.domain)
                        src_text, tgt_text, editSplit = split_match._match()
                        if editSplit >= self.min_match:
                            segment.source_text, segment.target_text, editD = src_text, tgt_text, editSplit
        if editD >= self.min_match:
            status = 'find'
            status_tokenizer = True
        else:
            if not self.trans_segments:
                # No match at all: queue this segment for automatic translation
                # (if MT is unavailable the caller ends up returning []).
                self.trans_segments.append((segment, editD))
            status = 'break'  # a queued segment exists; stop the search
    return segment, editD, status, equal, status_tokenizer