def __call__(self, fileobj, keywords, comment_tags, options):
    """Tokenize *fileobj* and drive the extraction state machine.

    Resets the per-file state, then feeds every token to the current
    ``self.state`` handler; handlers are expected to append results to
    ``self.messages``, which is returned.
    """
    # Fresh per-file state: start in the "waiting" handler.
    self.state = self.stateWaiting
    self.msg = None
    self.keywords = keywords
    self.messages = []
    # Fall back to ASCII when the file declares no coding cookie.
    self.encoding = parse_encoding(fileobj) or "ascii"
    for tok_type, tok_string, start, _end, _line in tokenize.generate_tokens(fileobj.readline):
        # Handlers only need the token type, its text and its start line.
        self.state(tok_type, tok_string, start[0])
    return self.messages
def __call__(self, fileobj, keywords, comment_tags, options):
    """Tokenize *fileobj* and drive the extraction state machine.

    Resets the per-file state, then feeds every token to the current
    ``self.state`` handler; handlers append results to ``self.messages``,
    which is returned.
    """
    # Fix: the original assigned ``self.state`` and ``self.msg`` twice in a
    # row (a copy/paste duplicate); assigning once has the same effect.
    self.state = self.stateWaiting
    self.msg = None
    self.keywords = keywords
    self.messages = []
    # Fall back to ASCII when the file declares no coding cookie.
    self.encoding = parse_encoding(fileobj) or "ascii"
    tokens = tokenize.generate_tokens(fileobj.readline)
    for (ttype, tstring, stup, etup, line) in tokens:
        # Handlers only need the token type, its text and its start line.
        self.state(ttype, tstring, stup[0])
    return self.messages
def Extract(fobj, keywords, comment_tags, config, method):
    """Execute the extractor module in *fobj* and delegate to its ``main``.

    The file is expected to define a ``main(encoding, config, method)``
    callable; its return value is returned unchanged.
    """
    d = {}
    # Encoding: coding cookie wins, then the project config, then UTF-8.
    # NOTE(review): `config` appears to be a project object whose `.get`
    # accepts a `default` keyword (unlike dict.get) — confirm against caller.
    encoding = parse_encoding (fobj) or config.get \
        ("encoding", default = "utf-8")
    code = fobj.read()
    try:
        # SECURITY: runs the file's contents with full module globals —
        # only safe when the extractor files come from a trusted source.
        exec(code, globals(), d)
    except Exception as exc:
        # Dump the offending source to aid debugging, then re-raise.
        print("*** Error during Babel.Extract", fobj, "*" * 40)
        print(exc)
        print(code)
        raise
    else:
        return d["main"](encoding, config, method)
def Extract(fobj, keywords, comment_tags, config, method):
    """Execute the extractor module in *fobj* and delegate to its ``main``.

    The file is expected to define a ``main(encoding, config, method)``
    callable; its return value is returned unchanged.
    """
    namespace = {}
    # Coding cookie wins, then the project config, then UTF-8.
    encoding = parse_encoding(fobj) or config.get("encoding", default="utf-8")
    source = fobj.read()
    try:
        # SECURITY: runs the file's contents with full module globals —
        # only safe when the extractor files come from a trusted source.
        exec(source, globals(), namespace)
    except Exception as exc:
        # Dump the offending source to aid debugging, then re-raise.
        print("*** Error during Babel.Extract", fobj, "*" * 40)
        print(exc)
        print(source)
        raise
    else:
        return namespace["main"](encoding, config, method)
def __call__(self, fileobj, keywords, comment_tags, options):
    """Tokenize *fileobj* and drive the extraction state machine.

    Normalizes *keywords* to a dict first; when ``ngettext`` is requested,
    both it and ``pluralize`` are mapped to argument positions ``(1, 2)``
    (singular, plural).
    """
    if not isinstance(keywords, dict):
        # A plain keyword list becomes {name: None}.
        keywords = dict.fromkeys(keywords)
    if 'ngettext' in keywords:
        # Plural-aware functions take (singular, plural) in slots 1 and 2.
        keywords['ngettext'] = (1, 2)
        keywords['pluralize'] = (1, 2)
    # Fresh per-file state: start in the "waiting" handler.
    self.state = self.stateWaiting
    self.msg = None
    self.keywords = keywords
    self.messages = []
    # Fall back to ASCII when the file declares no coding cookie.
    self.encoding = parse_encoding(fileobj) or "ascii"
    for tok_type, tok_string, start, _end, _line in tokenize.generate_tokens(fileobj.readline):
        # Handlers only need the token type, its text and its start line.
        self.state(tok_type, tok_string, start[0])
    return self.messages
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.

    It returns an iterator yielding tuples in the following form ``(lineno,
    funcname, message, comments)``.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :rtype: ``iterator``
    """
    # State machine overview:
    #   call_stack == -1 : not inside a call to a recognized keyword
    #   call_stack ==  0 : at the argument level of a keyword call
    #   call_stack >   0 : inside nested parens within that call
    # `buf` collects adjacent string literal pieces of one argument;
    # `messages` collects one entry per argument (None for non-strings).
    funcname = lineno = message_lineno = None
    call_stack = -1
    buf = []
    messages = []
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = parse_encoding(fileobj) or options.get('encoding', 'UTF-8')
    future_flags = parse_future_flags(fileobj, encoding)

    # On Python 3 the underlying stream is binary; decode each line so the
    # tokenizer always sees text.
    if PY2:
        next_line = fileobj.readline
    else:
        next_line = lambda: fileobj.readline().decode(encoding)

    tokens = generate_tokens(next_line)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
        elif tok == OP and value == '(':
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
        elif in_def and tok == OP and value == ':':
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            if PY2:
                value = value.decode(encoding)
            value = value[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            # A keyword used as an argument (e.g. _(f(...))) closes the
            # outer message and immediately starts a new one.
            nested = (tok == NAME and value in keywords)
            if (tok == OP and value == ')') or nested:
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)

                # Single argument -> bare message; several -> tuple.
                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                        translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                yield (message_lineno, funcname, messages,
                       [comment[1] for comment in translator_comments])

                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
                if nested:
                    funcname = value
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                code = compile('# coding=%s\n%s' % (str(encoding), value),
                               '<string>', 'eval', future_flags)
                value = eval(code, {'__builtins__': {}}, {})
                if PY2 and not isinstance(value, text_type):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == ',':
                # Argument separator: flush the current string buffer.
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ')':
            call_stack -= 1
        elif funcname and call_stack == -1:
            # Keyword name not followed by '(' — not a call after all.
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.

    :param fileobj: the seekable, file-like object the messages should be
                    extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
                     recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
                         in the results
    :param options: a dictionary of additional options (optional)
    :return: an iterator over ``(lineno, funcname, message, comments)`` tuples
    :rtype: ``iterator``
    """
    # NOTE(review): this variant is Python-2-only — it calls ``.decode`` on
    # ``str`` token values and evals with a bare ``# coding=`` header; on
    # Python 3 it would raise AttributeError. Confirm the target runtime.
    #
    # State machine: call_stack == -1 outside any keyword call, 0 at the
    # argument level of one, > 0 inside nested parens within it.
    funcname = lineno = message_lineno = None
    call_stack = -1
    buf = []
    messages = []
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1')

    tokens = generate_tokens(fileobj.readline)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
        elif tok == OP and value == '(':
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
        elif in_def and tok == OP and value == ':':
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            value = value.decode(encoding)[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            if tok == OP and value == ')':
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)

                # Single argument -> bare message; several -> tuple.
                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                        translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                yield (message_lineno, funcname, messages,
                       [comment[1] for comment in translator_comments])

                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                value = eval('# coding=%s\n%s' % (encoding, value),
                             {'__builtins__':{}}, {})
                if isinstance(value, str):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == ',':
                # Argument separator: flush the current string buffer.
                if buf:
                    messages.append(''.join(buf))
                    del buf[:]
                else:
                    messages.append(None)
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno+1, old_comment))
        elif call_stack > 0 and tok == OP and value == ')':
            call_stack -= 1
        elif funcname and call_stack == -1:
            # Keyword name not followed by '(' — not a call after all.
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code,

    This is patched extract_python from Babel to support keyword argument
    mapping.

    `kwargs_maps` option: names of keyword arguments will be mapping to
    index of messages array.

    `cleandoc_keywords` option: a list of keywords to clean up the extracted
    messages with `cleandoc`.
    """
    from inspect import cleandoc

    # NOTE(review): Python-2-only — uses dict.iteritems() and str.decode();
    # confirm the target runtime.
    #
    # Extra state vs. stock Babel: `kwarg_name` remembers the name to the
    # left of an '=' inside the call, and `messages_kwargs` holds values
    # passed by keyword until the call closes.
    funcname = lineno = message_lineno = None
    kwargs_maps = func_kwargs_map = None
    call_stack = -1
    buf = []
    messages = []
    messages_kwargs = {}
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = str(
        parse_encoding(fileobj) or options.get('encoding', 'iso-8859-1'))

    # Per-keyword {kwarg name -> message index} maps, overridable via options.
    kwargs_maps = _DEFAULT_KWARGS_MAPS.copy()
    if 'kwargs_maps' in options:
        kwargs_maps.update(options['kwargs_maps'])

    cleandoc_keywords = set(_DEFAULT_CLEANDOC_KEYWORDS)
    if 'cleandoc_keywords' in options:
        cleandoc_keywords.update(options['cleandoc_keywords'])

    tokens = generate_tokens(fileobj.readline)
    tok = value = None
    for _ in tokens:
        # Track the previous token so `name=` can be recognized.
        prev_tok, prev_value = tok, value
        tok, value, (lineno, _), _, _ = _
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
        elif tok == OP and value == '(':
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
            kwarg_name = None
        elif in_def and tok == OP and value == ':':
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            value = value.decode(encoding)[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue
                # appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            if tok == OP and value == ')':
                if buf:
                    message = ''.join(buf)
                    if kwarg_name in func_kwargs_map:
                        messages_kwargs[kwarg_name] = message
                    else:
                        messages.append(message)
                    del buf[:]
                else:
                    messages.append(None)

                # Fold keyword-passed messages into their mapped positions.
                # NOTE(review): `while index >= len(messages)` grows the list
                # to length index+1 but the value lands at index-1 (1-based
                # map?) — looks like a possible off-by-one; confirm against
                # _DEFAULT_KWARGS_MAPS before changing.
                for name, message in messages_kwargs.iteritems():
                    if name not in func_kwargs_map:
                        continue
                    index = func_kwargs_map[name]
                    while index >= len(messages):
                        messages.append(None)
                    messages[index - 1] = message

                if funcname in cleandoc_keywords:
                    # `m and cleandoc(m)` keeps None placeholders as-is.
                    messages = [m and cleandoc(m) for m in messages]

                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                        translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                yield (message_lineno, funcname, messages,
                       [comment[1] for comment in translator_comments])

                funcname = lineno = message_lineno = None
                kwarg_name = func_kwargs_map = None
                call_stack = -1
                messages = []
                messages_kwargs = {}
                translator_comments = []
                in_translator_comments = False
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                value = eval('# coding=%s\n%s' % (encoding, value),
                             {'__builtins__': {}}, {})
                if isinstance(value, str):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == '=' and prev_tok == NAME:
                # `name=` introduces a keyword argument.
                kwarg_name = prev_value
            elif tok == OP and value == ',':
                if buf:
                    message = ''.join(buf)
                    if kwarg_name in func_kwargs_map:
                        messages_kwargs[kwarg_name] = message
                    else:
                        messages.append(message)
                    del buf[:]
                else:
                    messages.append(None)
                kwarg_name = None
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append(
                        (old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ')':
            call_stack -= 1
        elif funcname and call_stack == -1:
            # Keyword name not followed by '(' — not a call after all.
            funcname = func_kwargs_map = kwarg_name = None
        elif tok == NAME and value in keywords:
            funcname = value
            func_kwargs_map = kwargs_maps.get(funcname, {})
            kwarg_name = None
# NOTE(review): this `except` clause continues a `try` whose body starts
# before this chunk; its enclosing function is not visible here.
except KeyError:
    assert True


class FixedOffsetTimezoneTestCase(unittest.TestCase):
    # Tests for the `zone` name derived by util.FixedOffsetTimezone from a
    # minutes offset (Etc/GMT-style, sign as produced by the implementation).

    def test_zone_negative_offset(self):
        self.assertEqual('Etc/GMT-60', util.FixedOffsetTimezone(-60).zone)

    def test_zone_zero_offset(self):
        self.assertEqual('Etc/GMT+0', util.FixedOffsetTimezone(0).zone)

    def test_zone_positive_offset(self):
        self.assertEqual('Etc/GMT+330', util.FixedOffsetTimezone(330).zone)


# Test helper: run util.parse_encoding over a source string (as UTF-8 bytes).
parse_encoding = lambda s: util.parse_encoding(BytesIO(s.encode('utf-8')))


def test_parse_encoding_defined():
    # A PEP 263 coding cookie is detected.
    assert parse_encoding(u'# coding: utf-8') == 'utf-8'


def test_parse_encoding_undefined():
    # No cookie -> None.
    assert parse_encoding(u'') is None


def test_parse_encoding_non_ascii():
    # Non-ASCII text without a cookie is not an encoding declaration.
    assert parse_encoding(u'K\xf6ln') is None


# NOTE(review): this decorator's argument list continues past this chunk.
@pytest.mark.parametrize('source, result', [
def Python (fobj, keywords, comment_tags, config, method) :
    """Code taken from babel directly but extended:

       * collect doc strings of functions and classes as well

       * it is possible to specify a list existing message catalog's so
         that only new translation keys will be added to the new catalog.
    """
    # NOTE(review): Python-2-only — comment token values are `.decode`d and
    # string results are re-decoded when `str`; confirm the target runtime.
    # `config`/`method` are project objects (config.get takes a `method`
    # argument plus a `default` keyword) — verify against caller.
    encoding = parse_encoding (fobj) or config.get \
        ("encoding", method, default = "utf-8")
    add_doc_strings = config.get ("add_doc_strings", method, "") == "True"
    ### now that we know that we have to parse this file, lets start
    tokens = generate_tokens (fobj.readline)
    in_def = in_translator_comments = False
    # `wait_for_doc_string` is armed after each `:` that ends a def/class
    # header (when enabled) and disarmed by the first token that cannot
    # belong to a doc string.
    wait_for_doc_string = False
    funcname = lineno = message_lineno = None
    # call_stack == -1 outside any keyword call, 0 at its argument level,
    # > 0 inside nested parens within it.
    call_stack = -1
    buf = []
    messages = []
    translator_comments = []
    comment_tag = None
    doc_string_ignore_tok = set ((NL, NEWLINE, INDENT, STRING))
    for tok, value, (lineno, _), _, _ in tokens :
        if wait_for_doc_string and tok not in doc_string_ignore_tok :
            wait_for_doc_string = False
        if call_stack == -1 and tok == NAME and value in ("def", "class") :
            in_def = True
        elif tok == OP and value == "(" and funcname :
            message_lineno = lineno
            call_stack += 1
        elif tok == OP and value == ":" :
            # Header of a def/class ended: maybe collect its doc string next.
            in_def = False
            wait_for_doc_string = add_doc_strings
            continue
        elif call_stack == -1 and tok == COMMENT :
            # Strip the comment token from the line
            value = value.decode (encoding) [1:].strip ()
            if (   in_translator_comments
               and translator_comments [-1] [0] == lineno - 1
               ) :
                # We're already inside a translator comment, continue appending
                translator_comments.append ((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags :
                if value.startswith (comment_tag) :
                    in_translator_comments = True
                    translator_comments.append ((lineno, value))
                    break
        elif wait_for_doc_string and tok == STRING :
            ### found a doc_string
            msg = TFL.normalized_indent \
                (TFL.I18N.safe_eval (value, encoding)).strip ()
            # Extended 5-tuple: trailing element is a context slot (None).
            yield (lineno, funcname, msg, [], None)
            wait_for_doc_string = False
        elif funcname and call_stack == 0 :
            if tok == OP and value == ")" :
                if buf :
                    messages.append ("".join (buf).strip ())
                    del buf [:]
                else:
                    messages.append (None)
                # Single argument -> bare message; several -> tuple.
                if len (messages) > 1 :
                    messages = tuple (messages)
                else:
                    messages = messages [0]
                # Comments don't apply unless they immediately precede the
                # message
                if (   translator_comments
                   and translator_comments [-1][0] < message_lineno - 1
                   ) :
                    translator_comments = []
                yield \
                    ( message_lineno
                    , funcname
                    , messages
                    , [comment [1] for comment in translator_comments]
                    , None
                    )
                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
            elif tok == STRING :
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                value = TFL.I18N.safe_eval (value, encoding)
                if isinstance (value, str) :
                    value = value.decode (encoding)
                buf.append (value)
            elif tok == OP and value == "," :
                # Argument separator: flush the current string buffer.
                if buf :
                    messages.append ("".join (buf).strip ())
                    del buf [:]
                else:
                    messages.append (None)
                if translator_comments :
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop ()
                    translator_comments.append ((old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ")" :
            call_stack -= 1
        elif funcname and call_stack == -1 :
            # Keyword name not followed by '(' — not a call after all.
            funcname = None
        elif tok == NAME and value in keywords :
            funcname = value
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code,

    This is patched extract_python from Babel to support keyword argument
    mapping.

    `kwargs_maps` option: names of keyword arguments will be mapping to
    index of messages array.

    `cleandoc_keywords` option: a list of keywords to clean up the extracted
    messages with `cleandoc`.
    """
    # NOTE(review): Python-2-only (dict.iteritems, str.decode). Also,
    # `cleandoc` is called below but not imported in this function — it must
    # come from a module-level import not visible in this chunk; verify.
    funcname = lineno = message_lineno = None
    kwargs_maps = func_kwargs_map = None
    call_stack = -1
    buf = []
    messages = []
    messages_kwargs = {}
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = parse_encoding(fileobj) \
        or options.get('encoding', 'iso-8859-1')

    # Per-keyword {kwarg name -> message index} maps, overridable via options.
    kwargs_maps = _DEFAULT_KWARGS_MAPS.copy()
    if 'kwargs_maps' in options:
        kwargs_maps.update(options['kwargs_maps'])

    cleandoc_keywords = set(_DEFAULT_CLEANDOC_KEYWORDS)
    if 'cleandoc_keywords' in options:
        cleandoc_keywords.update(options['cleandoc_keywords'])

    tokens = generate_tokens(fileobj.readline)
    tok = value = None
    for _ in tokens:
        # Track the previous token so `name=` can be recognized.
        prev_tok, prev_value = tok, value
        tok, value, (lineno, _), _, _ = _
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
        elif tok == OP and value == '(':
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
            kwarg_name = None
        elif in_def and tok == OP and value == ':':
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            value = value.decode(encoding)[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue
                # appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            if tok == OP and value == ')':
                if buf:
                    message = ''.join(buf)
                    if kwarg_name in func_kwargs_map:
                        messages_kwargs[kwarg_name] = message
                    else:
                        messages.append(message)
                    del buf[:]
                else:
                    messages.append(None)

                # Fold keyword-passed messages into their mapped positions.
                # NOTE(review): `index >= len(messages)` grows the list to
                # length index+1 while the value lands at index-1 — possible
                # off-by-one; confirm against _DEFAULT_KWARGS_MAPS.
                for name, message in messages_kwargs.iteritems():
                    if name not in func_kwargs_map:
                        continue
                    index = func_kwargs_map[name]
                    while index >= len(messages):
                        messages.append(None)
                    messages[index - 1] = message

                if funcname in cleandoc_keywords:
                    # `m and cleandoc(m)` keeps None placeholders as-is.
                    messages = [m and cleandoc(m) for m in messages]

                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                        translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                yield (message_lineno, funcname, messages,
                       [comment[1] for comment in translator_comments])

                funcname = lineno = message_lineno = None
                kwarg_name = func_kwargs_map = None
                call_stack = -1
                messages = []
                messages_kwargs = {}
                translator_comments = []
                in_translator_comments = False
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                value = eval('# coding=%s\n%s' % (encoding, value),
                             {'__builtins__':{}}, {})
                if isinstance(value, str):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == '=' and prev_tok == NAME:
                # `name=` introduces a keyword argument.
                kwarg_name = prev_value
            elif tok == OP and value == ',':
                if buf:
                    message = ''.join(buf)
                    if kwarg_name in func_kwargs_map:
                        messages_kwargs[kwarg_name] = message
                    else:
                        messages.append(message)
                    del buf[:]
                else:
                    messages.append(None)
                kwarg_name = None
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno+1, old_comment))
        elif call_stack > 0 and tok == OP and value == ')':
            call_stack -= 1
        elif funcname and call_stack == -1:
            # Keyword name not followed by '(' — not a call after all.
            funcname = func_kwargs_map = kwarg_name = None
        elif tok == NAME and value in keywords:
            funcname = value
            func_kwargs_map = kwargs_maps.get(funcname, {})
            kwarg_name = None
# NOTE(review): this `assert` is the tail of a test function whose `def`
# starts before this chunk.
assert True


class FixedOffsetTimezoneTestCase(unittest.TestCase):
    # Tests for the `zone` name derived by util.FixedOffsetTimezone from a
    # minutes offset (Etc/GMT-style, sign as produced by the implementation).

    def test_zone_negative_offset(self):
        self.assertEqual('Etc/GMT-60', util.FixedOffsetTimezone(-60).zone)

    def test_zone_zero_offset(self):
        self.assertEqual('Etc/GMT+0', util.FixedOffsetTimezone(0).zone)

    def test_zone_positive_offset(self):
        self.assertEqual('Etc/GMT+330', util.FixedOffsetTimezone(330).zone)


# Test helper: run util.parse_encoding over a source string (as UTF-8 bytes).
parse_encoding = lambda s: util.parse_encoding(BytesIO(s.encode('utf-8')))


def test_parse_encoding_defined():
    # A PEP 263 coding cookie is detected.
    assert parse_encoding(u'# coding: utf-8') == 'utf-8'


def test_parse_encoding_undefined():
    # No cookie -> None.
    assert parse_encoding(u'') is None


def test_parse_encoding_non_ascii():
    # Non-ASCII text without a cookie is not an encoding declaration.
    assert parse_encoding(u'K\xf6ln') is None


# NOTE(review): this decorator's argument list continues past this chunk.
@pytest.mark.parametrize('source, result', [
def Python (fobj, keywords, comment_tags, config, method) :
    """Code taken from babel directly but extended:

       * collect doc strings of functions and classes as well

       * it is possible to specify a list existing message catalog's so
         that only new translation keys will be added to the new catalog.
    """
    # Python-3 revision of the TFL extractor: `fobj` is binary, decoded line
    # by line in `_readline`; comment tokens arrive as `str` so no decode.
    # NOTE(review): `config`/`method` are project objects (config.get takes a
    # `method` argument plus a `default` keyword) — verify against caller.
    encoding = parse_encoding (fobj) or config.get \
        ("encoding", method, default = "utf-8")
    add_doc_strings = config.get ("add_doc_strings", method, "") == "True"
    ### `generate_tokens` expects strings, `fobj` is binary
    def _readline () :
        result = fobj.readline ()
        return result.decode (encoding)
    ### now that we know that we have to parse this file, lets start
    tokens = generate_tokens (_readline)
    in_def = in_translator_comments = False
    # `wait_for_doc_string` is armed after each `:` that ends a def/class
    # header (when enabled) and disarmed by the first token that cannot
    # belong to a doc string.
    wait_for_doc_string = False
    funcname = lineno = message_lineno = None
    # call_stack == -1 outside any keyword call, 0 at its argument level,
    # > 0 inside nested parens within it.
    call_stack = -1
    buf = []
    messages = []
    translator_comments = []
    comment_tag = None
    doc_string_ignore_tok = set ((NL, NEWLINE, INDENT, STRING))
    for tok, value, (lineno, _), _, _ in tokens :
        if wait_for_doc_string and tok not in doc_string_ignore_tok :
            wait_for_doc_string = False
        if call_stack == -1 and tok == NAME and value in ("def", "class") :
            in_def = True
        elif tok == OP and value == "(" and funcname :
            message_lineno = lineno
            call_stack += 1
        elif tok == OP and value == ":" :
            # Header of a def/class ended: maybe collect its doc string next.
            in_def = False
            wait_for_doc_string = add_doc_strings
            continue
        elif call_stack == -1 and tok == COMMENT :
            # Strip the comment token from the line
            value = value [1:].strip ()
            if (   in_translator_comments
               and translator_comments [-1] [0] == lineno - 1
               ) :
                # We're already inside a translator comment, continue appending
                translator_comments.append ((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags :
                if value.startswith (comment_tag) :
                    in_translator_comments = True
                    translator_comments.append ((lineno, value))
                    break
        elif wait_for_doc_string and tok == STRING :
            ### found a doc_string
            msg = TFL.normalized_indent \
                (TFL.I18N.safe_eval (value, encoding)).strip ()
            # Extended 5-tuple: trailing element is a context slot (None).
            yield (lineno, funcname, msg, [], None)
            wait_for_doc_string = False
        elif funcname and call_stack == 0 :
            if tok == OP and value == ")" :
                if buf :
                    messages.append ("".join (buf).strip ())
                    del buf [:]
                else:
                    messages.append (None)
                # Single argument -> bare message; several -> tuple.
                if len (messages) > 1 :
                    messages = tuple (messages)
                else:
                    messages = messages [0]
                # Comments don't apply unless they immediately precede the
                # message
                if (   translator_comments
                   and translator_comments [-1][0] < message_lineno - 1
                   ) :
                    translator_comments = []
                yield \
                    ( message_lineno
                    , funcname
                    , messages
                    , [comment [1] for comment in translator_comments]
                    , None
                    )
                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
            elif tok == STRING :
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                value = TFL.I18N.safe_eval (value, encoding)
                if isinstance (value, bytes) :
                    value = value.decode (encoding)
                buf.append (value)
            elif tok == OP and value == "," :
                # Argument separator: flush the current string buffer.
                if buf :
                    messages.append ("".join (buf).strip ())
                    del buf [:]
                else:
                    messages.append (None)
                if translator_comments :
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop ()
                    translator_comments.append ((old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ")" :
            call_stack -= 1
        elif funcname and call_stack == -1 :
            # Keyword name not followed by '(' — not a call after all.
            funcname = None
        elif tok == NAME and value in keywords :
            funcname = value
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.

    It returns an iterator yielding tuples in the following form ``(lineno,
    funcname, message, comments)``.

    Adapted from the corresponding pybabel built-in function,
    so that it understands the syntax of our custom `trans`/`trans_lazy`
    function and correctly parses the default message and the context.

    :param fileobj: the seekable, file-like object the messages should be
        extracted from
    :param keywords: a list of keywords (i.e. function names) that should be
        recognized as translation functions
    :param comment_tags: a list of translator tags to search for and include
        in the results
    :param options: a dictionary of additional options (optional)
    :rtype: ``iterator``
    """
    # State machine: call_stack == -1 outside any keyword call, 0 at the
    # argument level of one, > 0 inside nested parens within it.
    funcname = lineno = message_lineno = None
    call_stack = -1
    buf = []
    messages = []
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = parse_encoding(fileobj) or options.get("encoding", "UTF-8")
    future_flags = parse_future_flags(fileobj, encoding)

    # On Python 3 the underlying stream is binary; decode each line so the
    # tokenizer always sees text.
    if PY2:
        next_line = fileobj.readline
    else:
        next_line = lambda: fileobj.readline().decode(encoding)

    tokens = generate_tokens(next_line)
    for tok, value, (lineno, _), _, _ in tokens:
        if call_stack == -1 and tok == NAME and value in ("def", "class"):
            in_def = True
        elif tok == OP and value == "(":
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
        elif in_def and tok == OP and value == ":":
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            if PY2:
                value = value.decode(encoding)
            value = value[1:].strip()
            if in_translator_comments and translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            # A keyword used as an argument closes the outer message and
            # immediately starts a new one.
            nested = tok == NAME and value in keywords
            if (tok == OP and value == ")") or nested:
                if buf:
                    messages.append("".join(buf))
                    del buf[:]
                else:
                    messages.append(None)

                # Single argument -> bare message; several -> tuple.
                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if (
                    translator_comments
                    and translator_comments[-1][0] < message_lineno - 1
                ):
                    translator_comments = []
                ### HERE start our modifications to pybabel's script
                if funcname in ["trans", "trans_lazy"]:
                    # `messages` will have all the string parameters to our function
                    # As we specify in the documentation of `trans`,
                    # the first will be the message ID, the second will be the default message
                    # and the (optional) third will be the message context
                    if len(messages) > 1 and messages[1]:
                        # If we have a default, add it as a special comment
                        # that will be processed by our `merge_catalogs` script
                        translator_comments.append(
                            (message_lineno, "default-message: " + messages[1])
                        )

                    if len(messages) > 2 and isinstance(messages[2], str):
                        context = messages[2]
                    else:
                        context = None

                    if context:
                        # if we have a context, trick pybabel to use `pgettext`
                        # so that it adds the context to the translation file
                        funcname = "pgettext"
                        messages = [context, messages[0]]
                    else:
                        # No context: yield without a funcname so the default
                        # keyword spec (first arg = msgid) applies.
                        funcname = None
                ### HERE end our modifications to pybabel's script
                yield (
                    message_lineno,
                    funcname,
                    messages,
                    [comment[1] for comment in translator_comments],
                )

                funcname = lineno = message_lineno = None
                call_stack = -1
                messages = []
                translator_comments = []
                in_translator_comments = False
                if nested:
                    funcname = value
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                code = compile(
                    "# coding=%s\n%s" % (str(encoding), value),
                    "<string>",
                    "eval",
                    future_flags,
                )
                value = eval(code, {"__builtins__": {}}, {})
                if PY2 and not isinstance(value, text_type):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == ",":
                # Argument separator: flush the current string buffer.
                if buf:
                    messages.append("".join(buf))
                    del buf[:]
                else:
                    messages.append(None)
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno + 1, old_comment))
        elif call_stack > 0 and tok == OP and value == ")":
            call_stack -= 1
        elif funcname and call_stack == -1:
            # Keyword name not followed by '(' — not a call after all.
            funcname = None
        elif tok == NAME and value in keywords:
            funcname = value