import six

# On Python 3 there is no separate ``long`` type; alias it to ``int`` so the
# name resolves on both major versions.
if six.PY3:
    long = int

from google.protobuf.internal import type_checkers
from google.protobuf import descriptor
from google.protobuf import text_encoding

# Names exported by ``from <module> import *``.
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']

# NOTE(review): ``re`` is used below but is not imported in this visible
# section -- confirm it is imported earlier in the file.

# Integer range checkers, in the order uint32, int32, uint64, int64.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Case-insensitive: optional '-', then 'inf' or 'infinity', then optional 'f'.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
# Case-insensitive: 'nan' with an optional trailing 'f'.
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# The C++ field types that hold floating point values (float and double).
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])


class Error(Exception):
    """Top-level module error for text_format.

    Base class of the exceptions defined in this module.
    """


class ParseError(Error):
    """Thrown in case of ASCII parsing error."""
class _Tokenizer(object):
    """Protocol buffer ASCII representation tokenizer.

    This class handles the lower level string parsing by splitting it into
    meaningful tokens.

    It was directly ported from the Java protocol buffer API.

    The input is split into lines on ``b'\\n'`` and scanned one line at a
    time.  ``token`` always holds the current token; it is empty once the
    input has been exhausted.  ``_line`` / ``_column`` locate the start of
    the current token (both 0-based).
    """

    # Whitespace or a '#' comment running to the end of the line.  Text and
    # bytes variants are kept so either kind of line can be matched.
    _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
    _WHITESPACE_BYTES = re.compile(b'(\\s|(#.*$))+', re.MULTILINE)
    _TOKEN = re.compile(
        '[a-zA-Z_][0-9a-zA-Z_+-]*|'           # an identifier
        '[0-9+-][0-9a-zA-Z_.+-]*|'            # a number
        '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
        '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
    _TOKEN_BYTES = re.compile(
        b'[a-zA-Z_][0-9a-zA-Z_+-]*|'           # an identifier
        b'[0-9+-][0-9a-zA-Z_.+-]*|'            # a number
        b'\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
        b'\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
    # Raw literals: same pattern as before, without the invalid '\w' escape.
    _IDENTIFIER = re.compile(r'\w+')
    _IDENTIFIER_BYTES = re.compile(br'\w+')
    # Indexed by ``2 * is_long + is_signed`` in _ParseInteger:
    # 0 = uint32, 1 = int32, 2 = uint64, 3 = int64.
    _INTEGER_CHECKERS = [
        type_checkers.Uint32ValueChecker(),
        type_checkers.Int32ValueChecker(),
        type_checkers.Uint64ValueChecker(),
        type_checkers.Int64ValueChecker()
    ]
    _FLOAT_INFINITY = re.compile(b'-?inf(inity)?f?', re.IGNORECASE)
    _FLOAT_NAN = re.compile(b"nanf?", re.IGNORECASE)

    def __init__(self, text_message):
        """Starts tokenizing and positions the tokenizer on the first token.

        Args:
          text_message: The message text.  It is split with ``b'\\n'``, so it
            must be a bytes-compatible string.
        """
        self._text_message = text_message
        self._position = 0
        self._line = -1  # Becomes 0 when the first line is popped.
        self._column = 0
        self._token_start = None
        self.token = b''
        self._lines = deque(text_message.split(b'\n'))
        self._current_line = b''
        self._previous_line = 0
        self._previous_column = 0
        self._SkipWhitespace()
        self.NextToken()

    def AtEnd(self):
        """Checks the end of the text was reached.

        Returns:
          True iff the end was reached.
        """
        return self.token == b''

    def _PopLine(self):
        """Advances to the next line while the current one is used up.

        When no lines remain the current line is set to empty and the method
        returns without touching the line/column counters.
        """
        while len(self._current_line) <= self._column:
            if not self._lines:
                self._current_line = b''
                return
            self._line += 1
            self._column = 0
            self._current_line = self._lines.popleft()

    def _SkipWhitespace(self):
        """Moves the column past whitespace and comments, across lines."""
        while True:
            self._PopLine()
            # A ``str`` line selects the text pattern, anything else the
            # bytes pattern.
            if isinstance(self._current_line, str):
                pattern = self._WHITESPACE
            else:
                pattern = self._WHITESPACE_BYTES
            match = pattern.match(self._current_line, self._column)
            if not match:
                break
            self._column += len(match.group(0))

    def TryConsume(self, token):
        """Tries to consume a given piece of text.

        Args:
          token: Text to consume.

        Returns:
          True iff the text was consumed.
        """
        if self.token == token:
            self.NextToken()
            return True
        return False

    def Consume(self, token):
        """Consumes a piece of text.

        Args:
          token: Text to consume.

        Raises:
          ParseError: If the text couldn't be consumed.
        """
        if not self.TryConsume(token):
            raise self._ParseError('Expected "%s".' % token)

    def LookingAtInteger(self):
        """Checks if the current token is an integer.

        Only the first character is inspected: a digit, '-' or '+'.

        Returns:
          True iff the current token is an integer.
        """
        if not self.token:
            return False
        c = ord(self.token[0:1])
        return (ord('0') <= c <= ord('9')) or c == ord('-') or c == ord('+')

    def ConsumeIdentifier(self):
        """Consumes protocol message field identifier.

        Returns:
          Identifier string (the token passed through ``bytes_to_string``).

        Raises:
          ParseError: If an identifier couldn't be consumed.
        """
        result = self.token
        if not self._IDENTIFIER_BYTES.match(result):
            raise self._ParseError('Expected identifier.')
        self.NextToken()
        return bytes_to_string(result)

    def ConsumeInt32(self):
        """Consumes a signed 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 32bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(
                self.token, is_signed=True, is_long=False)
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result

    def ConsumeUint32(self):
        """Consumes an unsigned 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 32bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(
                self.token, is_signed=False, is_long=False)
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result

    def ConsumeInt64(self):
        """Consumes a signed 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 64bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(
                self.token, is_signed=True, is_long=True)
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result

    def ConsumeUint64(self):
        """Consumes an unsigned 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 64bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(
                self.token, is_signed=False, is_long=True)
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result

    def ConsumeFloat(self):
        """Consumes an floating point number.

        Infinity and NaN spellings are recognised before falling back to
        ``float()``.

        Returns:
          The number parsed.

        Raises:
          ParseError: If a floating point number couldn't be consumed.
        """
        text = self.token
        if self._FLOAT_INFINITY.match(text):
            self.NextToken()
            if text.startswith(b'-'):
                return -_INFINITY
            return _INFINITY
        if self._FLOAT_NAN.match(text):
            self.NextToken()
            return _NAN
        try:
            result = float(text)
        except ValueError as e:
            raise self._FloatParseError(e)
        self.NextToken()
        return result

    def ConsumeBool(self):
        """Consumes a boolean value.

        Accepts ``true``/``t``/``1`` and ``false``/``f``/``0``.

        Returns:
          The bool parsed.

        Raises:
          ParseError: If a boolean value couldn't be consumed.
        """
        if self.token in (b'true', b't', b'1'):
            self.NextToken()
            return True
        elif self.token in (b'false', b'f', b'0'):
            self.NextToken()
            return False
        else:
            raise self._ParseError('Expected "true" or "false".')

    def ConsumeString(self):
        """Consumes a string value.

        Returns:
          The string parsed.

        Raises:
          ParseError: If a string value couldn't be consumed.
        """
        bytes_str = self.ConsumeByteString()
        try:
            return bytestr_to_string(bytes_str)
        except UnicodeDecodeError as e:
            raise self._StringParseError(e)

    def ConsumeByteString(self):
        """Consumes a byte array value.

        Adjacent string literal tokens are concatenated.

        Returns:
          The array parsed (as a string).

        Raises:
          ParseError: If a byte array value couldn't be consumed.
        """
        parts = [self._ConsumeSingleByteString()]
        while self.token and self.token[0:1] in (b'\'', b'"'):
            parts.append(self._ConsumeSingleByteString())
        return b"".join(parts)

    def _ConsumeSingleByteString(self):
        """Consume one token of a string literal.

        String literals (whether bytes or text) can come in multiple adjacent
        tokens which are automatically concatenated, like in C or Python.
        This method only consumes one token.

        Returns:
          The literal's contents with the quotes removed, after ``_CUnescape``.

        Raises:
          ParseError: If the token is not a quoted string, lacks its closing
            quote, or cannot be unescaped.
        """
        text = self.token
        # One-element slices keep the comparisons valid for bytes, whose
        # integer indexing differs between Python 2 and Python 3.
        if len(text) < 1 or text[0:1] not in (b'\'', b'"'):
            raise self._ParseError('Expected string.')
        if len(text) < 2 or text[-1:] != text[0:1]:
            raise self._ParseError('String missing ending quote.')
        try:
            result = _CUnescape(text[1:-1])
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def _ParseInteger(self, text, is_signed=False, is_long=False):
        """Parses an integer.

        A ``0x``/``0X`` prefix selects base 16 and a leading ``0`` base 8;
        anything else is base 10.  A leading ``-`` is skipped when detecting
        the prefix.

        Args:
          text: The text to parse.
          is_signed: True if a signed integer must be parsed.
          is_long: True if a long integer must be parsed.

        Returns:
          The integer value.

        Raises:
          ValueError: Thrown Iff the text is not a valid integer.
        """
        pos = 0
        if text.startswith(b'-'):
            pos += 1
        base = 10
        if text.startswith(b'0x', pos) or text.startswith(b'0X', pos):
            base = 16
        elif text.startswith(b'0', pos):
            base = 8

        # Do the actual parsing. Exception handling is propagated to caller.
        result = int(text, base)

        # Check if the integer is sane. Exceptions handled by callers.
        checker = self._INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
        checker.CheckValue(result)
        return result

    def ParseErrorPreviousToken(self, message):
        """Creates and *returns* a ParseError for the previously read token.

        Args:
          message: A message to set for the exception.

        Returns:
          A ParseError instance.
        """
        return ParseError(
            '%d:%d : %s' % (self._previous_line + 1,
                            self._previous_column + 1, message))

    def _ParseError(self, message):
        """Creates and *returns* a ParseError for the current token.

        Args:
          message: A message to set for the exception.

        Returns:
          A ParseError instance prefixed with the 1-based ``line:column`` of
          the current token.
        """
        # NextToken leaves _column at the *start* of the current token (it
        # only steps over a token when moving on to the next one), so the
        # token length must not be subtracted here.
        return ParseError(
            '%d:%d : %s' % (self._line + 1, self._column + 1, message))

    def _IntegerParseError(self, e):
        """Returns a ParseError describing a failed integer parse."""
        return self._ParseError('Couldn\'t parse integer: ' + str(e))

    def _FloatParseError(self, e):
        """Returns a ParseError describing a failed float parse."""
        return self._ParseError('Couldn\'t parse number: ' + str(e))

    def _StringParseError(self, e):
        """Returns a ParseError describing a failed string decode."""
        return self._ParseError('Couldn\'t parse string: ' + str(e))

    def NextToken(self):
        """Reads the next meaningful token."""
        # Remember where the token being left behind started.
        self._previous_line = self._line
        self._previous_column = self._column

        # Step over the old token, then over any whitespace and comments.
        self._column += len(self.token)
        self._SkipWhitespace()

        if not self._lines and len(self._current_line) <= self._column:
            self.token = b''
            return

        match = self._TOKEN_BYTES.match(self._current_line, self._column)
        if match:
            self.token = match.group(0)
        else:
            # Anything that is not an identifier, number or string is
            # returned as a one-character token.
            self.token = self._current_line[self._column:self._column + 1]
class _Tokenizer(object):
    """Protocol buffer ASCII representation tokenizer.

    This class handles the lower level string parsing by splitting it into
    meaningful tokens.

    It was directly ported from the Java protocol buffer API.

    The tokenizer works on bytes only.  The input is split into lines on
    ``b'\\n'`` and scanned one line at a time.  ``token`` always holds the
    current token; it is empty once the input has been exhausted.
    ``_line`` / ``_column`` locate the start of the current token (0-based).
    """

    # Whitespace or a '#' comment running to the end of the line.
    _WHITESPACE = re.compile(b'(\\s|(#.*$))+', re.MULTILINE)
    _TOKEN = re.compile(
        b'[a-zA-Z_][0-9a-zA-Z_+-]*|'           # an identifier
        b'[0-9+-][0-9a-zA-Z_.+-]*|'            # a number
        b'\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
        b'\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
    # Raw literal: same pattern as before, without the invalid '\w' escape.
    _IDENTIFIER = re.compile(br'\w+')
    # Order: uint32, int32, uint64, int64.
    _INTEGER_CHECKERS = [
        type_checkers.Uint32ValueChecker(),
        type_checkers.Int32ValueChecker(),
        type_checkers.Uint64ValueChecker(),
        type_checkers.Int64ValueChecker()
    ]
    _FLOAT_INFINITY = re.compile(b'-?inf(inity)?f?', re.IGNORECASE)
    _FLOAT_NAN = re.compile(b"nanf?", re.IGNORECASE)

    def __init__(self, text_message):
        """Starts tokenizing and positions the tokenizer on the first token.

        Args:
          text_message: The message text, as bytes.
        """
        assert isinstance(text_message, bytes)
        self._text_message = text_message
        self._position = 0
        self._line = -1  # Becomes 0 when the first line is popped.
        self._column = 0
        self._token_start = None
        self.token = b''
        self._lines = deque(text_message.split(b'\n'))
        self._current_line = b''
        self._previous_line = 0
        self._previous_column = 0
        self._SkipWhitespace()
        self.NextToken()

    def AtEnd(self):
        """Checks the end of the text was reached.

        Returns:
          True iff the end was reached.
        """
        return self.token == b''

    def _PopLine(self):
        """Advances to the next line while the current one is used up.

        When no lines remain the current line is set to empty and the method
        returns without touching the line/column counters.
        """
        while len(self._current_line) <= self._column:
            if not self._lines:
                self._current_line = b''
                return
            self._line += 1
            self._column = 0
            self._current_line = self._lines.popleft()

    def _SkipWhitespace(self):
        """Moves the column past whitespace and comments, across lines."""
        while True:
            self._PopLine()
            match = self._WHITESPACE.match(self._current_line, self._column)
            if not match:
                break
            self._column += len(match.group(0))

    def TryConsume(self, token):
        """Tries to consume a given piece of text.

        Args:
          token: Text to consume.

        Returns:
          True iff the text was consumed.
        """
        if self.token == token:
            self.NextToken()
            return True
        return False

    def Consume(self, token):
        """Consumes a piece of text.

        Args:
          token: Text to consume.

        Raises:
          ParseError: If the text couldn't be consumed.
        """
        if not self.TryConsume(token):
            raise self._ParseError('Expected "%s".' % token)

    def LookingAtInteger(self):
        """Checks if the current token is an integer.

        Only the first character is inspected: a digit, '-' or '+'.

        Returns:
          True iff the current token is an integer.
        """
        if not self.token:
            return False
        c = self.token[0:1]
        # The token is bytes, so compare against bytes literals.  On
        # Python 3, ordering bytes against str raises TypeError and
        # bytes == str is always False.
        return (b'0' <= c <= b'9') or c == b'-' or c == b'+'

    def ConsumeIdentifier(self):
        """Consumes protocol message field identifier.

        Returns:
          Identifier string.

        Raises:
          ParseError: If an identifier couldn't be consumed.
        """
        result = self.token
        if not self._IDENTIFIER.match(result):
            raise self._ParseError('Expected identifier.')
        self.NextToken()
        return result

    def ConsumeInt32(self):
        """Consumes a signed 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 32bit integer couldn't be consumed.
        """
        try:
            result = ParseInteger(self.token, is_signed=True, is_long=False)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeUint32(self):
        """Consumes an unsigned 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 32bit integer couldn't be consumed.
        """
        try:
            result = ParseInteger(self.token, is_signed=False, is_long=False)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeInt64(self):
        """Consumes a signed 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 64bit integer couldn't be consumed.
        """
        try:
            result = ParseInteger(self.token, is_signed=True, is_long=True)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeUint64(self):
        """Consumes an unsigned 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 64bit integer couldn't be consumed.
        """
        try:
            result = ParseInteger(self.token, is_signed=False, is_long=True)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeFloat(self):
        """Consumes an floating point number.

        Returns:
          The number parsed.

        Raises:
          ParseError: If a floating point number couldn't be consumed.
        """
        try:
            result = ParseFloat(self.token)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeBool(self):
        """Consumes a boolean value.

        Returns:
          The bool parsed.

        Raises:
          ParseError: If a boolean value couldn't be consumed.
        """
        try:
            result = ParseBool(self.token)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeString(self):
        """Consumes a string value.

        Returns:
          The string parsed, decoded as UTF-8.

        Raises:
          ParseError: If a string value couldn't be consumed.
        """
        raw = self.ConsumeByteString()
        try:
            return raw.decode('utf-8')
        except UnicodeDecodeError as e:
            raise self._StringParseError(e)

    def ConsumeByteString(self):
        """Consumes a byte array value.

        Adjacent string literal tokens are concatenated.

        Returns:
          The array parsed (as a string).

        Raises:
          ParseError: If a byte array value couldn't be consumed.
        """
        parts = [self._ConsumeSingleByteString()]
        while self.token and self.token[0:1] in (b'\'', b'"'):
            parts.append(self._ConsumeSingleByteString())
        return b"".join(parts)

    def _ConsumeSingleByteString(self):
        """Consume one token of a string literal.

        String literals (whether bytes or text) can come in multiple adjacent
        tokens which are automatically concatenated, like in C or Python.
        This method only consumes one token.

        Returns:
          The literal's contents with the quotes removed, after ``_CUnescape``.

        Raises:
          ParseError: If the token is not a quoted string, lacks its closing
            quote, or cannot be unescaped.
        """
        text = self.token
        if len(text) < 1 or text[0:1] not in (b'\'', b'"'):
            raise self._ParseError('Expected string.')
        if len(text) < 2 or text[-1:] != text[0:1]:
            raise self._ParseError('String missing ending quote.')
        try:
            result = _CUnescape(text[1:-1])
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeEnum(self, field):
        """Consumes an enum value via ``ParseEnum``.

        Args:
          field: Passed through to ``ParseEnum`` together with the current
            token (presumably the enum field's descriptor -- confirm against
            ``ParseEnum``).

        Returns:
          Whatever ``ParseEnum`` returns for the current token.

        Raises:
          ParseError: If ``ParseEnum`` rejects the token with ValueError.
        """
        try:
            result = ParseEnum(field, self.token)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ParseErrorPreviousToken(self, message):
        """Creates and *returns* a ParseError for the previously read token.

        Args:
          message: A message to set for the exception.

        Returns:
          A ParseError instance.
        """
        return ParseError(
            '%d:%d : %s' % (self._previous_line + 1,
                            self._previous_column + 1, message))

    def _ParseError(self, message):
        """Creates and *returns* a ParseError for the current token."""
        return ParseError(
            '%d:%d : %s' % (self._line + 1, self._column + 1, message))

    def _StringParseError(self, e):
        """Returns a ParseError describing a failed string decode."""
        return self._ParseError('Couldn\'t parse string: ' + str(e))

    def NextToken(self):
        """Reads the next meaningful token."""
        # Remember where the token being left behind started.
        self._previous_line = self._line
        self._previous_column = self._column

        # Step over the old token, then over any whitespace and comments.
        self._column += len(self.token)
        self._SkipWhitespace()

        if not self._lines and len(self._current_line) <= self._column:
            self.token = b''
            return

        match = self._TOKEN.match(self._current_line, self._column)
        if match:
            self.token = match.group(0)
        else:
            # Anything that is not an identifier, number or string is
            # returned as a one-character token.
            self.token = self._current_line[self._column:self._column + 1]
class _Tokenizer(object): """Protocol buffer ASCII representation tokenizer. This class handles the lower level string parsing by splitting it into meaningful tokens. It was directly ported from the Java protocol buffer API. """ _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE) _TOKEN = re.compile( '[a-zA-Z_][0-9a-zA-Z_+-]*|' # an identifier '[0-9+-][0-9a-zA-Z_.+-]*|' # a number '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|' # a double-quoted string '\'([^\"\n\\\\]|\\\\.)*(\'|\\\\?$)') # a single-quoted string _IDENTIFIER = re.compile('\w+') _INTEGER_CHECKERS = [ type_checkers.Uint32ValueChecker(), type_checkers.Int32ValueChecker(), type_checkers.Uint64ValueChecker(), type_checkers.Int64ValueChecker() ] _FLOAT_INFINITY = re.compile('-?inf(inity)?f?', re.IGNORECASE) _FLOAT_NAN = re.compile("nanf?", re.IGNORECASE) def __init__(self, text_message): self._text_message = text_message self._position = 0 self._line = -1 self._column = 0 self._token_start = None self.token = '' self._lines = deque(text_message.split('\n')) self._current_line = '' self._previous_line = 0 self._previous_column = 0 self._SkipWhitespace() self.NextToken() def AtEnd(self): """Checks the end of the text was reached. Returns: True iff the end was reached. """ return not self._lines and not self._current_line def _PopLine(self): while not self._current_line: if not self._lines: self._current_line = '' return self._line += 1 self._column = 0 self._current_line = self._lines.popleft() def _SkipWhitespace(self): while True: self._PopLine() match = re.match(self._WHITESPACE, self._current_line) if not match: break length = len(match.group(0)) self._current_line = self._current_line[length:] self._column += length def TryConsume(self, token): """Tries to consume a given piece of text. Args: token: Text to consume. Returns: True iff the text was consumed. """ if self.token == token: self.NextToken() return True return False def Consume(self, token): """Consumes a piece of text. Args: token: Text to consume. 
Raises: ParseError: If the text couldn't be consumed. """ if not self.TryConsume(token): raise self._ParseError('Expected "%s".' % token) def LookingAtInteger(self): """Checks if the current token is an integer. Returns: True iff the current token is an integer. """ if not self.token: return False c = self.token[0] return (c >= '0' and c <= '9') or c == '-' or c == '+' def ConsumeIdentifier(self): """Consumes protocol message field identifier. Returns: Identifier string. Raises: ParseError: If an identifier couldn't be consumed. """ result = self.token if not re.match(self._IDENTIFIER, result): raise self._ParseError('Expected identifier.') self.NextToken() return result def ConsumeInt32(self): """Consumes a signed 32bit integer number. Returns: The integer parsed. Raises: ParseError: If a signed 32bit integer couldn't be consumed. """ try: result = self._ParseInteger(self.token, is_signed=True, is_long=False) except ValueError, e: raise self._IntegerParseError(e) self.NextToken() return result