Exemple #1
0
# When running under Python 3 (per ``six.PY3``), bind the name ``long`` to
# ``int`` -- presumably so that other code in this module can keep referring
# to ``long``; verify against the rest of the file.
if six.PY3:
  long = int

from google.protobuf.internal import type_checkers
from google.protobuf import descriptor
from google.protobuf import text_encoding

# Names exported by ``from <this module> import *``.
__all__ = ['MessageToString', 'PrintMessage', 'PrintField',
           'PrintFieldValue', 'Merge']


# Value checkers for, in order: unsigned 32-bit, signed 32-bit,
# unsigned 64-bit and signed 64-bit integers.
_INTEGER_CHECKERS = (type_checkers.Uint32ValueChecker(),
                     type_checkers.Int32ValueChecker(),
                     type_checkers.Uint64ValueChecker(),
                     type_checkers.Int64ValueChecker())
# Case-insensitive "inf" / "infinity" with an optional leading "-" and an
# optional trailing "f". The pattern is not anchored at the end, so a plain
# ``.match()`` also succeeds on longer strings that merely start this way.
_FLOAT_INFINITY = re.compile('-?inf(?:inity)?f?', re.IGNORECASE)
# Case-insensitive "nan" with an optional trailing "f" (also unanchored).
_FLOAT_NAN = re.compile('nanf?', re.IGNORECASE)
# The C++ field types that hold floating point values.
_FLOAT_TYPES = frozenset([descriptor.FieldDescriptor.CPPTYPE_FLOAT,
                          descriptor.FieldDescriptor.CPPTYPE_DOUBLE])


class Error(Exception):
  """Base class for the errors defined by the text_format module."""


class ParseError(Error):
  """Raised when ASCII (text format) input cannot be parsed."""

class TextWriter(object):
  def __init__(self, as_utf8):
class _Tokenizer(object):
    """Protocol buffer ASCII representation tokenizer.

    This class handles the lower level string parsing by splitting it into
    meaningful tokens.

    It was directly ported from the Java protocol buffer API.

    The input is kept as a queue of lines. ``self.token`` always holds the
    current token (``b''`` once the input is exhausted), and ``self._line`` /
    ``self._column`` hold the 0-based position at which that token starts.
    """

    # Runs of whitespace and '#'-to-end-of-line comments (str / bytes).
    _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
    _WHITESPACE_BYTES = re.compile(b'(\\s|(#.*$))+', re.MULTILINE)
    _TOKEN = re.compile(
        '[a-zA-Z_][0-9a-zA-Z_+-]*|'  # an identifier
        '[0-9+-][0-9a-zA-Z_.+-]*|'  # a number
        '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
        '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
    _TOKEN_BYTES = re.compile(
        b'[a-zA-Z_][0-9a-zA-Z_+-]*|'  # an identifier
        b'[0-9+-][0-9a-zA-Z_.+-]*|'  # a number
        b'\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
        b'\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
    # Raw literals: a bare '\w' in a normal string literal is an invalid
    # escape sequence and triggers a warning on recent Python versions.
    _IDENTIFIER = re.compile(r'\w+')
    _IDENTIFIER_BYTES = re.compile(br'\w+')
    # Indexed by ``2 * is_long + is_signed`` in _ParseInteger.
    _INTEGER_CHECKERS = [
        type_checkers.Uint32ValueChecker(),
        type_checkers.Int32ValueChecker(),
        type_checkers.Uint64ValueChecker(),
        type_checkers.Int64ValueChecker()
    ]
    _FLOAT_INFINITY = re.compile(b'-?inf(inity)?f?', re.IGNORECASE)
    _FLOAT_NAN = re.compile(b"nanf?", re.IGNORECASE)

    def __init__(self, text_message):
        """Splits the message into lines and reads the first token.

        Args:
          text_message: The text to tokenize. It is split on the newline
            byte, so it is expected to be a byte string.
        """
        self._text_message = text_message

        self._position = 0
        self._line = -1
        self._column = 0
        self._token_start = None
        self.token = b''
        self._lines = deque(text_message.split(b'\n'))
        self._current_line = b''
        self._previous_line = 0
        self._previous_column = 0
        self._SkipWhitespace()
        self.NextToken()

    def AtEnd(self):
        """Checks the end of the text was reached.

        Returns:
          True iff the end was reached.
        """
        return self.token == b''

    def _PopLine(self):
        """Moves to the next queued line while the column is past the end
        of the current one; leaves an empty current line when none remain."""
        while len(self._current_line) <= self._column:
            if not self._lines:
                self._current_line = b''
                return
            self._line += 1
            self._column = 0
            self._current_line = self._lines.popleft()

    def _SkipWhitespace(self):
        """Advances the column past whitespace and comments, across lines."""
        while True:
            self._PopLine()
            # Pick the pattern whose type matches the current line.
            if isinstance(self._current_line, str):
                match = self._WHITESPACE.match(self._current_line,
                                               self._column)
            else:
                match = self._WHITESPACE_BYTES.match(self._current_line,
                                                     self._column)
            if not match:
                break
            length = len(match.group(0))
            self._column += length

    def TryConsume(self, token):
        """Tries to consume a given piece of text.

        Args:
          token: Text to consume.

        Returns:
          True iff the text was consumed.
        """
        if self.token == token:
            self.NextToken()
            return True
        return False

    def Consume(self, token):
        """Consumes a piece of text.

        Args:
          token: Text to consume.

        Raises:
          ParseError: If the text couldn't be consumed.
        """
        if not self.TryConsume(token):
            raise self._ParseError('Expected "%s".' % token)

    def LookingAtInteger(self):
        """Checks if the current token is an integer.

        Only the first character is inspected (a digit, '-' or '+').

        Returns:
          True iff the current token is an integer.
        """
        if not self.token:
            return False
        # Slice (not index) so that ord() works for both str and bytes.
        c = ord(self.token[0:1])
        return (c >= ord('0')
                and c <= ord('9')) or c == ord('-') or c == ord('+')

    def ConsumeIdentifier(self):
        """Consumes protocol message field identifier.

        Returns:
          Identifier string.

        Raises:
          ParseError: If an identifier couldn't be consumed.
        """
        result = self.token
        identifier_match = self._IDENTIFIER_BYTES.match(result)
        if not identifier_match:
            raise self._ParseError('Expected identifier.')
        self.NextToken()
        return bytes_to_string(result)

    def ConsumeInt32(self):
        """Consumes a signed 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 32bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(self.token,
                                        is_signed=True,
                                        is_long=False)
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result

    def ConsumeUint32(self):
        """Consumes an unsigned 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 32bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(self.token,
                                        is_signed=False,
                                        is_long=False)
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result

    def ConsumeInt64(self):
        """Consumes a signed 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 64bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(self.token,
                                        is_signed=True,
                                        is_long=True)
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result

    def ConsumeUint64(self):
        """Consumes an unsigned 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 64bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(self.token,
                                        is_signed=False,
                                        is_long=True)
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result

    def ConsumeFloat(self):
        """Consumes a floating point number.

        Besides what ``float()`` accepts, the spellings covered by
        _FLOAT_INFINITY and _FLOAT_NAN (e.g. "inff", "nanf") are recognised.

        Returns:
          The number parsed.

        Raises:
          ParseError: If a floating point number couldn't be consumed.
        """
        text = self.token
        # Pattern.match() only anchors at the start, so require the match to
        # span the whole token; otherwise e.g. "infoo" would parse as
        # infinity and "nanny" as NaN.
        inf_match = self._FLOAT_INFINITY.match(text)
        if inf_match and inf_match.end() == len(text):
            self.NextToken()
            if text.startswith(b'-'):
                return -_INFINITY
            return _INFINITY

        nan_match = self._FLOAT_NAN.match(text)
        if nan_match and nan_match.end() == len(text):
            self.NextToken()
            return _NAN

        try:
            result = float(text)
        except ValueError as e:
            raise self._FloatParseError(e)
        self.NextToken()
        return result

    def ConsumeBool(self):
        """Consumes a boolean value.

        Returns:
          The bool parsed.

        Raises:
          ParseError: If a boolean value couldn't be consumed.
        """
        if self.token in (b'true', b't', b'1'):
            self.NextToken()
            return True
        elif self.token in (b'false', b'f', b'0'):
            self.NextToken()
            return False
        else:
            raise self._ParseError('Expected "true" or "false".')

    def ConsumeString(self):
        """Consumes a string value.

        Returns:
          The string parsed.

        Raises:
          ParseError: If a string value couldn't be consumed.
        """
        bytes_str = self.ConsumeByteString()
        try:
            # NOTE(review): ConsumeIdentifier calls ``bytes_to_string`` while
            # this calls ``bytestr_to_string``; both are defined outside this
            # class -- confirm that two distinct helpers are intended.
            return bytestr_to_string(bytes_str)
        except UnicodeDecodeError as e:
            raise self._StringParseError(e)

    def ConsumeByteString(self):
        """Consumes a byte array value.

        Adjacent string literal tokens are concatenated.

        Returns:
          The array parsed (as a string).

        Raises:
          ParseError: If a byte array value couldn't be consumed.
        """
        parts = [self._ConsumeSingleByteString()]
        while len(self.token) > 0 and ord(
                self.token[0:1]) in (ord('\''), ord('"')):
            parts.append(self._ConsumeSingleByteString())
        return b"".join(parts)

    def _ConsumeSingleByteString(self):
        """Consume one token of a string literal.

        String literals (whether bytes or text) can come in multiple adjacent
        tokens which are automatically concatenated, like in C or Python.
        This method only consumes one token.

        Returns:
          The unescaped contents of the literal, without its quotes.

        Raises:
          ParseError: If the current token is not a properly quoted string
            or cannot be unescaped.
        """
        text = self.token
        if len(text) < 1 or ord(text[0:1]) not in (ord('\''), ord('"')):
            raise self._ParseError('Expected string.')

        if len(text) < 2 or text[-1] != text[0]:
            raise self._ParseError('String missing ending quote.')

        try:
            result = _CUnescape(text[1:-1])
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def _ParseInteger(self, text, is_signed=False, is_long=False):
        """Parses an integer.

        A "0x"/"0X" prefix selects base 16 and a leading "0" selects base 8;
        a leading "-" is skipped when detecting the prefix.

        Args:
          text: The text to parse.
          is_signed: True if a signed integer must be parsed.
          is_long: True if a long integer must be parsed.

        Returns:
          The integer value.

        Raises:
          ValueError: Thrown Iff the text is not a valid integer.
        """
        pos = 0
        if text.startswith(b'-'):
            pos += 1

        base = 10
        if text.startswith(b'0x', pos) or text.startswith(b'0X', pos):
            base = 16
        elif text.startswith(b'0', pos):
            base = 8

        # Do the actual parsing. Exception handling is propagated to caller.
        result = int(text, base)

        # Check if the integer is sane. Exceptions handled by callers.
        checker = self._INTEGER_CHECKERS[2 * int(is_long) + int(is_signed)]
        checker.CheckValue(result)
        return result

    def ParseErrorPreviousToken(self, message):
        """Creates and *returns* a ParseError for the previously read token.

        Args:
          message: A message to set for the exception.

        Returns:
          A ParseError instance.
        """
        return ParseError(
            '%d:%d : %s' %
            (self._previous_line + 1, self._previous_column + 1, message))

    def _ParseError(self, message):
        """Creates and *returns* a ParseError for the current token.

        Args:
          message: A message to set for the exception.

        Returns:
          A ParseError instance whose text starts with the 1-based
          "line:column" of the current token.
        """
        # NextToken() leaves self._column at the *start* of the current
        # token, so the token length must not be subtracted here.
        return ParseError(
            '%d:%d : %s' %
            (self._line + 1, self._column + 1, message))

    def _IntegerParseError(self, e):
        """Returns a ParseError describing a failed integer parse."""
        return self._ParseError('Couldn\'t parse integer: ' + str(e))

    def _FloatParseError(self, e):
        """Returns a ParseError describing a failed float parse."""
        return self._ParseError('Couldn\'t parse number: ' + str(e))

    def _StringParseError(self, e):
        """Returns a ParseError describing a failed string decode."""
        return self._ParseError('Couldn\'t parse string: ' + str(e))

    def NextToken(self):
        """Reads the next meaningful token."""
        self._previous_line = self._line
        self._previous_column = self._column

        # Step over the token that was current until now.
        self._column += len(self.token)
        self._SkipWhitespace()

        if not self._lines and len(self._current_line) <= self._column:
            self.token = b''
            return

        match = self._TOKEN_BYTES.match(self._current_line, self._column)

        if match:
            token = match.group(0)
            self.token = token
        else:
            # Anything else (punctuation etc.) is a one-character token.
            self.token = self._current_line[self._column:self._column + 1]
class _Tokenizer(object):
    """Protocol buffer ASCII representation tokenizer.

    This class handles the lower level string parsing by splitting it into
    meaningful tokens.

    It was directly ported from the Java protocol buffer API.

    This variant works on bytes only. ``self.token`` always holds the current
    token (``b''`` once the input is exhausted), and ``self._line`` /
    ``self._column`` hold the 0-based position at which that token starts.
    """

    # Runs of whitespace and '#'-to-end-of-line comments.
    _WHITESPACE = re.compile(b'(\\s|(#.*$))+', re.MULTILINE)
    _TOKEN = re.compile(
        b'[a-zA-Z_][0-9a-zA-Z_+-]*|'  # an identifier
        b'[0-9+-][0-9a-zA-Z_.+-]*|'  # a number
        b'\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
        b'\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
    # Raw literal: a bare '\w' in a normal literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    _IDENTIFIER = re.compile(br'\w+')
    _INTEGER_CHECKERS = [
        type_checkers.Uint32ValueChecker(),
        type_checkers.Int32ValueChecker(),
        type_checkers.Uint64ValueChecker(),
        type_checkers.Int64ValueChecker()
    ]
    _FLOAT_INFINITY = re.compile(b'-?inf(inity)?f?', re.IGNORECASE)
    _FLOAT_NAN = re.compile(b"nanf?", re.IGNORECASE)

    def __init__(self, text_message):
        """Splits the message into lines and reads the first token.

        Args:
          text_message: The bytes to tokenize.
        """
        assert isinstance(text_message, bytes)
        self._text_message = text_message
        self._position = 0
        self._line = -1
        self._column = 0
        self._token_start = None
        self.token = b''
        self._lines = deque(text_message.split(b'\n'))
        self._current_line = b''
        self._previous_line = 0
        self._previous_column = 0
        self._SkipWhitespace()
        self.NextToken()

    def AtEnd(self):
        """Checks the end of the text was reached.

        Returns:
          True iff the end was reached.
        """
        return self.token == b''

    def _PopLine(self):
        """Moves to the next queued line while the column is past the end
        of the current one; leaves an empty current line when none remain."""
        while len(self._current_line) <= self._column:
            if not self._lines:
                self._current_line = b''
                return
            self._line += 1
            self._column = 0
            self._current_line = self._lines.popleft()

    def _SkipWhitespace(self):
        """Advances the column past whitespace and comments, across lines."""
        while True:
            self._PopLine()
            match = self._WHITESPACE.match(self._current_line, self._column)
            if not match:
                break
            length = len(match.group(0))
            self._column += length

    def TryConsume(self, token):
        """Tries to consume a given piece of text.

        Args:
          token: Text to consume.

        Returns:
          True iff the text was consumed.
        """
        if self.token == token:
            self.NextToken()
            return True
        return False

    def Consume(self, token):
        """Consumes a piece of text.

        Args:
          token: Text to consume.

        Raises:
          ParseError: If the text couldn't be consumed.
        """
        if not self.TryConsume(token):
            raise self._ParseError('Expected "%s".' % token)

    def LookingAtInteger(self):
        """Checks if the current token is an integer.

        Only the first character is inspected (a digit, '-' or '+').

        Returns:
          True iff the current token is an integer.
        """
        if not self.token:
            return False
        # The token is bytes, so compare against bytes literals: ordering
        # bytes against str raises TypeError on Python 3.
        c = self.token[0:1]
        return (b'0' <= c <= b'9') or c == b'-' or c == b'+'

    def ConsumeIdentifier(self):
        """Consumes protocol message field identifier.

        Returns:
          The identifier token, as stored in ``self.token``.

        Raises:
          ParseError: If an identifier couldn't be consumed.
        """
        result = self.token
        if not self._IDENTIFIER.match(result):
            raise self._ParseError('Expected identifier.')
        self.NextToken()
        return result

    def ConsumeInt32(self):
        """Consumes a signed 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 32bit integer couldn't be consumed.
        """
        try:
            result = ParseInteger(self.token, is_signed=True, is_long=False)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeUint32(self):
        """Consumes an unsigned 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 32bit integer couldn't be consumed.
        """
        try:
            result = ParseInteger(self.token, is_signed=False, is_long=False)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeInt64(self):
        """Consumes a signed 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 64bit integer couldn't be consumed.
        """
        try:
            result = ParseInteger(self.token, is_signed=True, is_long=True)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeUint64(self):
        """Consumes an unsigned 64bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If an unsigned 64bit integer couldn't be consumed.
        """
        try:
            result = ParseInteger(self.token, is_signed=False, is_long=True)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeFloat(self):
        """Consumes a floating point number.

        Returns:
          The number parsed.

        Raises:
          ParseError: If a floating point number couldn't be consumed.
        """
        try:
            result = ParseFloat(self.token)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeBool(self):
        """Consumes a boolean value.

        Returns:
          The bool parsed.

        Raises:
          ParseError: If a boolean value couldn't be consumed.
        """
        try:
            result = ParseBool(self.token)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeString(self):
        """Consumes a string value.

        Returns:
          The string parsed, decoded as UTF-8.

        Raises:
          ParseError: If a string value couldn't be consumed.
        """
        byte_str = self.ConsumeByteString()
        try:
            return byte_str.decode('utf-8')
        except UnicodeDecodeError as e:
            raise self._StringParseError(e)

    def ConsumeByteString(self):
        """Consumes a byte array value.

        Adjacent string literal tokens are concatenated.

        Returns:
          The array parsed (as a string).

        Raises:
          ParseError: If a byte array value couldn't be consumed.
        """
        parts = [self._ConsumeSingleByteString()]
        while len(self.token) > 0 and self.token[0:1] in (b'\'', b'"'):
            parts.append(self._ConsumeSingleByteString())
        return b"".join(parts)

    def _ConsumeSingleByteString(self):
        """Consume one token of a string literal.

        String literals (whether bytes or text) can come in multiple adjacent
        tokens which are automatically concatenated, like in C or Python.
        This method only consumes one token.

        Returns:
          The unescaped contents of the literal, without its quotes.

        Raises:
          ParseError: If the current token is not a properly quoted string
            or cannot be unescaped.
        """
        text = self.token
        if len(text) < 1 or text[0:1] not in (b'\'', b'"'):
            raise self._ParseError('Expected string.')

        if len(text) < 2 or text[-1:] != text[0:1]:
            raise self._ParseError('String missing ending quote.')

        try:
            result = _CUnescape(text[1:-1])
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ConsumeEnum(self, field):
        """Consumes an enum value for the given field.

        Args:
          field: Passed through to ParseEnum together with the current token.

        Returns:
          Whatever ParseEnum returns for the current token.

        Raises:
          ParseError: If ParseEnum rejects the token with a ValueError.
        """
        try:
            result = ParseEnum(field, self.token)
        except ValueError as e:
            raise self._ParseError(str(e))
        self.NextToken()
        return result

    def ParseErrorPreviousToken(self, message):
        """Creates and *returns* a ParseError for the previously read token.

        Args:
          message: A message to set for the exception.

        Returns:
          A ParseError instance.
        """
        return ParseError(
            '%d:%d : %s' %
            (self._previous_line + 1, self._previous_column + 1, message))

    def _ParseError(self, message):
        """Creates and *returns* a ParseError for the current token."""
        return ParseError('%d:%d : %s' %
                          (self._line + 1, self._column + 1, message))

    def _StringParseError(self, e):
        """Returns a ParseError describing a failed string decode."""
        return self._ParseError('Couldn\'t parse string: ' + str(e))

    def NextToken(self):
        """Reads the next meaningful token."""
        self._previous_line = self._line
        self._previous_column = self._column

        # Step over the token that was current until now.
        self._column += len(self.token)
        self._SkipWhitespace()

        if not self._lines and len(self._current_line) <= self._column:
            self.token = b''
            return

        match = self._TOKEN.match(self._current_line, self._column)
        if match:
            token = match.group(0)
            self.token = token
        else:
            # Anything else (punctuation etc.) is a one-character token.
            self.token = self._current_line[self._column:self._column + 1]
class _Tokenizer(object):
    """Protocol buffer ASCII representation tokenizer.

  This class handles the lower level string parsing by splitting it into
  meaningful tokens.

  It was directly ported from the Java protocol buffer API.
  """

    # Runs of whitespace and '#'-to-end-of-line comments.
    _WHITESPACE = re.compile('(\\s|(#.*$))+', re.MULTILINE)
    _TOKEN = re.compile(
        '[a-zA-Z_][0-9a-zA-Z_+-]*|'  # an identifier
        '[0-9+-][0-9a-zA-Z_.+-]*|'  # a number
        '\"([^\"\n\\\\]|\\\\.)*(\"|\\\\?$)|'  # a double-quoted string
        # The character class must exclude the *single* quote here; excluding
        # '"' instead made single-quoted strings containing a double quote
        # impossible to tokenize and let the body run past an inner "'".
        '\'([^\'\n\\\\]|\\\\.)*(\'|\\\\?$)')  # a single-quoted string
    # Raw literal: a bare '\w' in a normal literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    _IDENTIFIER = re.compile(r'\w+')
    # Value checkers for, in order: unsigned 32-bit, signed 32-bit,
    # unsigned 64-bit and signed 64-bit integers.
    _INTEGER_CHECKERS = [
        type_checkers.Uint32ValueChecker(),
        type_checkers.Int32ValueChecker(),
        type_checkers.Uint64ValueChecker(),
        type_checkers.Int64ValueChecker()
    ]
    _FLOAT_INFINITY = re.compile('-?inf(inity)?f?', re.IGNORECASE)
    _FLOAT_NAN = re.compile("nanf?", re.IGNORECASE)

    def __init__(self, text_message):
        """Queues the lines of the message and reads the first token.

        Args:
          text_message: The text to tokenize; it is split on newlines.
        """
        self._text_message = text_message
        self._lines = deque(text_message.split('\n'))
        self._current_line = ''
        self.token = ''
        self._token_start = None

        # Position bookkeeping; the line counter starts before the first line.
        self._position = 0
        self._line, self._column = -1, 0
        self._previous_line, self._previous_column = 0, 0

        self._SkipWhitespace()
        self.NextToken()

    def AtEnd(self):
        """Tells whether all input has been consumed.

        Returns:
          True iff no queued lines remain and the current line is empty.
        """
        return not (self._lines or self._current_line)

    def _PopLine(self):
        """Pulls queued lines until the current line is non-empty or the
        queue is exhausted, resetting the column for every line pulled."""
        while not self._current_line and self._lines:
            self._line += 1
            self._column = 0
            self._current_line = self._lines.popleft()
        if not self._current_line:
            # Nothing left to read.
            self._current_line = ''

    def _SkipWhitespace(self):
        """Strips leading whitespace and comments from the current line,
        moving on to following lines as needed and advancing the column."""
        self._PopLine()
        blank = self._WHITESPACE.match(self._current_line)
        while blank:
            # The match starts at index 0, so its end is the skipped length.
            skipped = blank.end()
            self._current_line = self._current_line[skipped:]
            self._column += skipped
            self._PopLine()
            blank = self._WHITESPACE.match(self._current_line)

    def TryConsume(self, token):
        """Consumes the current token if it equals the given text.

        Args:
          token: Text to consume.

        Returns:
          True iff the text was consumed.
        """
        if self.token != token:
            return False
        self.NextToken()
        return True

    def Consume(self, token):
        """Consumes the given text or fails.

        Args:
          token: Text to consume.

        Raises:
          ParseError: If the current token is not the given text.
        """
        if self.TryConsume(token):
            return
        raise self._ParseError('Expected "%s".' % token)

    def LookingAtInteger(self):
        """Tells whether the current token looks like an integer.

        Only the first character is inspected (a digit, '-' or '+').

        Returns:
          True iff the current token is an integer.
        """
        if not self.token:
            return False
        first = self.token[0]
        return '0' <= first <= '9' or first in ('-', '+')

    def ConsumeIdentifier(self):
        """Consumes a protocol message field identifier.

        Returns:
          The identifier token.

        Raises:
          ParseError: If the current token does not start like an identifier.
        """
        identifier = self.token
        if self._IDENTIFIER.match(identifier) is None:
            raise self._ParseError('Expected identifier.')
        self.NextToken()
        return identifier

    def ConsumeInt32(self):
        """Consumes a signed 32bit integer number.

        Returns:
          The integer parsed.

        Raises:
          ParseError: If a signed 32bit integer couldn't be consumed.
        """
        try:
            result = self._ParseInteger(self.token,
                                        is_signed=True,
                                        is_long=False)
        # "except X as e" is valid on Python 2.6+ and Python 3, whereas the
        # old "except X, e" form is a syntax error on Python 3.
        except ValueError as e:
            raise self._IntegerParseError(e)
        self.NextToken()
        return result