コード例 #1
0
def test_pushback(py_c_token):
    """Check that tokens handed back with push_back() are not lost.

    Every token seen at an index divisible by three is returned to the
    tokenizer instead of being recorded. The recorded stream is then compared
    against the complete expected token list, so the test only passes if each
    pushed-back token is produced again on a later iteration.
    """
    tokenizer = py_c_token(prop_parse_test, '', string_bracket=True)
    recorded = []
    for index, (kind, value) in enumerate(tokenizer):
        if index % 3:
            recorded.append((kind, value))
        else:
            # Hand the token straight back rather than recording it now.
            tokenizer.push_back(kind, value)
    check_tokens(recorded, prop_parse_tokens)
コード例 #2
0
def test_pushback(py_c_token):
    """Check that tokens handed back with push_back() are not lost.

    Every token seen at an index divisible by three is returned to the
    tokenizer instead of being recorded. The recorded stream is then compared
    against the complete expected token list, so the test only passes if each
    pushed-back token is produced again on a later iteration.
    """
    tokenizer = py_c_token(prop_parse_test, '', string_bracket=True)
    recorded = []
    for index, (kind, value) in enumerate(tokenizer):
        if index % 3:
            recorded.append((kind, value))
        else:
            # Hand the token straight back rather than recording it now.
            tokenizer.push_back(kind, value)
    check_tokens(recorded, prop_parse_tokens)
コード例 #3
0
    def parse(
        file_contents: Union[str, BaseTokenizer, Iterator[str]],
        filename='', *,
        flags: Mapping[str, bool]=EmptyMapping,
        allow_escapes: bool=True,
        single_line: bool=False,
    ) -> "Property":
        """Return a Property tree parsed from the given text.

        file_contents should be an iterable of strings or a single string.
        Alternatively, it may be an already created tokenizer. In that case
        its filename and error_type attributes are overwritten, and
        allow_escapes is ignored.

        filename, if set, should be the source of the text for debug purposes.

        flags should be a mapping for additional flags to accept
        (which overrides defaults). Each [flag] is evaluated with
        _read_flag(); a property or block whose flag is rejected is left out
        of the resulting tree (a rejected block is still parsed, so syntax
        errors inside it are reported).

        allow_escapes allows choosing if \\t or similar escapes are parsed.

        If single_line is set, allow multiple properties to be on the same
        line. This means unterminated strings will be caught late (if at
        all), but it allows parsing some 'internal' data blocks.

        The returned root property has a name of None and holds every
        top-level property in its value list.

        Syntax problems found while reading tokens are raised through
        tokenizer.error(). Reaching the end of the text while a block is
        still expected or still open raises KeyValError.
        """
        # The block we are currently adding to.

        # The special name 'None' marks it as the root property, which
        # just outputs its children when exported. This way we can handle
        # multiple root blocks in the file, while still returning a single
        # Property object which has all the methods.
        # Skip calling __init__ for speed.
        cur_block = root = Property.__new__(Property)
        cur_block._folded_name = cur_block.real_name = None
        cur_block.value = []

        # A queue of the properties we are currently in (outside to inside).
        # And the line numbers of each of these, for error reporting.
        open_properties = [(cur_block, 1)]

        # Grab a reference to the token values, so we avoid global lookups.
        STRING = Token.STRING
        PROP_FLAG = Token.PROP_FLAG
        NEWLINE = Token.NEWLINE
        BRACE_OPEN = Token.BRACE_OPEN
        BRACE_CLOSE = Token.BRACE_CLOSE

        if isinstance(file_contents, BaseTokenizer):
            tokenizer = file_contents
            tokenizer.filename = filename
            tokenizer.error_type = KeyValError
        else:
            tokenizer = Tokenizer(
                file_contents,
                filename,
                KeyValError,
                string_bracket=True,
                allow_escapes=allow_escapes,
            )

        # If not None, we're requiring a block to open next ("name"\n must have { next.)
        # It's the line number of the header name.
        block_line: Optional[int] = None
        # The property the next "{" will open. It is tracked explicitly
        # instead of using cur_block.value[-1], because a header whose flag
        # was rejected is never stored in the tree - its block must be parsed
        # into a detached property and not into the previous sibling.
        block_prop: Optional[Property] = None
        # Are we permitted to replace the last property with a flagged version of the same?
        can_flag_replace = False

        for token_type, token_value in tokenizer:
            if token_type is BRACE_OPEN:  # {
                # Open a new block - make sure the last token was a name..
                if block_line is None:
                    raise tokenizer.error(
                        'Property cannot have sub-section if it already '
                        'has an in-line value.\n\n'
                        'A "name" "value" line cannot then open a block.',
                    )
                can_flag_replace = False
                # block_prop is always set together with block_line.
                cur_block = block_prop
                cur_block.value = []
                open_properties.append((cur_block, block_line))
                block_line = block_prop = None
                continue
            # Something else, but followed by '{'
            elif block_line is not None and token_type is not NEWLINE:
                raise tokenizer.error(
                    'Block opening ("{{") required!\n\n'
                    'A single "name" on a line should next have a open brace '
                    'to begin a block.',
                )

            if token_type is NEWLINE:
                continue
            if token_type is STRING:   # "string"
                # Skip calling __init__ for speed. Value needs to be set
                # before using this, since it's unset here.
                keyvalue = Property.__new__(Property)
                keyvalue._folded_name = sys.intern(token_value.casefold())
                keyvalue.real_name = sys.intern(token_value)

                # We need to check the next token to figure out what kind of
                # prop it is.
                prop_type, prop_value = tokenizer()

                # It's a block followed by flag. ("name" [stuff])
                if prop_type is PROP_FLAG:
                    # That must be the end of the line..
                    tokenizer.expect(NEWLINE)
                    block_line = tokenizer.line_num
                    keyvalue.value = []
                    # Remember the header even if the flag rejects it, so the
                    # following block is read into this (then discarded)
                    # property rather than clobbering an unrelated one.
                    block_prop = keyvalue
                    if _read_flag(flags, prop_value):
                        # Special function - if the last prop was a
                        # block with this name, replace it instead.
                        if (
                            can_flag_replace and
                            cur_block.value[-1].real_name == token_value and
                            isinstance(cur_block.value[-1].value, list)
                        ):
                            cur_block.value[-1] = keyvalue
                        else:
                            cur_block.value.append(keyvalue)
                        # Can't do twice in a row
                        can_flag_replace = False

                elif prop_type is STRING:
                    # A value.. ("name" "value")
                    # block_line is always None here: the check at the top of
                    # the loop already rejects any non-newline token while a
                    # block is pending, so no extra test is needed.
                    keyvalue.value = prop_value

                    # Check for flags.
                    flag_token, flag_val = tokenizer()
                    if flag_token is PROP_FLAG:
                        # Should be the end of the line here.
                        tokenizer.expect(NEWLINE)
                        if _read_flag(flags, flag_val):
                            # Special function - if the last prop was a
                            # keyvalue with this name, replace it instead.
                            if (
                                can_flag_replace and
                                cur_block.value[-1].real_name == token_value and
                                isinstance(cur_block.value[-1].value, str)
                            ):
                                cur_block.value[-1] = keyvalue
                            else:
                                cur_block.value.append(keyvalue)
                            # Can't do twice in a row
                            can_flag_replace = False
                    elif flag_token is STRING:
                        # Specifically disallow multiple text on the same line
                        # normally.
                        # ("name" "value" "name2" "value2")
                        if single_line:
                            cur_block.value.append(keyvalue)
                            tokenizer.push_back(flag_token, flag_val)
                            continue
                        else:
                            raise tokenizer.error(
                                "Cannot have multiple names on the same line!"
                            )
                    else:
                        # Otherwise, it's got nothing after.
                        # So insert the keyvalue, and check the token
                        # in the next loop. This allows braces to be
                        # on the same line.
                        cur_block.value.append(keyvalue)
                        can_flag_replace = True
                        tokenizer.push_back(flag_token, flag_val)
                    continue
                else:
                    # Something else - treat this as a block, and
                    # then re-evaluate the token in the next loop.
                    keyvalue.value = []

                    block_line = tokenizer.line_num
                    block_prop = keyvalue
                    can_flag_replace = False
                    cur_block.value.append(keyvalue)
                    tokenizer.push_back(prop_type, prop_value)
                    continue

            elif token_type is BRACE_CLOSE:  # }
                # Move back a block
                closed_block, _ = open_properties.pop()
                try:
                    cur_block, _ = open_properties[-1]
                except IndexError:
                    # It's empty, we've closed one too many properties.
                    raise tokenizer.error(
                        'Too many closing brackets.\n\n'
                        'An extra closing bracket was added which would '
                        'close the outermost level.',
                    )
                # For replacing the block. Only permitted when the block we
                # just closed is really the last child of its parent - a
                # block discarded by its flag was never stored there.
                can_flag_replace = (
                    bool(cur_block.value) and
                    cur_block.value[-1] is closed_block
                )
            else:
                raise tokenizer.error(token_type, token_value)

        # We're out of data, do some final sanity checks.

        # We last had a ("name"\n), so we were expecting a block
        # next.
        if block_line is not None:
            raise KeyValError(
                'Block opening ("{") required, but hit EOF!\n'
                'A "name" line was located at the end of the file, which needs'
                ' a {} block to follow.',
                tokenizer.filename,
                line=None,
            )

        # All the properties in the file should be closed,
        # so the only thing in open_properties should be the
        # root one we added.

        if len(open_properties) > 1:
            raise KeyValError(
                'End of text reached with remaining open sections.\n\n'
                "File ended with at least one property that didn't "
                'have an ending "}".\n'
                'Open properties: \n- Root at line 1\n' + '\n'.join([
                    f'- "{prop.real_name}" on line {line_num}'
                    for prop, line_num in open_properties[1:]
                ]),
                tokenizer.filename,
                line=None,
            )
        # Return that root property.
        return root
コード例 #4
0
    def parse(
        file_contents: Union[str, Iterator[str]],
        filename='',
        flags: Dict[str, bool] = EmptyMapping,
    ) -> "Property":
        """Return a Property tree parsed from the given text.

        filename, if set, should be the source of the text for debug purposes.
        file_contents should be an iterable of strings or a single string.
        flags should be a mapping for additional flags to accept
        (which overrides defaults). Each [flag] is evaluated with
        _read_flag(); a property or block whose flag is rejected is left out
        of the resulting tree (a rejected block is still parsed, so syntax
        errors inside it are reported).

        The returned root property is created with a name of None and holds
        every top-level property.

        Syntax problems found while reading tokens are raised through
        tokenizer.error(). Reaching the end of the text while a block is
        still expected or still open raises KeyValError.
        """
        # The block we are currently adding to.

        # The special name 'None' marks it as the root property, which
        # just outputs its children when exported. This way we can handle
        # multiple root blocks in the file, while still returning a single
        # Property object which has all the methods.
        cur_block = Property(None, [])

        # A queue of the properties we are currently in (outside to inside).
        open_properties = [cur_block]

        # Grab a reference to the token values, so we avoid global lookups.
        STRING = Token.STRING
        PROP_FLAG = Token.PROP_FLAG
        NEWLINE = Token.NEWLINE
        BRACE_OPEN = Token.BRACE_OPEN
        BRACE_CLOSE = Token.BRACE_CLOSE

        tokenizer = Tokenizer(
            file_contents,
            filename,
            KeyValError,
            string_bracket=True,
        )

        # Do we require a block to be opened next? ("name"\n must have { next.)
        requires_block = False
        # The property the next "{" will open. It is tracked explicitly
        # instead of looking up the last child of cur_block, because a header
        # whose flag was rejected is never stored in the tree - its block must
        # be parsed into a detached property, not into the previous sibling.
        block_prop = None
        # Are we permitted to replace the last property with a flagged version of the same?
        can_flag_replace = False

        for token_type, token_value in tokenizer:
            if token_type is BRACE_OPEN:  # {
                # Open a new block - make sure the last token was a name..
                if not requires_block:
                    raise tokenizer.error(
                        'Property cannot have sub-section if it already '
                        'has an in-line value.\n\n'
                        'A "name" "value" line cannot then open a block.', )
                requires_block = can_flag_replace = False
                # block_prop is always set together with requires_block.
                cur_block = block_prop
                cur_block.value = []
                open_properties.append(cur_block)
                block_prop = None
                continue
            # Something else, but followed by '{'
            elif requires_block and token_type is not NEWLINE:
                raise tokenizer.error(
                    'Block opening ("{{") required!\n\n'
                    'A single "name" on a line should next have a open brace '
                    'to begin a block.', )

            if token_type is NEWLINE:
                continue
            if token_type is STRING:  # "string"
                # We need to check the next token to figure out what kind of
                # prop it is.
                prop_type, prop_value = tokenizer()

                # It's a block followed by flag. ("name" [stuff])
                if prop_type is PROP_FLAG:
                    # That must be the end of the line..
                    tokenizer.expect(NEWLINE)
                    requires_block = True
                    if _read_flag(flags, prop_value):
                        # Special function - if the last prop was a
                        # block with this name, replace it instead.
                        if (can_flag_replace and cur_block.value[-1].real_name
                                == token_value
                                and cur_block.value[-1].has_children()):
                            cur_block.value[-1] = Property(token_value, [])
                        else:
                            cur_block.append(Property(token_value, []))
                        # Can't do twice in a row
                        can_flag_replace = False
                        # The header we just stored is what "{" will open.
                        block_prop = cur_block.value[-1]
                    else:
                        # The flag rejected this block. Read its contents
                        # into a detached property which is simply dropped,
                        # rather than clobbering an unrelated property.
                        block_prop = Property(token_value, [])

                elif prop_type is STRING:
                    # A value.. ("name" "value")
                    # requires_block is always False here: the check at the
                    # top of the loop already rejects any non-newline token
                    # while a block is pending, so no extra test is needed.
                    keyvalue = Property(token_value, prop_value)

                    # Check for flags.
                    flag_token, flag_val = tokenizer()
                    if flag_token is PROP_FLAG:
                        # Should be the end of the line here.
                        tokenizer.expect(NEWLINE)
                        if _read_flag(flags, flag_val):
                            # Special function - if the last prop was a
                            # keyvalue with this name, replace it instead.
                            if (can_flag_replace
                                    and cur_block.value[-1].real_name
                                    == token_value and
                                    not cur_block.value[-1].has_children()):
                                cur_block.value[-1] = keyvalue
                            else:
                                cur_block.append(keyvalue)
                            # Can't do twice in a row
                            can_flag_replace = False
                    elif flag_token is STRING:
                        # Specifically disallow multiple text on the same line.
                        # ("name" "value" "name2" "value2")
                        raise tokenizer.error(
                            "Cannot have multiple names on the same line!")
                    else:
                        # Otherwise, it's got nothing after.
                        # So insert the keyvalue, and check the token
                        # in the next loop. This allows braces to be
                        # on the same line.
                        cur_block.append(keyvalue)
                        can_flag_replace = True
                        tokenizer.push_back(flag_token, flag_val)
                    continue
                else:  # Something else - treat this as a block, and
                    # then re-evaluate this in the next loop.
                    requires_block = True
                    can_flag_replace = False
                    cur_block.append(Property(token_value, []))
                    block_prop = cur_block.value[-1]
                    tokenizer.push_back(prop_type, prop_value)
                    continue

            elif token_type is BRACE_CLOSE:  # }
                # Move back a block
                closed_block = open_properties.pop()
                try:
                    cur_block = open_properties[-1]
                except IndexError:
                    # It's empty, we've closed one too many properties.
                    raise tokenizer.error(
                        'Too many closing brackets.\n\n'
                        'An extra closing bracket was added which would '
                        'close the outermost level.', )
                # For replacing the block. Only permitted when the block we
                # just closed is really the last child of its parent - a
                # block discarded by its flag was never stored there.
                can_flag_replace = (
                    bool(cur_block.value)
                    and cur_block.value[-1] is closed_block
                )
            else:
                raise tokenizer.error(token_type)

        # We're out of data, do some final sanity checks.

        # We last had a ("name"\n), so we were expecting a block
        # next.
        if requires_block:
            raise KeyValError(
                'Block opening ("{") required, but hit EOF!\n'
                'A "name" line was located at the end of the file, which needs'
                ' a {} block to follow.',
                tokenizer.filename,
                line=None,
            )

        # All the properties in the file should be closed,
        # so the only thing in open_properties should be the
        # root one we added.

        if len(open_properties) > 1:
            raise KeyValError(
                'End of text reached with remaining open sections.\n\n'
                "File ended with at least one property that didn't "
                'have an ending "}".',
                tokenizer.filename,
                line=None,
            )
        # Return that root property.
        return open_properties[0]