Example #1
    def INT_LITERAL(self, token: Token) -> Token:
        """Token rule to detect integer literals.

        An int literal may be represented as a number in decimal form appended
        with a 'K' or number in hexadecimal form.

        Example:
        1024
        1K # same as above
        -256
        0x25

        Lexer converts the detected string into a number. String literals
        appended with 'K' are multiplied by 1024.

        :param token: token matching integer literal pattern
        :return: Token representing integer literal
        """
        number = token.value
        if number[0] == "'" and number[-1] == "'":
            # transform 'dude' into '0x64756465'
            number = "0x" + bytearray(number[1:-1], "utf-8").hex()
            number = int(number, 0)
        elif number[-1] == "K":
            number = int(number[:-1], 0) * 1024
        else:
            number = int(number, 0)

        token.value = number
        return token
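
The conversion logic can be exercised on its own. A minimal sketch, assuming
nothing beyond the standard library; the helper name convert_int_literal is
illustrative and not part of the original lexer:

def convert_int_literal(text: str) -> int:
    # quoted character sequence, e.g. 'dude' -> 0x64756465
    if text[0] == "'" and text[-1] == "'":
        return int("0x" + bytearray(text[1:-1], "utf-8").hex(), 0)
    # decimal number with a 'K' suffix is multiplied by 1024
    if text[-1] == "K":
        return int(text[:-1], 0) * 1024
    # plain decimal or hexadecimal number
    return int(text, 0)

assert convert_int_literal("1K") == 1024
assert convert_int_literal("0x25") == 37
assert convert_int_literal("'dude'") == 0x64756465
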
Example #2
    def IDENT(self, token: Token) -> Token:
        """Token rule to detect identifiers.

        A valid identifier starts with an underscore or a letter, followed by
        any number of underscores, letters, and digits.

        If the name of an identifier is one of the reserved keywords, the
        token type is replaced with the keyword name; otherwise the token is
        of type 'IDENT'.
        The values TRUE/YES and FALSE/NO are replaced by 1 and 0 respectively.

        :param token: token matching an identifier pattern
        :return: Token representing identifier
        """
        # the identifier may actually be a reserved keyword; in that case
        # remap the type from IDENT to the keyword name
        token_type = self.reserved.get(token.value, "IDENT")
        if token_type in ["TRUE", "YES"]:
            token.type = "INT_LITERAL"
            token.value = 1
        elif token_type in ["FALSE", "NO"]:
            token.type = "INT_LITERAL"
            token.value = 0
        else:
            token.type = token_type
            # check whether the identifier names one of the sources; if so,
            # change the type to SOURCE_NAME
            for source in self._sources:
                if source.name == token.value:
                    token.type = "SOURCE_NAME"
                    break
        return token
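
The rule depends on a reserved mapping from keyword spelling to token type.
A hedged sketch of its likely shape; the entries and their casing are
illustrative, since the actual keyword set is not shown in the snippet:

reserved = {
    "IF": "IF",
    "ELSE": "ELSE",
    "TRUE": "TRUE",
    "YES": "YES",
    "FALSE": "FALSE",
    "NO": "NO",
}

With such a table, reserved.get(token.value, "IDENT") yields the keyword type
for reserved words and falls back to 'IDENT' for ordinary identifiers; TRUE
and YES are then rewritten into INT_LITERAL tokens with value 1.
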
Example #3
    def error(self, token: Token) -> Token:
        """Token error handler.

        The lexing index is incremented so that lexing can continue; however,
        an error token is returned. The incoming token contains all remaining
        text starting at the detected error, so its value is truncated to the
        single offending character.

        :param token: invalid token.
        :return: the invalid token.
        """
        self.index += 1
        # keep only the offending character, not the whole remaining text
        token.value = token.value[0]
        return token
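
A small illustration of the truncation step; FakeToken is a stand-in, since
the real token class comes from the lexer framework in use:

class FakeToken:
    pass

tok = FakeToken()
tok.value = "@bad input continues here"
tok.value = tok.value[0]
print(tok.value)  # '@' - only the offending character survives
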
Example #4
    def BINARY_BLOB(self, token: Token) -> Token:
        """Token rule to detect binary blob.

        A binary blob is a sequence of hexadecimal bytes in double curly braces.

        Example:
        {{aa bb cc 1F 3C}}

        :param token: token matching binary blob pattern
        :return: Token representing binary blob
        """
        # strip the enclosing double braces
        value = token.value[2:-2]
        # drop all whitespace so only the hex digits remain
        token.value = "".join(value.split())
        return token
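
Since the normalized value is a plain run of hex digits, downstream code can
turn it into bytes with bytes.fromhex(); the sample blob is illustrative:

raw = "{{aa bb cc 1F 3C}}"
value = "".join(raw[2:-2].split())
print(value)                 # 'aabbcc1F3C'
print(bytes.fromhex(value))  # b'\xaa\xbb\xcc\x1f<'
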
Example #5
from itertools import chain


def post_lex(toks):
    """Tweak the token stream to simplify the grammar.

    ``Token`` is the token class of the lexer in use.
    """
    term = Token()
    term.value = ";"
    term.type = "TERM"

    try:
        t = next(toks)
    except StopIteration:
        return []

    for next_tok in chain(toks, [term]):
        yield t

        term.lineno = t.lineno
        term.index = t.index

        # TERMs after blocks and after the last expression in a block are
        # optional. Fill them in here to make the grammar simpler.
        #
        # There are two places where '}' is used, and so there are two places
        # terminators must be consumed: block expressions and hashes.
        #
        # block: { a; b; c } -> { a; b; c; };
        #
        # hashes: { a: b, c: d } -> { a: b, c: d; };

        # Closing a block or hash
        if t.type == "}" and next_tok.type != ";":
            yield term

        # Last expression in a block or hash
        if next_tok.type == "}" and t.type != "TERM":
            yield term

        t = next_tok
    yield t
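
A hedged usage sketch: it assumes Token (referenced inside post_lex) allows
no-argument construction and plain attribute assignment, as sly.lex.Token
does; the Token stand-in and tok helper below are illustrative:

class Token:
    def __repr__(self):
        return self.type

def tok(type_):
    t = Token()
    t.type, t.value, t.lineno, t.index = type_, type_, 0, 0
    return t

# an expression without a trailing terminator gets one appended
print(list(post_lex(iter([tok("a")]))))  # [a, TERM]
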
Example #6
    def __init__(self):
        # initialize the base Token and start with an empty list of changes
        Token.__init__(self)
        self.changes = []
Example #7
    def INTEGER(self, t: Token):
        # convert the matched text into an int
        t.value = int(t.value)
        return t
Example #8
    def LANGUAGE_CODE(self, t: Token):
        # drop the fixed 3-character prefix and wrap the rest in LanguageCode
        t.value = LanguageCode(t.value[3:])
        return t