Example #1
    def tokenize(self, string):
        keywords = set([
            'BA_', 'BA_DEF_', 'BA_DEF_DEF_', 'BA_DEF_DEF_REL_', 'BA_DEF_REL_',
            'BA_DEF_SGTYPE_', 'BA_REL_', 'BA_SGTYPE_', 'BO_', 'BO_TX_BU_',
            'BS_', 'BU_', 'BU_BO_REL_', 'BU_EV_REL_', 'BU_SG_REL_', 'CAT_',
            'CAT_DEF_', 'CM_', 'ENVVAR_DATA_', 'EV_', 'EV_DATA_', 'FILTER',
            'NS_', 'NS_DESC_', 'SG_', 'SG_MUL_VAL_', 'SGTYPE_', 'SGTYPE_VAL_',
            'SIG_GROUP_', 'SIG_TYPE_REF_', 'SIG_VALTYPE_', 'SIGTYPE_VALTYPE_',
            'VAL_', 'VAL_TABLE_', 'VERSION'
        ])

        names = {
            'LPAREN': '(',
            'RPAREN': ')',
            'LBRACE': '[',
            'RBRACE': ']',
            'COMMA': ',',
            'AT': '@',
            'SCOLON': ';',
            'COLON': ':',
            'PIPE': '|',
            'SIGN': '+/-'
        }

        token_specs = [('SKIP', r'[ \r\n\t]+|//.*?\n'),
                       ('NUMBER', r'-?\d+\.?\d*([eE][+-]?\d+)?'),
                       ('WORD', r'[A-Za-z0-9_]+'),
                       ('STRING', r'"(\\"|[^"])*?"'), ('LPAREN', r'\('),
                       ('RPAREN', r'\)'), ('LBRACE', r'\['), ('RBRACE', r'\]'),
                       ('COMMA', r','), ('PIPE', r'\|'), ('AT', r'@'),
                       ('SIGN', r'[+-]'), ('SCOLON', r';'), ('COLON', r':'),
                       ('MISMATCH', r'.')]

        tokens, token_regex = tokenize_init(token_specs)

        for mo in re.finditer(token_regex, string, re.DOTALL):
            kind = mo.lastgroup

            if kind == 'SKIP':
                pass
            elif kind == 'STRING':
                value = mo.group(kind)[1:-1].replace('\\"', '"')
                tokens.append(Token(kind, value, mo.start()))
            elif kind != 'MISMATCH':
                value = mo.group(kind)

                if value in keywords:
                    kind = value

                if kind in names:
                    kind = names[kind]

                tokens.append(Token(kind, value, mo.start()))
            else:
                raise TokenizeError(string, mo.start())

        return tokens
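
For orientation, a minimal sketch of what this method returns for a small input; `parser` is a hypothetical instance of the (unshown) class this method belongs to:

    # Hypothetical driver; `parser` stands in for the surrounding parser class.
    tokens = parser.tokenize('VERSION "1.0"')
    # Expected result, with the leading __SOF__ token seeded by tokenize_init():
    # [Token(kind='__SOF__', value='__SOF__', offset=0),
    #  Token(kind='VERSION', value='VERSION', offset=0),
    #  Token(kind='STRING', value='1.0', offset=8)]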
Example #2
def tokenize(items, add_eof_token=True):
    tokens = []

    for item in items:
        if len(item) == 2:
            token = Token(*item, offset=1)
        else:
            token = Token(*item)

        tokens.append(token)

    if add_eof_token:
        tokens.append(Token('__EOF__', None, -1))

    return tokens
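
A possible call of this test helper, with made-up input tuples; 2-tuples get a fixed offset of 1, while 3-tuples carry their own offset:

    # Made-up input tuples for illustration.
    tokens = tokenize([('WORD', 'foo'), ('NUMBER', '42', 4)])
    # roughly: [Token(kind='WORD', value='foo', offset=1),
    #           Token(kind='NUMBER', value='42', offset=4),
    #           Token(kind='__EOF__', value=None, offset=-1)]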
Example #3
    def tokenize(self, string):
        token_specs = [('SKIP', r'\r+|\s*\|[^\n]*'), ('NL', r'\n'),
                       ('KEYWORD', r'\[.+?\]'), ('WORD', r'[^ \n\t\r\f\v=]+'),
                       ('WS', r'[ \t\r\f\v]+'), ('EQ', r'='),
                       ('MISMATCH', r'.')]

        tokens, token_regex = tokenize_init(token_specs)

        for mo in re.finditer(token_regex, string, re.DOTALL):
            kind = mo.lastgroup

            if kind == 'SKIP':
                pass
            elif kind != 'MISMATCH':
                value = mo.group(kind)

                if kind == 'KEYWORD':
                    keyword = value.lower().replace('_', ' ')

                    if keyword in KEYWORDS:
                        kind = keyword

                tokens.append(Token(kind, value, mo.start()))
            else:
                raise TokenizeError(string, mo.start())

        return tokens
Example #4
    def test_create_token_re(self):
        datas = [([('A', r'a')], '(?P<A>a)'),
                 ([('A', r'b'), ('C', r'd')], '(?P<A>b)|(?P<C>d)')]

        for spec, expected_re_token in datas:
            tokens, re_token = tokenize_init(spec)
            self.assertEqual(
                tokens, [Token(kind='__SOF__', value='__SOF__', offset=0)])
            self.assertEqual(re_token, expected_re_token)
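
The regex and seed token list returned by tokenize_init() are consumed as in the tokenize() methods above. A self-contained sketch of that pattern, assuming Token, TokenizeError and tokenize_init are importable from textparser as in the other examples:

    import re

    from textparser import Token, TokenizeError, tokenize_init

    # Illustrative two-kind specification; 'MISMATCH' catches anything unexpected.
    token_specs = [('WORD', r'[A-Za-z_]+'),
                   ('SKIP', r'[ \t]+'),
                   ('MISMATCH', r'.')]

    tokens, token_regex = tokenize_init(token_specs)

    for mo in re.finditer(token_regex, 'foo bar', re.DOTALL):
        kind = mo.lastgroup

        if kind == 'SKIP':
            pass
        elif kind != 'MISMATCH':
            tokens.append(Token(kind, mo.group(kind), mo.start()))
        else:
            raise TokenizeError('foo bar', mo.start())

    # tokens is now the __SOF__ seed token followed by the two WORD tokens.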
Example #5
    def tokenize(self, string):
        names = {
            'LPAREN': '(',
            'RPAREN': ')',
            'LBRACE': '[',
            'RBRACE': ']',
            'COMMA': ',',
            'ASSIGN': '=',
            'ENUMS': '{ENUMS}',
            'SIGNALS': '{SIGNALS}',
            'SEND': '{SEND}',
            'RECEIVE': '{RECEIVE}',
            'SENDRECEIVE': '{SENDRECEIVE}',
            'U': '/u:',
            'F': '/f:',
            'O': '/o:',
            'MIN': '/min:',
            'MAX': '/max:',
            'D': '/d:',
            'LN': '/ln:',
            'E': '/e:',
            'P': '/p:',
            'M': '-m',
            'H': '-h',
            'B': '-b',
            'S': '-s',
            'T': '-t',
            'V': '-v'
        }

        re_string = r'"(\\"|[^"])*?"'

        token_specs = [('SKIP', r'[ \r\n\t]+'), ('COMMENT', r'//.*?\n'),
                       ('NUMBER', r'-?\d+\.?[0-9A-F]*([eE][+-]?\d+)?'),
                       ('STRING', re_string),
                       ('U', r'/u:({}|\S+)'.format(re_string)), ('F', r'/f:'),
                       ('O', r'/o:'), ('MIN', r'/min:'), ('MAX', r'/max:'),
                       ('D', r'/d:'), ('LN', r'/ln:'), ('E', r'/e:'),
                       ('P', r'/p:'), ('M', r'\-m'), ('H', r'\-h'),
                       ('B', r'\-b'), ('S', r'\-s'), ('T', r'\-t'),
                       ('V', r'\-v'), ('LPAREN', r'\('), ('RPAREN', r'\)'),
                       ('LBRACE', r'\['), ('RBRACE', r'\]'), ('COMMA', r','),
                       ('ASSIGN', r'='), ('ENUMS', r'\{ENUMS\}'),
                       ('SIGNALS', r'\{SIGNALS\}'), ('SEND', r'\{SEND\}'),
                       ('RECEIVE', r'\{RECEIVE\}'),
                       ('SENDRECEIVE', r'\{SENDRECEIVE\}'),
                       ('WORD', r'[^\s=\(\]\-]+'), ('MISMATCH', r'.')]

        tokens, token_regex = tokenize_init(token_specs)

        for mo in re.finditer(token_regex, string, re.DOTALL):
            kind = mo.lastgroup

            if kind == 'SKIP':
                pass
            elif kind == 'STRING':
                value = mo.group(kind)[1:-1].replace('\\"', '"')
                tokens.append(Token(kind, value, mo.start()))
            elif kind != 'MISMATCH':
                value = mo.group(kind)

                if value in self.KEYWORDS:
                    kind = value

                if kind in names:
                    kind = names[kind]

                tokens.append(Token(kind, value, mo.start()))
            else:
                raise TokenizeError(string, mo.start())

        return tokens
Example #6
    def test_parser_default_keywords(self):
        class Parser(textparser.Parser):
            def token_specs(self):
                return [('SKIP', r'[ \r\n\t]+'),
                        ('NUMBER', r'-?\d+(\.\d+)?([eE][+-]?\d+)?'),
                        ('DOT', '.', r'\.'), ('WORD', r'[A-Za-z0-9_]+'),
                        ('ESCAPED_STRING', r'"(\\"|[^"])*?"'),
                        ('MISMATCH', r'.')]

            def grammar(self):
                return Sequence(
                    'WORD', Optional('WORD'), 'ESCAPED_STRING', 'WORD',
                    Optional(
                        choice(DelimitedList('ESCAPED_STRING'),
                               ZeroOrMore('NUMBER'))), '.')

        datas = [('IF "foo" bar .', ['IF', [], '"foo"', 'bar', [[]], '.'], [
            Token(kind='WORD', value='IF', offset=0), [],
            Token(kind='ESCAPED_STRING', value='"foo"', offset=3),
            Token(kind='WORD', value='bar', offset=9), [[]],
            Token(kind='.', value='.', offset=13)
        ]),
                 ('IF B "" b 1 2 .',
                  ['IF', ['B'], '""', 'b', [['1', '2']], '.'], [
                      Token(kind='WORD', value='IF', offset=0),
                      [Token(kind='WORD', value='B', offset=3)],
                      Token(kind='ESCAPED_STRING', value='""', offset=5),
                      Token(kind='WORD', value='b', offset=8),
                      [[
                          Token(kind='NUMBER', value='1', offset=10),
                          Token(kind='NUMBER', value='2', offset=12)
                      ]],
                      Token(kind='.', value='.', offset=14)
                  ])]

        for text, expected_tree, expected_token_tree in datas:
            tree = Parser().parse(text)
            self.assertEqual(tree, expected_tree)
            tree = Parser().parse(text, token_tree=True)
            self.assertEqual(tree, expected_token_tree)
Example #7
    def tokenize(self, string):
        keywords = set([
            'FormatVersion',
            'Title',
            'Enum',
            'Sig',
            'ID',
            'Len',
            'Mux',
            'CycleTime',
            'Timeout',
            'MinInterval',
        ])

        names = {
            'LPAREN': '(',
            'RPAREN': ')',
            'LBRACE': '[',
            'RBRACE': ']',
            'COMMA': ',',
            'ASSIGN': '=',
            'ENUMS': '{ENUMS}',
            'SIGNALS': '{SIGNALS}',
            'SEND': '{SEND}',
            'RECEIVE': '{RECEIVE}',
            'SENDRECEIVE': '{SENDRECEIVE}',
            'U': '/u:',
            'F': '/f:',
            'O': '/o:',
            'MIN': '/min:',
            'MAX': '/max:',
            'D': '/d:',
            'LN': '/ln:',
            'E': '/e:',
            'M': '-m'
        }

        token_specs = [('SKIP', r'[ \r\n\t]+|//.*?\n'),
                       ('NUMBER', r'-?\d+\.?\d*([eE][+-]?\d+)?'),
                       ('WORD', r'[A-Za-z0-9_\*]+'),
                       ('STRING', r'"(\\"|[^"])*?"'), ('LPAREN', r'\('),
                       ('RPAREN', r'\)'), ('LBRACE', r'\['), ('RBRACE', r'\]'),
                       ('COMMA', r','), ('ASSIGN', r'='),
                       ('ENUMS', r'\{ENUMS\}'), ('SIGNALS', r'\{SIGNALS\}'),
                       ('SEND', r'\{SEND\}'), ('RECEIVE', r'\{RECEIVE\}'),
                       ('SENDRECEIVE', r'\{SENDRECEIVE\}'), ('U', r'/u:'),
                       ('F', r'/f:'), ('O', r'/o:'), ('MIN', r'/min:'),
                       ('MAX', r'/max:'), ('D', r'/d:'), ('LN', r'/ln:'),
                       ('E', r'/e:'), ('M', r'\-m'), ('MISMATCH', r'.')]

        tokens, token_regex = tokenize_init(token_specs)

        for mo in re.finditer(token_regex, string, re.DOTALL):
            kind = mo.lastgroup

            if kind == 'SKIP':
                pass
            elif kind == 'STRING':
                value = mo.group(kind)[1:-1].replace('\\"', '"')
                tokens.append(Token(kind, value, mo.start()))
            elif kind != 'MISMATCH':
                value = mo.group(kind)

                if value in keywords:
                    kind = value

                if kind in names:
                    kind = names[kind]

                tokens.append(Token(kind, value, mo.start()))
            else:
                raise TokenizeError(string, mo.start())

        return tokens