Example #1
import re
import token
import tokenize


def register_tokens(token_dict):
    def next_token_slot():
        # Highest token number currently registered; the next free slot is one above it.
        index = max(token.tok_name.keys(), default=0)
        return index + 1

    escaped_tokens = []
    for name, value in token_dict.items():
        slot = next_token_slot()

        setattr(token, name, slot)
        token.tok_name[slot] = name
        token.EXACT_TOKEN_TYPES[value] = slot

        escaped_tokens.append(re.escape(value))

    # Rebuild tokenize's pseudo-token pattern so the new lexemes are tried
    # before the stock alternatives.
    tokenize.PseudoToken = tokenize.Whitespace + tokenize.group(
        *escaped_tokens,
        tokenize.PseudoExtras,
        tokenize.Number,
        tokenize.Funny,
        tokenize.ContStr,
        tokenize.Name,
    )
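A minimal usage sketch, not part of the original module: the token name, lexeme and sample source below are invented, and it assumes a Python version where token.EXACT_TOKEN_TYPES exists (3.8+) so register_tokens can extend it. It reuses the imports above.

# Hypothetical illustration: register a "$" operator, then tokenize a line.
import io

register_tokens({"DOLLAR": "$"})

for tok in tokenize.generate_tokens(io.StringIO("a $ b\n").readline):
    # The new lexeme arrives as an OP whose exact_type maps to the new slot.
    print(tok.string, token.tok_name.get(tok.exact_type, tok.exact_type))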
Example #2
    ]
}

rules = [['NUMBER', tokenize.Number], ['STRING', tokenize.String],
         ['NAME', tokenize.Name], ['PLUS', r'\+'], ['MINUS', r'-'],
         ['MUL', r'\*'], ['POWER', r'\^'], ['DIV', r'/'], ['MOD', r'%'],
         ['LPAR', r'\('], ['RPAR', r'\)'], ['LSQB', r'\['], ['RSQB', r'\]'],
         ['LBRACE', r'\{'], ['RBRACE', r'\}'], ['EQ', r'=='], ['LE', r'<='],
         ['LT', r'<'], ['GE', r'>='], ['GT', r'>'], ['NE', r'!='],
         ['DOT', r'\.'], ['COMMA', r'\,'], ['COLON', r':'], ['ASSIGN', r'='],
         ['NEWLINE', '\n']]

for name, regex in rules:
    lg.add(name, regex)

lg.ignore(tokenize.group(r'\\\r?\n', r'[ \f\t]+', tokenize.Comment))


class DragonLexer:
    def __init__(self):
        self.lexer = lg.build()

    def lex(self, code):
        tokens = self.lexer.lex(code)
        # Iterate instead of calling next() in a bare loop so the generator
        # finishes cleanly when the lexer stream is exhausted (PEP 479).
        for token in tokens:
            tokentype = token.gettokentype()
            if tokentype == 'NAME':
                # Re-tag identifiers that are actually reserved words.
                token.name = reserved.get(token.getstr(), tokentype)
            yield token
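Nothing in the fragment shows where `lg` and `reserved` come from. The calls used (add, ignore, build, gettokentype, getstr) match rply's LexerGenerator API, so the setup below is a guess written purely for illustration; the keyword table and sample input are invented.

# Assumed setup for the fragment above; rply is inferred from the API used.
import tokenize

from rply import LexerGenerator

lg = LexerGenerator()

# Hypothetical reserved-word table consulted when a NAME token is seen.
reserved = {
    'if': 'IF',
    'else': 'ELSE',
    'while': 'WHILE',
}

# ... the rule table and the DragonLexer class from the example go here ...

lexer = DragonLexer()
for tok in lexer.lex('x = 1 + 2\n'):
    print(tok.gettokentype(), repr(tok.getstr()))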
Example #3
import codecs
import io
import token as tokens
import tokenize
from functools import partial

# Teach the token/tokenize modules about ":=" (the walrus operator).
tokens.COLONEQUAL = 0xFF
tokens.tok_name[0xFF] = "COLONEQUAL"
tokenize.EXACT_TOKEN_TYPES[":="] = tokens.COLONEQUAL

tokenize.PseudoToken = tokenize.Whitespace + tokenize.group(
    r":=",
    tokenize.PseudoExtras,
    tokenize.Number,
    tokenize.Funny,
    tokenize.ContStr,
    tokenize.Name,
)


def generate_walrused_source(readline):
    # Tokenize the incoming source up front and keep a mutable copy of the
    # token list so positions can be adjusted while ":=" is rewritten.
    source_tokens = list(tokenize.tokenize(readline))
    modified_source_tokens = source_tokens.copy()

    def inc(token, by=1, page=0):
        start = list(token.start)
        end = list(token.end)

        start[page] += by
        end[page] += by
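An invented illustration of what the patch at the top of this example enables, reusing its imports: on an interpreter that predates the walrus operator, ":=" now arrives from the stock tokenizer as a single OP token whose exact_type resolves to the registered COLONEQUAL slot. The sample source is made up.

# Illustration only; not part of the original module.
sample = b"if (n := 10) > 5:\n    pass\n"
for tok in tokenize.tokenize(io.BytesIO(sample).readline):
    if tok.string == ":=":
        print(tok.string, tokens.tok_name[tok.exact_type])  # := COLONEQUAL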
Example #4
#! /usr/bin/env python3
# -*- coding: utf-8 -*-

import re
import tokenize


# 2016-08-13T23:28+08:00
# From Chromium depot_tools scm.py
def ValidateEmail(email):
    return (re.match(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9._%-]+\.[a-zA-Z]{2,6}$",
                     email) is not None)


# 2017-12-29T14:16+08:00
int_0_255_re = tokenize.group(r'(\d{1,2})|(1\d\d)|(2[0-4]\d)|(25[0-5])')
ipv4_re = ('^' + tokenize.group(int_0_255_re + r'\.') + r'{3}' +
           int_0_255_re + '$')


# References:
# Sams Teach Yourself Regular Expressions in 10 Minutes
def ValidateIPv4(ip_addr):
    return (re.match(ipv4_re, ip_addr) is not None)


# References:
# https://github.com/Microsoft/vcpkg/blob/master/toolsrc/src/vcpkg/base/files.cpp#L9
def ValidateFileName(filename):
    # fullmatch so that a single forbidden character rejects the whole name.
    return (re.fullmatch(r'[^\[\/:*?"<>|\]]+', filename) is not None)
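A few invented spot checks, purely to show the intended behaviour of the three validators; none of these calls appear in the original module.

# Invented examples.
if __name__ == '__main__':
    assert ValidateEmail('user@example.com')
    assert not ValidateEmail('not-an-email')
    assert ValidateIPv4('192.168.0.1')
    assert not ValidateIPv4('256.1.1.1')        # 256 is out of range
    assert ValidateFileName('report-2017.txt')
    assert not ValidateFileName('?invalid')     # '?' is a forbidden character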
Example #5
import re
import tokenize

from mython.trampoline import TokenStream

# ______________________________________________________________________
# Module data

tok_name = tokenize.tok_name.copy()
BANG, MYEXPR, MYSUITE = (tokenize.N_TOKENS + count for count in range(3))
tok_name[BANG] = 'BANG'
tok_name[MYEXPR] = 'MYEXPR'
tok_name[MYSUITE] = 'MYSUITE'
N_TOKENS = tokenize.N_TOKENS + 3

pseudoprog = re.compile(
    tokenize.Whitespace +
    tokenize.group(tokenize.PseudoExtras, tokenize.Number, tokenize.Funny,
                   tokenize.ContStr, tokenize.Name, r"[!]")
    )

CLOSERS = {
    '{' : '}',
    '(' : ')',
    '<' : '>',
    '[' : ']',
}

__DEBUG__ = False

# ______________________________________________________________________
# Class definitions.

class MythonReadliner (object):
Example #6
import io
import tokenize as _tokenize
from typing import Iterator

from pyasdl.__pegen.tokenizer import Tokenizer
from pyasdl.grammar import Module, Sum
from pyasdl.parser import GeneratedParser as _ASDLParser

__all__ = ["parse", "fetch_comments", "is_simple_sum"]
# Since pegen.tokenizer.Tokenizer uses .type instead of .exact_type, it is
# not trivial to change the default comment behavior. A workaround is to
# sanitize the input before passing it to the real parser.

COMMENT_PATTERN = _tokenize.Whitespace + r"--.*?\n"
_tokenize.PseudoToken = _tokenize.Whitespace + _tokenize.group(
    COMMENT_PATTERN,
    _tokenize.PseudoExtras,
    _tokenize.Number,
    _tokenize.Funny,
    _tokenize.ContStr,
    _tokenize.Name,
)


def tokenize(
    source: str, ignore_comments: bool = True
) -> Iterator[_tokenize.TokenInfo]:
    # A wrapper around tokenize.generate_tokens that can drop "--" comment tokens.
    source_buffer = io.StringIO(source)
    for token in _tokenize.generate_tokens(source_buffer.readline):
        if token.string.startswith("--") and ignore_comments:
            continue
        yield token
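An invented call showing the wrapper in use; with ignore_comments left at its default, the "--" line never reaches the caller. The ASDL-ish input is made up for illustration.

# Invented input; only the comment line is special here.
source = "module Demo\n-- dropped before the parser ever sees it\n"
for tok in tokenize(source):
    print(tok.type, repr(tok.string))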
Example #7
def update_event(self, inp=-1):
    self.set_output_val(0, tokenize.group())
Example #8
import tokenize


def group(*choices):
    return tokenize.group(*choices)
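For reference, tokenize.group joins its arguments into a single parenthesised alternation, so the thin wrapper behaves like this (illustrative call):

print(group(r'\d+', r'[A-Za-z_]\w*'))   # -> (\d+|[A-Za-z_]\w*)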
Example #9
import re
import tokenize

from basil.lang.python import TokenUtils
from basil.lang.mython.MyFrontExceptions import MyFrontSyntaxError

# ______________________________________________________________________
# Module data

tok_name = tokenize.tok_name.copy()
BANG, MYEXPR, MYSUITE = (tokenize.N_TOKENS + count for count in xrange(3))
tok_name[BANG] = 'BANG'
tok_name[MYEXPR] = 'MYEXPR'
tok_name[MYSUITE] = 'MYSUITE'
N_TOKENS = tokenize.N_TOKENS + 3

pseudoprog = re.compile(
    tokenize.Whitespace +
    tokenize.group(tokenize.PseudoExtras, tokenize.Number, tokenize.Funny,
                   tokenize.ContStr, tokenize.Name, r"[!]")
    )

CLOSERS = {
    '{' : '}',
    '(' : ')',
    '<' : '>',
    '[' : ']',
}

# ______________________________________________________________________
# Compatibility layer 2.5/2.6

if type(__builtins__) == dict:
    define_next = "next" not in __builtins__.keys()
else: