Esempio n. 1
0
    class Strip(TokenizeContext):
        """Dot-separated run of items, e.g. ``a.b.c`` or ``.a.b``.

        Reading And/Option/Many as sequence/optional/repetition combinators
        (they are defined elsewhere -- confirm), the pattern is: an optional
        leading dot, one item, then zero or more ``Dot Item`` pairs.
        """

        class Dot(Token):
            # Literal separator placed between (and optionally before) items.
            pattern = "."

        class Item(TokenizeContext):
            # No pattern is declared here; presumably it is assigned later in
            # the file -- TODO confirm.
            pass

        # [Dot] Item (Dot Item)*
        pattern = And(Option(Dot), Item, Option(Many(And(Dot, Item))))
Esempio n. 2
0
class Indent(TokenizeContext):
    """Indentation made of tab characters and/or two-space units.

    The whole pattern is wrapped in ``Option``, so (assuming the usual
    combinator meaning -- confirm) no indentation at all is also accepted.
    """

    class Tab(Token):
        # One indentation unit written as a single tab character.
        pattern = "\t"

    class Space2(Token):
        # One indentation unit written as exactly two spaces.
        pattern = "  "

    # Any mix of Tab and Space2 units, repeated.
    pattern = Option(Many(Or(Tab, Space2)))
Esempio n. 3
0
class Number(TokenizeContext):
    """Numeric literal: a whole part optionally followed by a decimal part."""

    class Spacing(Token):
        # Underscore; presumably the digit-group separator seen in the
        # example below (``52_300``) -- not referenced by the pattern here.
        pattern = "_"

    class LeadingZeros(TokenizeContext):
        # No pattern declared here and not referenced by ``Number.pattern``
        # below; presumably wired up elsewhere -- TODO confirm.
        pass

    class Whole(TokenizeContext):
        # No pattern declared here; presumably assigned elsewhere -- TODO confirm.
        pass

    class Decimal(TokenizeContext):
        class Dot(Token):
            # Decimal point.
            pattern = "."

    # Example of the intended shape: 000 52_300 . 322_000
    # (leading zeros, whole part, dot, decimal part).
    pattern = And(Whole, Option(Decimal))
Esempio n. 4
0
    class Block(TokenizeContext):
        """Context opened by a single-quote (``'``) marker.

        After the marker the pattern optionally accepts either ``Space``
        followed by an optional ``Str``, or a ``Line``. ``Space``, ``Str`` and
        ``Line`` are resolved from outside this class body.
        """

        class Start(Token):
            # Opening marker.
            pattern = "'"

        # Start [ (Space [Str]) | Line ]
        pattern = And(Start, Option(Or(And(Space, Option(Str)), Line)))
Esempio n. 5
0
class Id(TokenizeContext):
    """Identifier: one ``Start`` character optionally followed by a ``Tail``.

    Also hosts the ``Strip`` (dot-separated items) and ``Special`` (run of
    special-symbol characters) sub-contexts.
    """

    class Strip(TokenizeContext):
        """Dot-separated run of items: ``[Dot] Item (Dot Item)*``."""

        class Dot(Token):
            pattern = "."

        class Item(TokenizeContext):
            # No pattern declared here; presumably assigned elsewhere.
            pass

        pattern = And(Option(Dot), Item, Option(Many(And(Dot, Item))))

    class Special(TokenizeContext):
        """One or more consecutive special-symbol characters."""

        class Char(Token):
            """A single special-symbol character."""

            @classmethod
            def is_special_symbol_char(cls, l):
                """Return True when the single character ``l`` is a special symbol.

                TODO: might not be precise; the distinction is not black and
                white. Goal: a special symbol may act as a word separator and
                therefore must not be a "letter". It seemed easiest to define
                it as every character in the Unicode symbol blocks, except
                e.g. emojis and some letters that happen to live there.

                Args:
                    l: A one-character string (``ord`` is applied to it).

                Returns:
                    bool: True for special symbols, False otherwise.
                """
                c = ord(l)
                if c < 0xC0:
                    # Identifier punctuation, whitespace, quotes and brackets
                    # are handled by other tokens and are never special.
                    if l in '_$ "(){}[]':
                        return False
                    # ASCII digits, upper-case and lower-case letters.
                    # Letters such as Š/Œ/Ž live at U+0152 and above, so they
                    # never reach this branch; they fall through to the final
                    # ``return False``.
                    for r in (range(0x30, 0x39 + 1), range(0x41, 0x5A + 1),
                              range(0x61, 0x7A + 1)):
                        if c in r:
                            return False
                    return True
                # symbols
                if c in range(0x2000, 0x2800):
                    # Emoji-like code points inside the symbol blocks are
                    # excluded.
                    for r in (
                            "⌚⌛", "⏩⏪⏫⏬⏭⏮⏯⏰⏱⏲⏳⏴⏵⏶⏷⏸⏹⏺",
                            "☄☔☕☘☝☠☢☣☦☪☮☯☸♈♉♊♋♌♍♎♏♐♑♒♓♿⚒⚓⚔⚕⚖⚗⚙⚛⚜⚡⚪⚫⚰⚱⚽⚾⛄⛅⛈⛎⛏⛑⛓⛔⛩⛪⛰⛱⛲⛳⛴⛵⛷⛸⛹⛺⛽✅✊✋✌✍✨❌❎❓❔❕❗➕➖➗➰➿"
                    ):
                        if l in r:
                            return False
                    return True
                if c in range(0x2900, 0x2c00):
                    if l in "⬛⭐⭕":
                        return False
                    return True
                # U+3000..U+303F. The result must actually be returned here;
                # a bare ``True`` expression would be discarded and the
                # function would fall through to ``return False``.
                if c in range(0x3000, 0x3040):
                    return True
                return False

            @classmethod
            def match(cls, linestr, start=0):
                """Match one special-symbol character at ``linestr[start]``.

                Returns None at end of input, when a ``//`` sequence begins at
                ``start``, or when the character is not a special symbol;
                otherwise the result of ``cls.with_linestr`` spanning exactly
                one character.
                """
                if start >= len(linestr):
                    return None
                # Never consume the start of a "//" sequence as symbols.
                if linestr.startswith('//', start):
                    return None
                if not cls.is_special_symbol_char(linestr[start]):
                    return None
                return cls.with_linestr(start, start + 1, linestr)

        pattern = Many(Char)

    class Base(Token):
        """Single identifier character, shared by ``Start`` and ``Middle``."""

        # Always accepted, without consulting ``disallowedTokens``.
        allowed_chars = '_$'
        # Never accepted.
        disallowed_chars = '(){}[]" \t/'
        disallowedTokens = []  # see below

        @classmethod
        def match(cls, linestr, start=0):
            """Match one identifier character at ``linestr[start]``.

            Returns None at end of input, for a character listed in
            ``disallowed_chars``, or when any token in ``disallowedTokens``
            matches at ``start``; otherwise the result of
            ``cls.with_linestr`` spanning exactly one character.
            """
            if start >= len(linestr):
                return None
            l = linestr[start]
            if l in cls.allowed_chars:
                pass
            elif l in cls.disallowed_chars:
                return None
            else:
                # ``match`` is the module-level matcher, not this method.
                _, _, ok = match(Or(*cls.disallowedTokens), (linestr, start))
                if ok:
                    return None
            return cls.with_linestr(start, start + 1, linestr)

    class Start(Base):
        pass

    class Middle(Base):
        pass

    class Tail(TokenizeContext):
        # No pattern declared here; presumably assigned elsewhere.

        @classmethod
        def process_patternMatch(cls, v):
            """Flatten a nested ``[middle, Tail]`` match into one flat list.

            When ``v`` is a list whose remainder is a single ``Tail``
            instance, that instance's ``patternMatch`` is spliced in after
            the first element. Non-list values are returned unchanged.
            """
            if type(v) is list:
                middle, *rest = v
                vs = rest[0].patternMatch if len(rest) == 1 and type(
                    rest[0]) is cls else rest
                return [middle, *vs] if type(vs) is list else [middle, vs]
            return v

    pattern = And(Start, Option(Tail))

    @classmethod
    def process_patternMatch(cls, v):
        """Return ``[start]``, or ``[start, tail.patternMatch]`` when a tail matched."""
        return [v[0]] if not v[1] else [v[0], v[1].patternMatch]
Esempio n. 6
0
 def setPattern(cls):
     """Assign ``cls.pattern``: Start, optional whitespace, optional content
     (an ``Expression`` or an ``Id.Strip``), optional whitespace, optional End.
     """
     opener = cls.Start
     leading_space = Option(Space.White)
     content = Option(Or(Expression, Id.Strip))
     trailing_space = Option(Space.White)
     closer = Option(cls.End)
     cls.pattern = And(opener, leading_space, content, trailing_space, closer)
Esempio n. 7
0
    class Line(TokenizeContext):
        """Context opened by a ``//`` marker.

        After the marker the pattern optionally accepts either ``Space``
        followed by an optional ``Str``, or a ``Line``.
        """

        class Start(Token):
            # Opening marker.
            pattern = "//"

        # NOTE: ``Line`` below is evaluated while this class body is still
        # executing, so it resolves to the module-level ``Line``, not to this
        # nested class. ``Space`` and ``Str`` also come from outside.
        pattern = And(Start, Option(Or(And(Space, Option(Str)), Line)))

    class Block(TokenizeContext):
        """Context opened by a single-quote (``'``) marker.

        After the marker the pattern optionally accepts either ``Space``
        followed by an optional ``Str``, or a ``Line``.
        """

        class Start(Token):
            # Opening marker.
            pattern = "'"

        # NOTE: class bodies do not see names from an enclosing class body,
        # so ``Line`` here resolves to the module-level ``Line`` rather than
        # a sibling nested class. ``Space`` and ``Str`` also come from outside.
        pattern = And(Start, Option(Or(And(Space, Option(Str)), Line)))


# file/line

# The pattern is assigned after the class definitions, presumably because it
# refers to contexts (Comment, Expression) that must already exist.
# Shape: Indent, optional whitespace, then one of a top comment, a block
# comment or an expression, optional whitespace, optional line comment.
Line.pattern = And(Indent, Option(Space.White),
                   Or(Comment.Top, Comment.Block, Expression),
                   Option(Space.White), Option(Comment.Line))

# Logic for tokenizing whole file elsewhere
# class File(TokenizeContext):
# 	pattern = Option(Many(Line))

# primitives


class String(TokenizeContext):
    class Start(Token):
        pattern = '"'

    class End(Token):