コード例 #1
ファイル: __init__.py プロジェクト: mewbak/rim
    class Strip(TokenizeContext):
        """Context whose pattern chains ``Item`` matches with ``Dot`` tokens.

        The pattern is ``And(Option(Dot), Item, Option(Many(And(Dot, Item))))``.
        NOTE(review): the combinators are defined elsewhere; presumably this
        reads as an optional leading dot, one item, then any number of
        dot-item pairs -- confirm.
        """

        class Dot(Token):
            """Token declared with the pattern ``"."``."""

            pattern = "."

        class Item(TokenizeContext):
            """Element that ``Strip.pattern`` places between ``Dot`` tokens.

            No pattern is declared in this body.
            NOTE(review): presumably one is assigned after the class exists;
            confirm against the rest of the module.
            """

        pattern = And(Option(Dot), Item, Option(Many(And(Dot, Item))))
コード例 #2
ファイル: __init__.py プロジェクト: mewbak/rim
class Indent(TokenizeContext):
    """Context built from the two indentation tokens declared below.

    NOTE(review): Option, Many and Or are defined elsewhere; the pattern
    presumably accepts zero or more Tab or Space2 tokens -- confirm.
    """

    class Tab(Token):
        """Token declared with a single tab character as its pattern."""
        pattern = "\t"

    class Space2(Token):
        """Token declared with two consecutive spaces as its pattern."""
        pattern = "  "

    pattern = Option(Many(Or(Tab, Space2)))
コード例 #3
ファイル: __init__.py プロジェクト: mewbak/rim
class Number(TokenizeContext):
    """Context whose pattern is ``And(Whole, Option(Decimal))``.

    NOTE(review): the combinators are defined elsewhere; presumably this
    reads as a whole part followed by an optional decimal part -- confirm.
    """

    class Spacing(Token):
        """Token declared with the pattern ``"_"``."""

        pattern = "_"

    class LeadingZeros(TokenizeContext):
        """Placeholder context; no pattern is declared in this body.

        NOTE(review): presumably the pattern is assigned later in the
        module -- confirm.
        """

    class Whole(TokenizeContext):
        """Placeholder context used as the first element of ``Number.pattern``.

        No pattern is declared in this body.
        NOTE(review): presumably the pattern is assigned later in the
        module -- confirm.
        """

    class Decimal(TokenizeContext):
        """Context used as the optional second element of ``Number.pattern``.

        Only the nested ``Dot`` token is declared here; no pattern is set
        in this body.
        """

        class Dot(Token):
            """Token declared with the pattern ``"."``."""

            pattern = "."

    # Example layout from the original author: 000 52_300 . 322_000
    # (presumably leading zeros, a whole part with "_" spacing, the dot,
    # then the decimal digits).
    pattern = And(Whole, Option(Decimal))
コード例 #4
ファイル: __init__.py プロジェクト: mewbak/rim
    class Block(TokenizeContext):
        """Context whose pattern begins with the ``Start`` token declared below.

        NOTE(review): Space, Str and Line are not defined in this class and
        resolve to outer names; the pattern presumably allows the start token
        to be followed by spacing plus optional text, or by a Line -- confirm.
        """

        class Start(Token):
            """Token declared with a single-quote character as its pattern."""
            pattern = "'"

        pattern = And(Start, Option(Or(And(Space, Option(Str)), Line)))
コード例 #5
ファイル: __init__.py プロジェクト: mewbak/rim
class Id(TokenizeContext):
    """Context whose pattern is ``And(Start, Option(Tail))``.

    Nested definitions:

    * ``Strip``   -- ``Item`` matches chained with ``Dot`` tokens.
    * ``Special`` -- ``Many(Char)`` over special-symbol characters.
    * ``Base``    -- single-character token shared by ``Start`` and ``Middle``.
    * ``Tail``    -- second, optional element of ``Id.pattern``.

    NOTE(review): every method below takes ``cls`` as its first parameter but
    carries no ``@classmethod`` decorator in this excerpt; presumably the
    decorators exist in the full module or the base classes bind the
    methods -- confirm before relying on how they are called.
    """

    class Strip(TokenizeContext):
        """Context whose pattern chains ``Item`` matches with ``Dot`` tokens."""

        class Dot(Token):
            """Token declared with the pattern ``"."``."""

            pattern = "."

        class Item(TokenizeContext):
            """Element that ``Strip.pattern`` places between ``Dot`` tokens.

            No pattern is declared in this body.
            NOTE(review): presumably one is assigned after the class exists;
            confirm against the rest of the module.
            """

        pattern = And(Option(Dot), Item, Option(Many(And(Dot, Item))))

    class Special(TokenizeContext):
        """Context whose pattern is ``Many(Char)``."""

        class Char(Token):
            """Token covering exactly one special-symbol character."""

            def is_special_symbol_char(cls, l):
                """Return True when the character ``l`` counts as a special symbol.

                TODO: the classification may be imprecise; it is not a
                black-and-white distinction.

                Goal: a special symbol may act as a word separator and
                therefore must not be treated as a "letter".  It seemed
                easiest to define it as every character in the Unicode
                symbol blocks, except e.g. emojis and some letters that
                happen to live there.

                Rules, checked in order:

                * below U+00C0: False for ``_$ "(){}[]``, ASCII digits and
                  ASCII letters, True for anything else;
                * U+2000..U+27FF and U+2900..U+2BFF: True unless the
                  character is in the exclusion strings below;
                * U+3000..U+303F: True;
                * everything else: False.
                """
                c = ord(l)
                if c < 0xC0:
                    # NOTE(review): every character of the second string has
                    # a code point above U+00C0, so it can never match inside
                    # this branch; kept exactly as in the original.
                    for r in ('_$ "(){}[]', 'ŠŒŽšœžŸ'):
                        if l in r:
                            return False
                    for r in (range(0x30, 0x39 + 1), range(0x41, 0x5A + 1),
                              range(0x61, 0x7A + 1)):
                        if c in r:
                            return False
                    return True
                # symbols
                if c in range(0x2000, 0x2800):
                    # NOTE(review): the original tuple was left unclosed in
                    # this excerpt, so further exclusion strings may be
                    # missing -- compare with the upstream file.
                    for r in ("⌚⌛", "⏩⏪⏫⏬⏭⏮⏯⏰⏱⏲⏳⏴⏵⏶⏷⏸⏹⏺"):
                        if l in r:
                            return False
                    return True
                if c in range(0x2900, 0x2C00):
                    # Direct membership test; the original looped over the
                    # characters of this one string with the same outcome.
                    if l in "⬛⭐⭕":
                        return False
                    return True
                if c in range(0x3000, 0x3040):
                    # The original evaluated a bare ``True`` here and fell
                    # through to ``return False``.
                    return True
                return False

            def match(cls, linestr, start=0):
                """Match one special-symbol character of ``linestr`` at ``start``.

                Returns None when ``start`` is at or past the end of
                ``linestr``, when the text at ``start`` begins with ``//``,
                or when the character is not a special symbol.  Otherwise
                returns ``cls.with_linestr(start, start + 1, linestr)``.
                """
                if start >= len(linestr):
                    return None
                # "//" is never consumed as special symbols (presumably
                # because it starts a line comment -- confirm).
                if linestr.startswith('//', start):
                    return None
                l = linestr[start]
                if not cls.is_special_symbol_char(l):
                    return None
                return cls.with_linestr(start, start + 1, linestr)

        pattern = Many(Char)

    class Base(Token):
        """Single-character token filtered by allow and deny lists."""

        # Characters accepted without any further check.
        allowed_chars = '_$'
        # Characters that are always rejected.
        disallowed_chars = '(){}[]" \t/'
        # "see below" in the original: presumably filled in later in the
        # module with token classes that must not start at this position.
        disallowedTokens = []

        def match(cls, linestr, start=0):
            """Match the single character of ``linestr`` at ``start``.

            Returns None when ``start`` is at or past the end of ``linestr``,
            when the character is in ``disallowed_chars``, or when any of
            ``disallowedTokens`` matches at ``start``.  Characters in
            ``allowed_chars`` skip both rejection checks.  Otherwise returns
            ``cls.with_linestr(start, start + 1, linestr)``.
            """
            if start >= len(linestr):
                return None
            l = linestr[start]
            if l not in cls.allowed_chars:
                if l in cls.disallowed_chars:
                    return None
                # ``match`` here is the outer-scope function, not this
                # method: class scope is not searched from inside a method.
                _, _, ok = match(Or(*cls.disallowedTokens), (linestr, start))
                if ok:
                    return None
            return cls.with_linestr(start, start + 1, linestr)

    class Start(Base):
        """``Base`` subclass used as the first element of ``Id.pattern``.

        Adds nothing to ``Base`` in this excerpt.
        """

    class Middle(Base):
        """``Base`` subclass that adds nothing to ``Base`` in this excerpt."""

    class Tail(TokenizeContext):
        """Context used as the optional second element of ``Id.pattern``.

        No pattern is declared in this body.
        NOTE(review): presumably one is assigned later in the module --
        confirm.
        """

        def process_patternMatch(cls, v):
            """Splice a nested ``cls`` match into the list ``v``.

            A non-list ``v`` is returned unchanged.  For a list, the first
            element is kept; when the remainder is exactly one object whose
            type is ``cls``, that object's ``patternMatch`` takes the place
            of the remainder.
            """
            if type(v) is not list:
                return v
            middle, *rest = v
            nested = len(rest) == 1 and type(rest[0]) is cls
            vs = rest[0].patternMatch if nested else rest
            return [middle, *vs] if type(vs) is list else [middle, vs]

    pattern = And(Start, Option(Tail))

    def process_patternMatch(cls, v):
        """Return ``[v[0]]`` if ``v[1]`` is falsy, else ``[v[0], v[1].patternMatch]``."""
        if not v[1]:
            return [v[0]]
        return [v[0], v[1].patternMatch]
コード例 #6
ファイル: __init__.py プロジェクト: mewbak/rim
 def setPattern(cls):
     """Build ``cls.pattern`` and store it on ``cls``.

     The pattern combines, with ``And`` and in this order: ``cls.Start``,
     then ``Option``-wrapped ``Space.White``, ``Or(Expression, Id.Strip)``,
     ``Space.White`` again, and ``cls.End``.
     """
     # Elements are listed in the order they are handed to And.
     parts = [
         cls.Start,
         Option(Space.White),
         Option(Or(Expression, Id.Strip)),
         Option(Space.White),
         Option(cls.End),
     ]
     cls.pattern = And(*parts)
コード例 #7
ファイル: __init__.py プロジェクト: mewbak/rim
    class Line(TokenizeContext):
        """Context whose pattern begins with the ``//`` ``Start`` token.

        NOTE(review): Space and Str resolve to outer names; the pattern
        presumably allows the start token to be followed by spacing plus
        optional text, or by a Line -- confirm.
        """

        class Start(Token):
            """Token declared with the pattern ``"//"``."""
            pattern = "//"

        # ``Line`` below is not this class: the name is not bound until the
        # class body finishes, and class scopes are skipped during lookup,
        # so it resolves to a function- or module-level ``Line``.
        pattern = And(Start, Option(Or(And(Space, Option(Str)), Line)))

    class Block(TokenizeContext):
        """Context whose pattern begins with the ``Start`` token declared below.

        NOTE(review): Space and Str resolve to outer names; the pattern
        presumably allows the start token to be followed by spacing plus
        optional text, or by a Line -- confirm.
        """

        class Start(Token):
            """Token declared with a single-quote character as its pattern."""
            pattern = "'"

        # ``Line`` is looked up in function or module scope, not in the
        # enclosing class body (class scopes are skipped during lookup).
        pattern = And(Start, Option(Or(And(Space, Option(Str)), Line)))

# file/line

# Line.pattern is set by attribute assignment on an already-defined Line
# rather than inside a class body. It references Indent, Space.White,
# Comment.Top, Comment.Block, Comment.Line and Expression, so all of those
# names must exist by the time this statement runs.
Line.pattern = And(Indent, Option(Space.White),
                   Or(Comment.Top, Comment.Block, Expression),
                   Option(Space.White), Option(Comment.Line))

# Logic for tokenizing whole file elsewhere
# class File(TokenizeContext):
# 	pattern = Option(Many(Line))

# primitives

class String(TokenizeContext):
    class Start(Token):
        pattern = '"'

    class End(Token):