Esempio n. 1
0
    def test_ignore_recursion(self):
        """A long run of purely-ignored input yields no tokens (and must
        not recurse once per ignored match)."""
        generator = LexerGenerator()
        generator.ignore(r"\s")
        lexer = generator.build()
        assert list(lexer.lex(" " * 2000)) == []
Esempio n. 2
0
    def test_arithmetic(self):
        """End-to-end: lex and parse "3*4+5" with left-assoc precedence."""
        lexgen = LexerGenerator()
        for token_name, pattern in [
            ("NUMBER", r"\d+"),
            ("PLUS", r"\+"),
            ("TIMES", r"\*"),
        ]:
            lexgen.add(token_name, pattern)

        parsegen = ParserGenerator(
            ["NUMBER", "PLUS", "TIMES"],
            precedence=[("left", ["PLUS"]), ("left", ["TIMES"])],
        )

        @parsegen.production("main : expr")
        def main(p):
            return p[0]

        @parsegen.production("expr : expr PLUS expr")
        @parsegen.production("expr : expr TIMES expr")
        def expr_binop(p):
            # Only "+" and "*" can appear here given the grammar above.
            lhs = p[0].getint()
            rhs = p[2].getint()
            if p[1].getstr() == "+":
                return BoxInt(operator.add(lhs, rhs))
            return BoxInt(operator.mul(lhs, rhs))

        @parsegen.production("expr : NUMBER")
        def expr_num(p):
            return BoxInt(int(p[0].getstr()))

        assert parsegen.build().parse(lexgen.build().lex("3*4+5"))
Esempio n. 3
0
    def test_regex_flags(self):
        """re flags given to add() reach the compiled rule: with DOTALL,
        ``.`` spans the newline and one token covers the whole input."""
        generator = LexerGenerator()
        generator.add("ALL", r".*", re.DOTALL)

        stream = generator.build().lex("test\ndotall")
        token = stream.next()
        assert (token.source_pos.lineno, token.source_pos.colno) == (1, 1)
        assert token.getstr() == "test\ndotall"

        with raises(StopIteration):
            stream.next()
Esempio n. 4
0
    def test_position(self):
        """Tokens report 1-based line/column positions across ignored
        whitespace, including embedded newlines."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")
        lexer = generator.build()

        stream = lexer.lex("2 + 3")
        for line, col in [(1, 1), (1, 3), (1, 5)]:
            token = stream.next()
            assert token.source_pos.lineno == line
            assert token.source_pos.colno == col
        with raises(StopIteration):
            stream.next()

        # A newline inside ignored whitespace bumps lineno and resets colno.
        stream = lexer.lex("2 +\n    37")
        for line, col in [(1, 1), (1, 3), (2, 5)]:
            token = stream.next()
            assert token.source_pos.lineno == line
            assert token.source_pos.colno == col
        with raises(StopIteration):
            stream.next()
    def test_basic_lexer(self):
        # Translation-level test: ``f`` is run through self.run, which
        # presumably compiles it (RPython-style) -- confirm.  Under that
        # lexer an exhausted stream's next() returns None rather than
        # raising StopIteration (see the final check below).
        lg = LexerGenerator()
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")

        l = lg.build()

        def f(n):
            # Lex "n+n+n" and sum the NUMBER tokens, verifying strict
            # NUMBER/PLUS alternation; negative returns encode failures.
            tokens = l.lex("%d+%d+%d" % (n, n, n))
            i = 0
            s = 0
            while i < 5:
                t = tokens.next()
                if i % 2 == 0:
                    if t.name != "NUMBER":
                        return -1
                    s += int(t.value)
                else:
                    if t.name != "PLUS":
                        return -2
                    if t.value != "+":
                        return -3
                i += 1
            # Exactly five tokens expected; anything more is an error.
            if tokens.next() is not None:
                return -4
            return s

        # 14 + 14 + 14 == 42
        assert self.run(f, [14]) == 42
Esempio n. 6
0
    def test_repr(self):
        """str() of a token stream is printable both before and between
        next() calls."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")

        stream = generator.build().lex("2 + 3")
        assert str(stream) is not None
        token = stream.next()
        assert (token.name, token.value) == ("NUMBER", "2")
        assert str(stream) is not None
        assert stream.next().name == "PLUS"
Esempio n. 7
0
    def test_regex_flags_ignore(self):
        """Flags are honoured on ignore rules too: a DOTALL ignore rule
        swallows the entire multi-line input before any token matches."""
        generator = LexerGenerator()
        generator.add("ALL", r".*", re.DOTALL)
        generator.ignore(r".*", re.DOTALL)

        stream = generator.build().lex("test\ndotall")

        with raises(StopIteration):
            stream.next()
Esempio n. 8
0
    def test_error(self):
        """Unlexable input raises LexingError whose repr carries the
        source position."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")

        stream = generator.build().lex('fail')
        with raises(LexingError) as excinfo:
            stream.next()

        assert 'SourcePosition(' in repr(excinfo.value)
Esempio n. 9
0
    def test_ignore(self):
        """Ignored whitespace is skipped but still advances source_pos.idx."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        generator.ignore(r"\s+")
        stream = generator.build().lex("2 + 3")

        for expected_name, expected_value in [
            ("NUMBER", "2"),
            ("PLUS", "+"),
            ("NUMBER", "3"),
        ]:
            token = stream.next()
            assert (token.name, token.value) == (expected_name, expected_value)
        # "3" sits at index 4 of "2 + 3": ignored spaces count toward idx.
        assert token.source_pos.idx == 4
        with raises(StopIteration):
            stream.next()
Esempio n. 10
0
    def test_newline_position(self):
        """A NEWLINE token increments lineno and resets colno for the
        token that follows it."""
        generator = LexerGenerator()
        generator.add("NEWLINE", r"\n")
        generator.add("SPACE", r" ")
        stream = generator.build().lex(" \n ")

        for line, col in [(1, 1), (1, 2), (2, 1)]:
            token = stream.next()
            assert token.source_pos.lineno == line
            assert token.source_pos.colno == col
Esempio n. 11
0
    def test_simple(self):
        """Basic NUMBER/PLUS tokenisation of "2+3".  This stream variant
        returns None (rather than raising) once exhausted."""
        generator = LexerGenerator()
        generator.add("NUMBER", r"\d+")
        generator.add("PLUS", r"\+")
        stream = generator.build().lex("2+3")

        for expected_name, expected_value in [
            ("NUMBER", "2"),
            ("PLUS", "+"),
            ("NUMBER", "3"),
        ]:
            token = stream.next()
            assert (token.name, token.value) == (expected_name, expected_value)
        # "3" is at index 2 of "2+3".
        assert token.source_pos.idx == 2
        assert stream.next() is None
Esempio n. 12
0
 def __init__(self):
     # One LexerGenerator per instance; token rules are presumably
     # registered elsewhere before build() is called -- confirm callers.
     self.lexer = LexerGenerator()
Esempio n. 13
0
from rply import LexerGenerator


# Token rules are tried in registration order, so more specific patterns
# must precede broader ones.
lg = LexerGenerator()

lg.add("STEP", r"s")
lg.add("TURN_LEFT", r"l")
lg.add("TURN_RIGHT", r"r")
# Any lowercase letter not already claimed by the three rules above.
lg.add("FUNC", r"a|b|c|d|e|f|g|h|i|j|k|m|n|o|p|q|t|u|v|w|x|y|z")
lg.add("COLON", r"\:")
# One or more newlines, optionally folding in a space-padded blank line.
lg.add("NEWLINE", r"\n+ *\n*")
lg.add("NAME", r"[A-Z]")
lg.add("NUMBER", r"\d+")
lg.add("PLUS", r"\+")
lg.add("MINUS", r"\-")
# These token names are the literal characters themselves.
lg.add("(", r"\(")
lg.add(")", r"\)")
lg.add(",", r"\,")

lg.ignore(r" +")    # runs of spaces
lg.ignore(r"\#.*")  # '#' comments to end of line

# Expose the rule names for the parser.  NOTE(review): relies on
# LexerGenerator.rules being readable -- confirm with the rply version.
TOKENS = [r.name for r in lg.rules]

lexer = lg.build()
Esempio n. 14
0
    'PARAM_NAME': r'((in|out|inout)) (\D\w+) ((int|varchar|double|bit|boolean))',
    'L_PAREN': r'\(+',
    'R_PAREN': r'\)+',
    'COMMA': ',+',
    'SELECT': r'(select)+',
    'FROM': r'(from)+',
    'ALL': r'(\*)+',
    'UPDATE': r'(update)+',
    'VALUES': r'(values)+',
    'SEMI': r';+',
    'WHERE': r'(where)+',
    'SET': r'(set)+',
    'OPERATOR': r'(\=|\<\=|\>\=|\>|\<|\<\>|\!\=)+',
    'ARITHMETICAL': r'(\+|\-|\/)+',

    'SINGLE_QUOTE': '\'+',
    'LT': '<+',
    'NUMBER': r'\d+',
    'ALIAS': r'(as)+',
    'OBJECT_NAME': r'[a-z_]+',
}

lg = LexerGenerator()
lg.ignore(r'\s')  # skip whitespace one character at a time

# Register every rule from the TOKENS mapping defined above.
# NOTE(review): dict iteration order determines rule priority here; on
# Python < 3.7 that order was not guaranteed -- confirm intended order.
for tok_name, tok_val in TOKENS.items():
    lg.add(tok_name, tok_val)



Esempio n. 15
0
File: parser.py Progetto: dirk/stalk
from rply import ParserGenerator, LexerGenerator

# Lexer rules for the stalk language.  rply tries rules in registration
# order, so KEYWORD (identifier followed by ':') must precede IDENTIFIER.
lg = LexerGenerator()

lg.add("INTEGER", r"-?0|([1-9][0-9]*)")
lg.add("DECIMAL", r"\.[0-9]+")
_id = r"[A-Za-z][A-Za-z0-9_]*"
lg.add("KEYWORD", _id + r":")  # e.g. "foo:" -- checked before IDENTIFIER
lg.add("IDENTIFIER", _id)
lg.add("SYMBOL", r":" + _id)   # e.g. ":foo"
_comment = r"[ \t]*#[^\n]*"
lg.add("COMMENT", _comment)
# Brackets absorb adjacent layout whitespace so the parser never sees it.
lg.add("LPAREN", r"\([ \t\n]*")
lg.add("RPAREN", r"[ \t\n]*\)")
# TODO: Maybe clear this up to be prettier.
lg.add("PREFACE", r"<[A-Za-z0-9_:@, \t\n]+>[ \t\n]*")
lg.add("LBRACK", r"{[ \t\n]*")
lg.add("RBRACK", r"[ \t\n]*}")
lg.add("VERT", r"\|[ \t\n]*")
lg.add("LSQ", r"\[[ \t\n]*")
lg.add("RSQ", r"[ \t\n]*\]")
# Line continuation: trailing backslash, optional comment, then newline.
lg.add("CONT", r"[ \t]+\\(" + _comment + r")?\n[ \t]*")
lg.add("SWS", r"[ \t]+")  # significant whitespace
lg.add("COMMA", ",[ \t\n]*")
lg.add("TERMINAL", r"[ \t]*\n[ \t\n]*")  # statement terminator
# TODO: improve string lexing (no escape sequences are supported yet).
lg.add("STRING", r"\"[^\"]*\"")
# TODO: Finalize operators
lg.add("OPERATOR", r"[+\-=*/\^]")
Esempio n. 16
0
def lex():
    """Build and return the Karel lexer.

    Rules are matched in registration order, so keyword patterns (with
    ``(?!\\w)`` guards where a bare prefix would be ambiguous) come
    before the catch-all IDENTIFIER rule.

    Fix: all regex literals are now raw strings -- forms like '\\(' are
    invalid escape sequences in plain strings (SyntaxWarning on 3.12+).
    """
    lg = LexerGenerator()

    # build up a set of token names and regexes they match
    lg.add('WHITESPACE', r'[ ]+')
    lg.add('INTEGER', r'-?\d+')
    lg.add('IF', r'if(?!\w)')
    lg.add('ELSE', r'else(?!\w)')
    lg.add('WHILE', r'while(?!\w)')
    lg.add('FOR', r'for i in range')
    lg.add('FUNCTION', r'def(?!\w)')
    lg.add('COLON', r':')
    lg.add('OPENPAREN', r'\(')
    lg.add('CLOSEPAREN', r'\)')
    lg.add('NEWLINE', r'\n')
    lg.add('IMPORT', r'from karel import \*')
    lg.add('BEGIN', r'begin_karel_program')
    lg.add('END', r'end_karel_program')
    lg.add('NOT', r'not(?!\w)')
    # commands
    lg.add('MOVE', r'move')
    lg.add('LEFTTURN', r'turn_left')
    lg.add('PUTBEEPER', r'put_beeper')
    lg.add('PICKBEEPER', r'pick_beeper')
    # conditions
    lg.add('FACENORTH', r'facing_north')
    lg.add('FACESOUTH', r'facing_south')
    lg.add('FACEWEST', r'facing_west')
    lg.add('FACEEAST', r'facing_east')
    lg.add('FRONTCLEAR', r'front_is_clear')
    lg.add('LEFTCLEAR', r'left_is_clear')
    lg.add('RIGHTCLEAR', r'right_is_clear')
    lg.add('PRESENT', r'beepers_present')
    lg.add('INBAG', r'beepers_in_bag')
    # NOTE(review): only reachable when 'not' is followed by a word
    # character, since NOT above already claims the bare keyword.
    lg.add('NOTCHECK', r'not')
    lg.add('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*')

    # ignore comments and docstrings (whitespace is a real token above)
    lg.ignore(r'#.*\n')
    lg.ignore(r'"""(.|\n)*?"""')
    lg.ignore(r"'''(.|\n)*?'''")

    lexer = lg.build()

    return lexer
Esempio n. 17
0
class Lexer():
    """Token definitions for the toy print/arithmetic language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register token rules; rply tries them in this order."""
        # Print keyword
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Operators
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'\-')
        # BUG FIX: was r'\\' (a literal backslash); '/' is the division
        # operator, consistent with MULTIPLY/PLUS/MINUS above.
        self.lexer.add('DIVIDE', r'\/')
        self.lexer.add('MULTIPLY', r'\*')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Ignore spaces (raw string: '\s' is an invalid escape in a
        # plain str literal; SyntaxWarning on Python 3.12+)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Register all rules and return the built rply lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 18
0
class Lexer():
    """Lexer for the robot-control language (original Spanish comments
    translated to English)."""

    # (token name, pattern) pairs; rply tries rules in this order, so
    # the original registration order is preserved exactly.
    _TOKENS = [
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('COMMA', r'\,'),
        ('POINT', r'\.'),
        ('SEMI_COLON', r'\;'),
        ('EQUAL', r'\='),
        ('DECLARE', r'DECLARE'),
        ('NUMBER', r'\d+'),
        ('IMPORT', r'IMPORT'),
        ('ENDDO', r'Enddo'),
        ('CALL', r'Call'),
        ('INCLI', r'Inclination'),
        ('OBJ', r'Object'),
        ('SOUND', r'Sound'),
        ('INC', r'Inc'),
        ('DEC', r'Dec'),
        ('DOW', r'Dow'),
        ('BRIGHT', r'Brightness'),
        ('VIB', r'Vibration'),
        ('MOV', r'Move'),
        ('TEMP', r'Temperature'),
        ('COMMENT', r'//'),
        ('TIMES', r'Times'),
        ('FOR', r'For'),
        ('FEND', r'Fend'),
        ('CASE', r'CASE'),
        ('WHEN', r'WHEN'),
        ('THEN', r'THEN'),
        ('ELSE', r'ELSE'),
        # NOTE(review): the space inside 'END CASE' is also matched by
        # the whitespace ignore rule, so this token may never fire for
        # real input -- confirm.
        ('END_CASE', r'END CASE'),
        ('PROCEDURE', r'Procedure'),
        ('BEGIN', r'begin'),
        ('END', r'end'),
        ('MAIN', r'Main'),
        # BUG FIX: was '[a-zA-Z0-9/]*', which can match the empty string
        # (a zero-width token rule never advances the lexer).
        ('TEXT', r'[a-zA-Z0-9/]+'),
    ]

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every rule, in order, plus the whitespace ignore."""
        for name, pattern in self._TOKENS:
            self.lexer.add(name, pattern)
        # Raw string: '\s' is an invalid escape in a plain str literal.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Build and return the configured rply lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 19
0
# -*- coding:utf-8 -*-
from rply import  LexerGenerator
from rply.token import BaseBox
lg = LexerGenerator()
# Add takes a rule name, and a regular expression that defines the rule.
# rply tries rules in registration order, so a longer keyword must come
# before any keyword that is a prefix of it (ELSEIF before ELSE).
#lg.add("COMMENT", r"\s*\*[^\n]*")
#  ([0-9]+)|([0-9]*\.[0-9]+)|(0x[0-9A-Fa-f]+)
# Dates: 0d followed by exactly eight digits.
lg.add("DATE", r"0d[0-9]{8}")
# Hex first, then decimals with a fraction part, then plain integers.
lg.add("NUMBER", r"(0x[0-9A-Fa-f]+)|([0-9]*\.[0-9]+)|([0-9]+)")
# if / elseif / else / endif
lg.add("IF", r"if|IF")
#lg.add("THEN",r"then|THEN")
# BUG FIX: ELSEIF must be registered before ELSE; otherwise "elseif"
# lexed as ELSE followed by IF (ELSE matched its prefix first).
lg.add("ELSEIF", "ELSEIF|elseif")
lg.add("ELSE", r"ELSE|else")
lg.add("ENDIF", "endif|ENDIF")
# do
lg.add("DO", "do|DO")
# do while
lg.add("WHILE", r"while|WHILE")
# end do
lg.add("ENDDO", r"ENDDO|enddo")
# do case
lg.add("CASE", r"case|CASE")
lg.add("ENDCASE", r"ENDCASE|endcase")
# otherwise
lg.add("OTHERWISE", r"otherwise|OTHERWISE")
# exit
lg.add("EXIT", r"exit|EXIT")
# for, for each
lg.add("FOR", r"for|FOR")
lg.add("TO", r"to|TO")
0
class Lexer:
    """Tokeniser for the toy language: builds an rply lexer over the
    source given at construction time."""

    def __init__(self, code):
        self.code = code
        self.lg = LexerGenerator()
        self.lg.ignore(r'\s+')
        self.lg.add('COMMENT', r';')
        # Two STRING spellings: double- or single-quoted (greedy within
        # one line; no escape handling).
        self.lg.add('STRING', r'".*"')
        self.lg.add('STRING', r'\'.*\'')
        self.lg.add('IF', r'if')
        self.lg.add('ELSE', r'else')
        self.lg.add('LPAREN', r'\(')
        self.lg.add('RPAREN', r'\)')
        self.lg.add('LBRACE', r'\{')
        self.lg.add('RBRACE', r'\}')
        # Two-character comparisons are registered before their
        # one-character prefixes so '==' is not read as two '='.
        self.lg.add('IS_EQUAL_TO', r'==')
        self.lg.add('EQUAL', r'=')
        self.lg.add('GREATER_EQUAL', r'>=')
        self.lg.add('LESSER_EQUAL', r'<=')
        self.lg.add('LESSER', r'<')
        self.lg.add('GREATER', r'>')
        # BUG FIX: the PLUS and MINUS patterns were swapped (PLUS
        # matched '-' and MINUS matched '+').
        self.lg.add('PLUS', r'\+')
        self.lg.add('MINUS', r'-')
        self.lg.add('COMMA', r',')
        self.lg.add('NUMBER', r'\d+')
        self.lg.add('PRINT', r'print')
        self.lg.add('NAME', r'[a-zA-Z_][a-zA-Z0-9_]*')
        self.lexer = self.lg.build()

    def lex(self):
        """Tokenise self.code and return all tokens as a list."""
        tokens = []
        stream = self.lexer.lex(self.code)
        while True:
            try:
                tokens += [stream.next()]
            except StopIteration:
                break
        return tokens
Esempio n. 21
0
class Lexer():
    """Token rules for the small out/if arithmetic language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register token rules; rply tries them in this order."""
        # Print keyword
        self.lexer.add('OUT', r'out')
        # If keyword
        self.lexer.add('IF', r'if')
        # Parentheses
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Braces
        self.lexer.add('OPEN_BRACE', r'\{')
        self.lexer.add('CLOSE_BRACE', r'\}')
        # Semicolon
        self.lexer.add('SEMI_COLON', r'\;')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MULTIPLY', r'\*')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Ignore spaces (raw string: '\s' is an invalid escape in a
        # plain str literal; SyntaxWarning on Python 3.12+)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Build and return the configured lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 22
0
class Lexer():
    """Token rules for the Portuguese-keyword toy language (SE/SENAO)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register rules; keywords must precede the IDENTIFIER catch-all."""
        self.lexer.add('NUMBER', r'\d+')
        # Operators
        self.lexer.add('PLUS', r'\+')
        self.lexer.add('MINUS', r'-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'/')
        # Comparisons
        self.lexer.add('BIGGER', r'\>')
        self.lexer.add('SMALLER', r'\<')
        self.lexer.add('EQUAL', r'\=')
        self.lexer.add('DIFF', r'\!=')
        self.lexer.add('OPEN_PARENS', r'\(')
        self.lexer.add('CLOSE_PARENS', r'\)')
        self.lexer.add('OPEN_BRACKETS', r'\{')
        self.lexer.add('CLOSE_BRACKETS', r'\}')
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('QUOTE', r'\"')
        # Variables
        self.lexer.add('ATTRIBUTION', r':=')
        self.lexer.add('VAR', r'var')
        # Keywords: both cases share one token name; SENAO precedes its
        # prefix SE.  NOTE(review): bare keyword patterns also match
        # identifier prefixes (e.g. "sete" -> IF + ...) -- confirm.
        self.lexer.add('ELSE', r'SENAO')
        self.lexer.add('ELSE', r'senao')
        self.lexer.add('IF', r'SE')
        self.lexer.add('IF', r'se')
        self.lexer.add('PRINT', r'PRINT')
        self.lexer.add('PRINT', r'print')
        # Identifier: must stay last so keywords win.
        self.lexer.add('IDENTIFIER', r'[a-zA-Z_][a-zA-Z_0-9]*')
        # Raw string: '\s' is an invalid escape in a plain str literal.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Build and return the configured lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 23
0
class Lexer:
    """Turkish-keyword toy language lexer (yazdır / tekrar / oku ...)."""

    # (name, pattern) pairs in matching-priority order: multi-character
    # operators are listed before their single-character prefixes.
    _RULES = (
        ("PRINT", r"yazdır"),
        ("LOOP", r"tekrar"),
        ("READ", r"oku"),
        ("IF", r"ise"),
        ("ELSE", r"değil"),
        ("==", r"=="),
        ("!=", r"!="),
        ("<=", r"<="),
        (">=", r">="),
        (">", r">"),
        ("<", r"<"),
        ("+=", r"\+="),
        ("-=", r"\-="),
        ("=", r"="),
        ("STRING", r"'.*'"),
        ("FLOAT", r"\d+(\.\d+)"),  # r"[-]?\d+(\.\d+)"
        ("INTEGER", r"\d+"),  # [-]?\d+
        ("BOOLEAN", r"(doğru|yanlış)"),
        ("ADD", r"\+"),
        ("SUB", r"-"),
        ("MUL", r"\*"),
        ("DIV", r"\/"),
        ("MOD", r"\%"),
        ("(", r"\("),
        (")", r"\)"),
        ("[", r"\["),
        ("]", r"\]"),
        ("{", r"\{"),
        ("}", r"\}"),
        (",", r","),
        ("IDENTIFIER", r"[_\w]*[_\w0-9]+"),
    )

    def __init__(self):
        self.lg = LexerGenerator()
        # Whitespace and // line comments are skipped entirely.
        self.lg.ignore(r"\s+")
        self.lg.ignore(r"//.*")
        for name, pattern in self._RULES:
            self.lg.add(name, pattern)

    def build(self):
        """Return the compiled rply lexer."""
        return self.lg.build()
Esempio n. 24
0
class Lexer():
    """Pascal-like language lexer."""

    # Rules in priority order: keywords first, two-character operators
    # before their one-character prefixes, number literals before the ID
    # catch-all.  NOTE(review): FTYPE and ITYPE are each registered
    # twice -- once for the type keyword and once for the literal --
    # confirm the parser expects both spellings under one token name.
    _RULES = [
        ('PRINT', r'print'),
        ('PROGRAM', r'program'),
        ('TYPE', r'type'),
        ('VAR', r'var'),
        ('ARRAY', r'array'),
        ('OF', r'of'),
        ('FOR', r'for'),
        ('TO', r'to'),
        ('IF', r'if'),
        ('ELSE', r'else'),
        ('THEN', r'then'),
        ('AND', r'and'),
        ('OR', r'or'),
        ('NOT', r'not'),
        ('BEGIN', r'begin'),
        ('END', r'end'),
        ('ITYPE', r'integer'),
        ('FTYPE', r'double'),
        ('DO', r'do'),
        ('BTYPE', r'boolean'),
        ('FUNCTION', r'function'),
        ('BOOLEAN', r'(true|false)'),
        # Parenthesis
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        # Brackets
        ('OPEN_BRACKET', r'\['),
        ('CLOSE_BRACKET', r'\]'),
        # Semi colon
        ('SEMI_COLON', r'\;'),
        # Assignment (':=' before the bare ':')
        ('ASSIGNMENT', r'\:\='),
        ('COLON', r'\:'),
        # Delimiters ('..' before '.')
        ('COMMA', r'\,'),
        ('RANGE', r'\.\.'),
        ('DOT', r'\.'),
        # Operators
        ('EQUAL', r'\='),
        ('SUM', r'\+'),
        ('SUB', r'\-'),
        ('MUL', r'\*'),
        ('DIV', r'\/'),
        ('POINTER', r'\^'),
        ('NEQUAL', r'\<\>'),
        ('LESS', r'\<'),
        ('MORE', r'\>'),
        # Number literals: reals before integers
        ('FTYPE', r'[0-9]+\.[0-9]+'),
        ('ITYPE', r'\d+'),
        # Identifier catch-all: must stay last
        ('ID', r'\w+'),
    ]

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all rules, then the whitespace ignore rule."""
        for name, pattern in self._RULES:
            self.lexer.add(name, pattern)
        # BUG FIX: was the non-raw '\s+' ('\s' is an invalid escape in a
        # plain str literal; SyntaxWarning on Python 3.12+).
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Build and return the configured lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 25
0
from rply import LexerGenerator

lg = LexerGenerator()

# Rules are tried in registration order; SYMBOL is the catch-all and
# must therefore come last.
lg.add('NUMBER', r'\d+(\.\d+)?')  # integer or decimal literal
lg.add('PLUS', r'\+')
lg.add('MINUS', r'-')
lg.add('MUL', r'\*')
lg.add('DIV', r'/')
lg.add('OPEN_PARENS', r'\(')
lg.add('CLOSE_PARENS', r'\)')
lg.add('EQUALS', r'=')
# Anything not starting with whitespace/digit, up to the next whitespace.
# NOTE(review): the trailing [^\s]* also consumes operator characters,
# so "a+b" without spaces lexes as one SYMBOL -- confirm intended.
lg.add('SYMBOL', r'[^\s0-9][^\s]*')

lg.ignore(r'\s+')

lexer = lg.build()
Esempio n. 26
0
class Lexer():
    """Lexer for the var/func toy language."""

    # (name, pattern) pairs; rply tries them in this order.
    _RULES = [
        # Keywords use (?!\w) so they don't match identifier prefixes.
        ('DATA_TYPE', r'(?:double|int|str|bool)(?!\w)'),
        ('DOUBLE', r'-?\d+\.\d+'),
        ('INTEGER', r'-?\d+'),
        # String bodies via lookaround; the quotes themselves still come
        # out as DQUOTE tokens (last rule below).
        ('STRING', r'(?<=")(.+)(?=")'),
        ('BOOL', r"true(?!\w)|false(?!\w)"),
        ('IF', r'if(?!\w)'),
        ('ELSE', r'else(?!\w)'),
        ('AND', r"&&"),
        ('OR', r"\|\|"),
        ('VAR', r'var(?!\w)'),
        ('FUNCTION', r'func(?!\w)'),
        ('DELETE', r'delete(?!\w)'),
        ('CAST', r'cast(?!\w)'),
        ('PRINT', r'print(?!\w)'),
        ('LOOP', r'loop(?!\w)'),
        ('USING', r'using(?!\w)'),
        # BUG FIX: was r'[a-zA-Z_]+[a-zA-Z0-9_]+', which required at
        # least two characters, so one-letter names like 'x' failed.
        ('IDENTIFIER', r'[a-zA-Z_][a-zA-Z0-9_]*'),
        ('DOUBLE_EQUAL', r'=='),
        ('NOT_EQUAL', r'!='),
        # BUG FIX: NOT was registered before NOT_EQUAL, so '!=' lexed as
        # NOT followed by EQUAL and NOT_EQUAL was unreachable.
        ('NOT', r"!"),
        ('GTE', r'>='),
        ('LTE', r'<='),
        ('GT', r'>'),
        ('LT', r'<'),
        ('EQUAL', r'='),
        ('LBRACKET', r'\['),
        ('RBRACKET', r'\]'),
        ('LBRACE', r'\{'),
        ('RBRACE', r'\}'),
        ('PIPE', r'\|'),
        ('COMMA', r'\,'),
        ('SEMICOLON', r';'),
        ('DOT', r'\.'),
        ('COLON', r'\:'),
        ('PLUS', r'\+'),
        ('MINUS', r'\-'),
        ('MUL', r'\*'),
        ('DIV', r'\/'),
        ('MOD', r'\%'),
        ('LPAREN', r'\('),
        ('RPAREN', r'\)'),
        ('DQUOTE', r'"'),
    ]

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register all token rules and the whitespace ignore rule."""
        for name, pattern in self._RULES:
            self.lexer.add(name, pattern)
        # Raw string: '\s' is an invalid escape in a plain str literal.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Build and return the configured lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 27
0
#! /usr/bin/env python
# -*- coding: utf-8 -*-
# >>
#     LTPyB, 2016
# <<

from rply import LexerGenerator

lg = LexerGenerator()

# Order matters: rply tries rules as registered, so FLOAT must precede
# INTEGER (else "1.5" lexes as INTEGER 1 and then fails on '.'), and
# '==' must precede '=' (else it lexes as two assignments).
lg.add('FLOAT',         r'\-?\d+\.\d+')
lg.add('INTEGER',       r'\-?\d+')
lg.add('OP_EQUAL',      r'==')
lg.add('OP_ASSIGNMENT', r'=')

lg.ignore(r'\s+')    # ignore whitespace
# NOTE(review): requires a trailing newline; a comment on the last line
# of a file without '\n' will not be ignored -- confirm intended.
lg.ignore(r'#.*\n')  # ignore comments

lexer = lg.build()
Esempio n. 28
0
File: lexer.py Progetto: jdanford/hy
# Copyright 2017 the authors.
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.

from rply import LexerGenerator


lg = LexerGenerator()


# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'

# Anything that is not a delimiter, quote, whitespace or comment char.
identifier = r'[^()\[\]{}\'"\s;]+'

lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
lg.add('HLCURLY', r'#\{')
# Reader macros; the lookahead rejects a quote character followed by
# whitespace or a closing delimiter.
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
# UNQUOTESPLICE ('~@') is registered before UNQUOTE ('~') so the longer
# operator wins.
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('HASHSTARS', r'#\*+')
lg.add('HASHOTHER', r'#%s' % identifier)

# A regexp which matches incomplete strings, used to support
Esempio n. 29
0
lexer = lg.build()
parser = pg.build()

class BoxInt(BaseBox):
    def __init__(self, value):
        self.value = value

    def getint(self):
        return self.value

'''

from rply import ParserGenerator, LexerGenerator
from rply.token import BaseBox

lexgen = LexerGenerator()

# Word-spelled arithmetic operators, each wrapped in a capture group.
lexgen.add('AND', r"(and)")
lexgen.add('WITHOUT', r"(without)")
lexgen.add('DIVIDE', r"(divide)")
# BUG FIX: the pattern argument was missing entirely
# (``add('MULTIPLY', )`` raises TypeError at runtime); r"(multiply)"
# follows the convention of the sibling word-operator rules -- confirm.
lexgen.add('MULTIPLY', r"(multiply)")



keywords = {
        "return": Keyword("RETURN", "RETURN", EXPR_MID),
        "if": Keyword("IF", "IF_MOD", EXPR_BEG),
        "unless": Keyword("UNLESS", "UNLESS_MOD", EXPR_BEG),
        "then": Keyword("THEN", "THEN", EXPR_BEG),
        "elsif": Keyword("ELSIF", "ELSIF", EXPR_BEG),
        "else": Keyword("ELSE", "ELSE", EXPR_BEG),
Esempio n. 30
0
class Lexer():
    """Lexer for the C-like class language."""

    # (token name, pattern) pairs; rply tries rules in this order.
    _RULES = [
        # Built-in type names (\b guards keep them off identifier prefixes)
        ('INTTYPE', r'\b[u]{0,1}int(8|16|32|64|)\b'),
        ('FLOATTYPE', r'\bfloat(32|64|)\b'),
        ('VOID', r'\bvoid\b'),
        ('STRINGTYPE', r'\bString\b'),
        ('BOOL', r'\bbool\b'),
        # Other reserved words
        ('IF', r'\bif\b'),
        ('ELSE', r'\belse\b'),
        ('WHILE', r'\bwhile\b'),
        ('FOR', r'\bfor\b'),
        ('CLASS', r'\bclass\b'),
        ('OPERATOR', r'\boperator\b'),
        ('MUTABLE', r'\bmutable\b'),
        ('PUBLIC', r'\bpublic\b'),
        ('PRIVATE', r'\bprivate\b'),
        ('IMPORT', r'\bimport\b'),
        ('RETURN', r'\breturn\b'),
        ('EXTENDS', r'\bextends\b'),
        ('NEW', r'\bnew\b'),
        # Name of function or variable
        ('NAME', r'[a-zA-Z_$][a-zA-Z_$0-9]*'),
        # Numbers: DOUBLE before INTEGER so the fraction isn't split off
        ('DOUBLE', r'\d+[.]\d+'),
        ('INTEGER', r'\d+'),
        # Parenthesis, curly braces and brackets
        ('OPEN_PAREN', r'\('),
        ('CLOSE_PAREN', r'\)'),
        ('OPEN_CURLY', r'\{'),
        ('CLOSE_CURLY', r'\}'),
        ('OPEN_BRACKET', r'\['),
        ('CLOSE_BRACKET', r'\]'),
        # Separators and literals
        ('SEMICOLON', r'\;'),
        ('COMMA', r'\,'),
        ('CHAR', r"\'([^\\\n\r]|\\[rnft\'])\'"),
        ('STRING', r'\"[^"]*\"'),
        # Operators, longest first.  BUG FIX: '>' and '<' used to be
        # registered before '>>', '<<', '>>=' and '<<=', which made the
        # multi-character shift operators unreachable ('>>' lexed as
        # two '>' tokens).
        ('!=', '!='),
        ('==', '=='),
        ('>>=', '>>='),
        ('<<=', '<<='),
        ('>>', '[>][>]'),
        ('<<', '[<][<]'),
        ('>=', '>='),
        ('<=', '<='),
        ('++', '[+][+]'),
        ('--', '[-][-]'),
        ('+=', '[+]='),
        ('-=', '[-]='),
        ('*=', '[*]='),
        ('/=', '[/]='),
        ('^^', r'\^\^'),
        ('&&', r'\&\&'),
        ('||', r'\|\|'),
        ('>', '[>]'),
        ('<', '[<]'),
        ('SUM', r'\+'),
        ('SUB', r'\-'),
        ('MUL', r'\*'),
        ('DIV', r'\/'),
        ('XOR', r'\^'),
        ('AND', r'\&'),
        ('OR', r'\|'),
        ('EQ', r'\='),
        ('MOD', r'\%'),
        ('MEMBER', r'\.'),
        ('NOT', r'\!'),
        # the rest
        ('COLON', r'\:'),
    ]

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register every token rule, then the ignore rules."""
        for name, pattern in self._RULES:
            self.lexer.add(name, pattern)
        # NOTE(review): '.' does not span newlines, so only single-line
        # /* */ comments are ignored, and a // comment on the file's
        # final line needs a trailing newline -- confirm intended.
        self.lexer.ignore(r'\/\*.*\*\/')
        self.lexer.ignore(r'\/\/.*(\n|\r)')
        # Raw string: '\s' is an invalid escape in a plain str literal.
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Build and return the configured lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 31
0
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from rply import LexerGenerator

lg = LexerGenerator()

# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'

lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
# Reader-macro operators; UNQUOTESPLICE ('~@') is registered before
# UNQUOTE ('~') so the longer operator is matched first.
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
Esempio n. 32
0
class Lexer():
    """Lexer for the Spanish-keyword toy language (original Spanish
    comments translated)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register rules; the capitalised keywords never collide with
        the lowercase-only IDENTIFICADOR rule."""
        # Keywords
        self.lexer.add('BEGIN', r'Begin')
        self.lexer.add('NUCLEO', r'Nucleo')
        self.lexer.add('SI', r'Si')
        self.lexer.add('IMPRIMIR', r'Imprimir')
        self.lexer.add('FIN', r'Fin')
        # Parentheses
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Identifiers: lowercase letters, optionally followed by digits
        self.lexer.add('IDENTIFICADOR', r'[a-z]+[0-9]*')
        # Operators
        self.lexer.add('IGUAL', r'\=')
        # self.lexer.add('COMPARACION', r'\=\='))
        self.lexer.add('MAS', r'\+')
        # Integer literals
        self.lexer.add('INT', r'\d+')
        # String literals.  NOTE(review): '.*' is greedy, so two strings
        # on one line are read as a single STRING -- confirm intended.
        self.lexer.add('STRING', r'".*"')
        # self.lexer.add('STRING', r'^\"[a-z]+\"$')
        # Ignore whitespace (raw string: '\s' is an invalid str escape)
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Build and return the configured lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 33
0
class Lexer():
    """Lexer for the Pascal-ish block language ('{'/'}' as begin/end)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        """Register rules; keywords precede the ID catch-all."""
        self.lexer.add('LITERAL', r'"[^\"]*"')
        self.lexer.add('AND', r'and')
        self.lexer.add('NOT', r'not')
        self.lexer.add('OR', r'or')
        self.lexer.add('IF', r'if')
        self.lexer.add('THEN', r'then')
        self.lexer.add('ELSE', r'else')
        self.lexer.add('WHILE', r'while')
        self.lexer.add('DO', r'do')
        self.lexer.add('BREAK', r'break')
        self.lexer.add('CONTINUE', r'continue')
        self.lexer.add('BEGIN', r'\{')
        self.lexer.add('END', r'\}')
        self.lexer.add('INTEGER', r'integer')
        self.lexer.add('FLOAT', r'float')
        self.lexer.add('FUNCTION', r'function')
        self.lexer.add('VAR', r'var')
        self.lexer.add('PROGRAM', r'program')

        # Two-character comparisons before their one-character prefixes
        self.lexer.add('LEQUAL', r'\<=')
        self.lexer.add('GEQUAL', r'\>=')
        self.lexer.add('NOT_EQUAL', r'\!=')
        self.lexer.add('EQUALS', r'\:=')
        self.lexer.add('PRINT', r'print')
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('COLON', r'\:')
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('EQUAL', r'\=')
        self.lexer.add('GTHAN', r'\>')
        self.lexer.add('LTHAN', r'\<')
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        self.lexer.add('DIV', r'\/')

        self.lexer.add('NUMBER', r'[0-9]+(\.[0-9]+)?')
        # BUG FIX: ID was r'[a-zA-Z]*', which can match the empty
        # string; a zero-width token rule never advances the lexer.
        self.lexer.add('ID', r'[a-zA-Z]+')
        # '#...#' block comments, then whitespace (raw string: '\s' is
        # an invalid escape in a plain str literal)
        self.lexer.ignore(r'#[^\#]*#')
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        """Build and return the configured lexer."""
        self._add_tokens()
        return self.lexer.build()
Esempio n. 34
0
def lexer_from_mapping(mapping):
    """Build an rply lexer whose escaped special characters come from *mapping*.

    Each key of *mapping* becomes a token matching a backslash-escaped
    occurrence of itself; everything that is not an operator or an
    escaped mapping character lexes as DATA.
    """
    lg = LexerGenerator()

    # Escape data with forward slashes
    lg.add("DATA", r'/.+?/')

    # Add the special characters (token matches the backslash-escaped char)
    for char in mapping.keys():
        lg.add(char, r"\\" + char)

    # Normal tokens
    lg.add("TYPE", r':')
    lg.add("AND", r'\&')
    lg.add("OR", r'\|')
    lg.add("L_PAREN", r'\(')
    lg.add("R_PAREN", r'\)')
    lg.add("EQUAL", r'=')
    lg.add("CHILD", r'>')
    lg.add("PARENT", r'<')
    lg.add("NOT", r'!')

    # Everything else is data.  The leading '^' makes this a NEGATED
    # character class, so DATA is any run of characters that are neither
    # operators nor escaped mapping characters.
    excluded_chars = r'^<>=&|():!'
    for char in mapping.keys():
        excluded_chars += r"\\" + char
    # BUG FIX: this rule must be registered ONCE, after the exclusion
    # set is complete.  The original call was inside the loop, which
    # registered one partial (too permissive) DATA rule per mapping
    # character; rply uses the first matching rule, so those partial
    # rules could swallow later mapping characters as DATA.
    lg.add("DATA", "[{excluded}]+".format(excluded=excluded_chars))

    lg.ignore(r'\s+')
    lexer = lg.build()
    return lexer
Esempio n. 35
0
 def __init__(self):
     """Compile the module-level ``grammar`` rule table into a scanner."""
     generator = LexerGenerator()
     # Each grammar row is (token_name, pattern, ...); only the first
     # two fields are used here.
     for rule in grammar:
         generator.add(rule[0], rule[1])
     generator.ignore(r'\s+')
     self._scanner = generator.build()
Esempio n. 36
0
class Lexer():
    """Lexer for a small calculator language (Portuguese token names)."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        self.lexer.add('IMPRIMA', r'mostra_ai')  # print keyword
        self.lexer.add('ABRE_PAR', r'\(')        # open paren
        self.lexer.add('FECHA_PAR', r'\)')       # close paren
        self.lexer.add('PONTO_VIRGULA', r'\;')   # semicolon
        self.lexer.add('SOMA', r'\+')            # addition
        self.lexer.add('SUB', r'\-')             # subtraction
        self.lexer.add('NUM', r'\d+')            # integer literal
        self.lexer.add('MULT', r'\*')            # multiplication
        self.lexer.add('DIV', r'\/')             # division
        self.lexer.add('POT', r'\^')             # exponentiation
        self.lexer.add('REST', r'\%')            # modulo
        # Raw string: '\s' in a plain literal is an invalid escape
        # sequence (SyntaxWarning on modern Python).
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
Esempio n. 37
0
class Lexer():
    """Lexer for a print-only calculator language."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Print keyword
        self.lexer.add('PRINT', r'print')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Semi colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        # Number.  NOTE(review): the optional [-+] sign can never match
        # in practice, because the SUM/SUB rules above are tried first
        # at any '+'/'-' position (rply takes the first matching rule).
        self.lexer.add('NUMBER', r'[-+]?[0-9]*\.?[0-9]+')
        # Ignore whitespace.  Raw string: '\s' in a plain literal is an
        # invalid escape sequence (SyntaxWarning on modern Python).
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
Esempio n. 38
0
class Lexer():
    """Lexer for a small language with 'val' bindings and printing."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keywords
        self.lexer.add('PRINT', r'print')
        self.lexer.add('VAL', r'val')
        # Parenthesis
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        # Punctuation
        self.lexer.add('SEMI_COLON', r'\;')
        self.lexer.add('EQUAL', r'\=')
        # Operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'\-')
        self.lexer.add('MUL', r'\*')
        # BUG FIX: DIV was r'\\' (a literal backslash), which tokenized
        # '\' as division; the division operator is '/', matching the
        # other arithmetic rules.
        self.lexer.add('DIV', r'/')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # BUG FIX: TEXT was '[A-Za-z_0-9]*', which matches the EMPTY
        # string; a zero-width match never advances the lexer.  '+'
        # requires at least one character.
        self.lexer.add('TEXT', r'[A-Za-z_0-9]+')
        # Ignore whitespace (raw string avoids the invalid '\s' escape).
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
Esempio n. 39
0
    'MINUS': r'-',
    'MUL': r'\*',
    'NUMBER_SEP': r'/',
    'EXPR_OPEN': r'\(',
    'EXPR_CLOSE': r'\)',
    'AND': r'&',
    'OR': r'\|',
    'NOT': r'!',
    'EQ': r'\?\s*=',
    'GT': r'>',
    'LT': r'<',
    'BOWL': r':',
    'BOWL_OPEN': r'{',
    'BOWL_CLOSE': r'}',
    'NOODLE_OPEN': r'\[',
    'NOODLE_SEP': r';',
    'NOODLE_CLOSE': r'\]',
    'ASSIGN': r'=',
    'DENO': r'\^',
    'MEM': r'@',
}

lg = LexerGenerator()
# Register every operator from the op_map table.
for name, regex in op_map.items():
    lg.add(name, regex)

# Skip whitespace and '~# ... #~' block comments; the tempered pattern
# ((?!#~).)* stops the comment body at the first closing '#~'.
# Raw strings: '\s' in plain literals is an invalid escape sequence.
lg.ignore(r'\s+')
lg.ignore(r'~\s*#((?!#~).)*#\s*~')

lexer = lg.build()
Esempio n. 40
0
class Syntax:
    """Token definitions for a small C-like language (structs, fns, pointers)."""

    def __init__(self) -> None:
        self.lg = LexerGenerator()

    def Build(self):
        """Register all token rules and return the built lexer.

        Rules are tried in registration order, so the two-character
        '->' is registered before '*', and keywords before IDENTIFIER.
        All patterns are raw strings: in plain literals, sequences like
        '\\.' are invalid escapes (SyntaxWarning on modern Python).
        """
        self.lg.add(";", r";")
        self.lg.add(".", r"\.")
        self.lg.add(",", r",")
        self.lg.add("(", r"\(")
        self.lg.add(")", r"\)")
        self.lg.add("{", r"\{")
        self.lg.add("}", r"\}")
        self.lg.add("[", r"\[")
        self.lg.add("]", r"\]")

        self.lg.add("=", r"\=")

        self.lg.add("->", r"\-\>")
        self.lg.add("*", r"\*")

        # BUG FIX: made raw.  In the original non-raw literal, Python
        # collapsed '\\' to a single backslash, so the class [^"\\\n]
        # did NOT exclude backslashes and escaped quotes ('\"') inside
        # strings terminated the literal early.
        self.lg.add("STRING", r'["]([^"\\\n]|\\.|\\\n)*["]')

        self.lg.add("&", r"\&")
        # (a duplicate, unreachable "*" rule was removed here -- the
        # first "*" rule above always matched first)
        self.lg.add("@", r"\@")

        # NOTE(review): '[-]*' allows repeated minus signs ("--5");
        # confirm that is intended.
        self.lg.add("NUMBER", r"[-]*[0-9]+")

        # Keywords deliberately require a trailing space in the source.
        self.lg.add("STRUCT", r"struct ")

        self.lg.add("FN", r"fn ")
        self.lg.add("RETURN", r"return ")

        self.lg.add("TO", r"to ")

        self.lg.add("IDENTIFIER", r"[_\w][_\w0-9]*")

        self.lg.ignore(r"\s+")

        return self.lg.build()
Esempio n. 41
0
    def test_states(self):
        """Bracketed regions switch the lexer into the 'vector' state."""
        lg = LexerGenerator(initial_state="scalar")
        lg.add("NUMBER", r"\d+")
        lg.add("PLUS", r"\+")
        lg.ignore(r"\s+")

        # '[' enters the vector state; ']' returns to scalar.
        lg.add("OPEN_BRACKET", r"\[", to_state="vector")
        lg.add("PLUS", r"\+", state="vector")
        lg.add("NUMBER", r"\d+", state="vector")
        lg.add("NEW_LINE", r"\n+", state="vector")
        lg.add("CLOSE_BRACKET", r"\]", state="vector", to_state="scalar")
        lg.ignore(r" +", state="vector")

        lexer = lg.build()
        stream = lexer.lex("2 + [ 3 + 4 \n\n 5 + 6 ] + 7")

        expected = [
            ("NUMBER", "2", "scalar"),
            ("PLUS", "+", "scalar"),
            ("OPEN_BRACKET", "[", "scalar"),
            ("NUMBER", "3", "vector"),
            ("PLUS", "+", "vector"),
            ("NUMBER", "4", "vector"),
            ("NEW_LINE", "\n\n", "vector"),
            ("NUMBER", "5", "vector"),
            ("PLUS", "+", "vector"),
            ("NUMBER", "6", "vector"),
            ("CLOSE_BRACKET", "]", "vector"),
            ("PLUS", "+", "scalar"),
            ("NUMBER", "7", "scalar"),
        ]

        for (exp_name, exp_value, exp_state), token in zip(expected, stream):
            assert (token.name, token.value, token.state) == \
                (exp_name, exp_value, exp_state)
Esempio n. 42
0
from rply import LexerGenerator

# Minimal rply demo with Chinese identifiers:
# 分词器母机 = "lexer generator", 分词器 = "lexer", 词 = "token".
分词器母机 = LexerGenerator()

# Token '数' (number): one or more digits.
分词器母机.add('数', r'\d+')

# Tokens '加' (plus) and '减' (minus).
分词器母机.add('加', r'\+')
分词器母机.add('减', r'-')

分词器 = 分词器母机.build()

# Print every token produced for "1+1-1".
for 词 in 分词器.lex('1+1-1'):
    print(词)
Esempio n. 43
0
from rply import ParserGenerator, LexerGenerator
import box


# S-expression tokens: parens, quote, and atoms (any run of characters
# that is not whitespace or a paren).
lg = LexerGenerator()
lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
lg.add("QUOTE", r"'")
lg.add("ATOM", r"[^\s()]+")
lg.ignore(r"\s+")

# cache_id lets rply cache the generated parser table between runs.
pg = ParserGenerator(["QUOTE", "LPAREN", "RPAREN", "ATOM"],
                     precedence=[],
                     cache_id="wasp")


@pg.error
def error_handler(token):
    """Raise ValueError for an unexpected token, with position when known."""
    # Renamed from 'type' to avoid shadowing the builtin.
    token_type = token.gettokentype()
    pos = token.getsourcepos()
    if pos is None:
        raise ValueError("unexpected %s" % token_type)
    else:
        raise ValueError("unexpected %s at (%s, %s)" %
                         (token_type, pos.lineno, pos.colno))


@pg.production("main : sexpr")
def main(p):
    # The start symbol simply yields the single parsed s-expression.
    return p[0]
Esempio n. 44
0
File: lexer.py Progetto: 0atman/hy
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from rply import LexerGenerator


lg = LexerGenerator()


# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'


# Bracket tokens.
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
# ' and ` only lex as quote operators when NOT immediately followed by
# a terminator (negative lookahead in end_quote above).
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
Esempio n. 45
0
    def __init__(self):
        """Build the lexer and parser for a minimal shell-command grammar."""
        lg = LexerGenerator()
        # Rule order matters: rply uses the first matching rule, so the
        # specific patterns (PROTO, STRING, '--colors=always') precede
        # the generic PATH/NAME catch-alls.
        tokens = [
            ("PROTO", r"[a-zA-Z]+://[^ ]+"),
            ("INT", r"\d+"),
            ("STRING", r"'[^']+'|\"[^\"]+\""),
            ("NAME", r"--colors=always"),
            ("PATH", r"([a-zA-Z0-9/._-]|\\ )+"),
            ("PATH", r"~([a-zA-Z0-9/._-]|\\ )*"),
            ("NAME", r"([a-zA-Z0-9_-]|\\ )+"),
            ("SEMICOLON", r";"),
            ("ENDL", r"\r?\n"),
        ]

        for token in tokens:
            lg.add(*token)

        # FIX: the ignore class previously contained a literal TAB
        # character, which is invisible and easily mangled by editors;
        # '\t' is explicit and equivalent.
        lg.ignore(r"[ \t]+")

        pg = ParserGenerator([x[0] for x in tokens])

        @pg.production("main : statements")
        def main(args):
            # Entry point: unwrap the single statements node.
            return args[0]

        @pg.production("statements : statement")
        def statements_one(args):
            expression, = args
            return {
                "type": "statement",
                "content": expression,
            }

        @pg.production("statements : statement separator statements")
        def statements_many(args):
            # FIX: renamed the misspelled local 'separtor'.
            statement, sep, statements = args
            return {
                "type": "statement_infix_operator",
                "content": {
                    "left": {
                        "type": "statement",
                        "content": statement,
                    },
                    "right": statements,
                    "operator": sep,
                }
            }

        @pg.production("separator : SEMICOLON")
        @pg.production("separator : ENDL")
        def separator(args):
            # Only the raw separator text matters, not the token kind.
            return args[0].value

        @pg.production("statement : atom")
        def expression_one(args):
            atom, = args
            return [atom]

        @pg.production("statement : atom atoms")
        def expression_many(args):
            atom, atoms = args
            return [atom] + atoms

        @pg.production("atoms : atom")
        def atoms_one(args):
            atom, = args
            return [atom]

        @pg.production("atoms : atom atoms")
        def atoms_many(args):
            atom, atoms = args
            return [atom] + atoms

        @pg.production("atom : NAME")
        @pg.production("atom : INT")
        @pg.production("atom : STRING")
        @pg.production("atom : PATH")
        @pg.production("atom : PROTO")
        def atom(args):
            # Every atom reduces to its literal source text.
            name, = args
            return name.value

        self.pg = pg
        self.lg = lg

        self.lexer = self.lg.build()
        self.parser = self.pg.build()
Esempio n. 46
0
import re
import itertools
from collections import deque
from rply import ParserGenerator, LexerGenerator
from graphextractor.rfc3987 import UrlPattern
from graphextractor.flattened import flattened

__all__ = ['TweetLexer', 'TweetParser']

lex = LexerGenerator()
# Skip separators: commas/semicolons/whitespace and the words "and"/"or".
# NOTE: the u''/ur'' literals mean this module targets Python 2.
lex.ignore(ur'(?:[,;\s]+|\band\b|\bor\b)+')
lex.add(u'URL', UrlPattern)
# The specific '#betterthan'/'#isbetterthan' hashtags are registered
# before the generic HASHTAG rule (rply uses the first matching rule).
lex.add(u'BTHASH', ur'#betterthan')
lex.add(u'IBTHASH', ur'#isbetterthan')
lex.add(u'HASHTAG', ur'#[a-zA-Z0-9_]+')
lex.add(u'MENTION', ur'@[a-zA-Z0-9_]+')
lex.add(u'FOR', ur'(for|FOR|For)')
lex.add(u'WORD', ur'[\w]+')

pg = ParserGenerator([u'URL',
                      u'BTHASH',
                      u'IBTHASH',
                      u'HASHTAG',
                      u'MENTION',
                      u'FOR',
                      u'WORD'
                     ], 
                     cache_id=u'graphextractor.tweetparser')

@pg.production("betterthan : words URL bthash URL topics words")
def betterthan(p):
Esempio n. 47
0
from rply import LexerGenerator

lg = LexerGenerator()

lg.add("PLUS", r"\+")
lg.add("MINUS", r"-")
# BUG FIX: the MUL and DIV patterns were swapped in the original
# (MUL matched '/' and DIV matched '*').
lg.add("MUL", r"\*")
lg.add("DIV", r"/")

lg.add("NUMBER", r"\d+")

lg.ignore(r"\s+")

lexer = lg.build()
Esempio n. 48
0
class Lexer():
    """Lexer for a tiny SQL-like query language with a BOXPLOT clause."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Keywords (must precede the generic STR rule below).
        self.lexer.add('SELECT', r'select')
        self.lexer.add('FROM', r'from')
        self.lexer.add('WHERE', r'where')

        # Separator
        self.lexer.add('SEP', r',')

        # Boxplot keyword
        self.lexer.add('BOXPLOT', r'boxplot')

        # Number.  BUG FIX: NUM must be registered BEFORE STR -- '\w+'
        # also matches digits, so digits previously lexed as STR and
        # NUM was unreachable.  Also widened '\d' (one digit) to '\d+'.
        self.lexer.add('NUM', r'\d+')

        # Alphanumeric strings (column names, table names, ...).
        self.lexer.add('STR', r'\w+')

        # Comparison operators
        self.lexer.add('GRT', r'>')
        self.lexer.add('LSS', r'<')
        self.lexer.add('EQ', r'=')

        # Ignore whitespace
        self.lexer.ignore(r'\s+')

        # Ignore single-line comments (#...)
        self.lexer.ignore(r'#(.)*\n')

        # Ignore multi-line comments (/* ... */).
        # BUG FIX: the inline (?s) flag sat mid-pattern, which raises an
        # error on Python 3.11+; it now leads the pattern.  '.*?' is
        # non-greedy so two comments are not merged into one span.
        self.lexer.ignore(r'(?s)/\*.*?\*/')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
Esempio n. 49
0
from rply import LexerGenerator

lg = LexerGenerator()

# '...' must precede any rule that could match a leading '.'.
lg.add("ELLIPSIS", r"\.\.\.")
lg.add("NUMBER", r"\d+")
lg.add("EQUALS", r"=")
lg.add("WORD", r"[a-z]+")

lg.ignore(r"\s+")  # Ignore whitespace
# BUG FIX: '#.*\n' required a trailing newline, so a comment on the
# last line of input (no newline at EOF) raised a lexing error.  '#.*'
# stops before the newline, which the whitespace rule then skips.
lg.ignore(r"#.*")  # Ignore comments

lexer = lg.build()
Esempio n. 50
0
class Lexer():
    """Lexer for a C-like teaching language with one-letter keywords."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Parenthesis / braces
        self.lexer.add('OPEN_PAR', r'\(')
        self.lexer.add('CLOSE_PAR', r'\)')
        self.lexer.add('OPEN_KEY', r'\{')
        self.lexer.add('CLOSE_KEY', r'\}')
        # Semi Colon
        self.lexer.add('SEMI_COLON', r'\;')
        # Comma
        self.lexer.add('COMMA', r'\,')
        # Quote
        self.lexer.add('QUOTE', r'\"')
        # Dots
        self.lexer.add('DOTS', r'\:')
        # Boolean operators.  BUG FIX: OR ('//') must be registered
        # before DIV ('/') and DIFF ('!=') before NOT ('!'): rply takes
        # the FIRST matching rule, so the original order made OR and
        # DIFF unreachable ('//' lexed as DIV DIV, '!=' as NOT ASSIGN).
        self.lexer.add('OR', r'//')
        self.lexer.add('AND', r'&&')
        # Relational operators: two-character tokens precede their
        # one-character prefixes.
        self.lexer.add('EQUAL_TO', r'==')
        self.lexer.add('DIFF', r'!=')
        self.lexer.add('NOT', r'!')
        # Arithmetic operators
        self.lexer.add('SUM', r'\+')
        self.lexer.add('SUB', r'-')
        self.lexer.add('MULT', r'\*')
        self.lexer.add('DIV', r'/')
        self.lexer.add('BIGGER_THAN', r'>')
        self.lexer.add('SMALLER_THAN', r'<')
        self.lexer.add('ASSIGN', r'=')
        # Number
        self.lexer.add('NUMBER', r'\d+')
        # Types
        self.lexer.add('INT', r'IN')
        self.lexer.add('CHAR', r'CH')
        self.lexer.add('VOID', r'VO')
        # While
        self.lexer.add('WHILE', r'W')
        # If - else
        self.lexer.add('IF', r'I')
        self.lexer.add('ELSE', r'E')
        # Print / scanf / function / main
        self.lexer.add('PRINT', r'P')
        self.lexer.add('SCANF', r'S')
        self.lexer.add('FUNC', r'F')
        self.lexer.add('MAIN', r'M')
        # Identifier.  NOTE(review): uppercase keyword letters also
        # match at the start of identifiers (e.g. "Price" lexes as
        # PRINT + IDENTIFIER) -- confirm identifiers avoid them.
        self.lexer.add('IDENTIFIER', "[a-zA-Z_][a-zA-Z0-9_]*")
        # Ignore whitespace (raw string avoids the invalid '\s' escape).
        self.lexer.ignore(r'\s+')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
Esempio n. 51
0
qualified = WORD , OP , string ;
unqualified = string ;
quick = QUICK , string ;
variable = DOLLAR , string ;

combination = query , [ ( AND | OR ) ] , query ;

not = NOT | MINUS ;
inverted_query = NOT , query ;

subquery = LPAREN , query , RPAREN ;

string = ( WORD | STRING ) ;
"""

lg = LexerGenerator()
# Keyword operators (uppercase only).
lg.add('AND', r'AND')
lg.add('OR', r'OR')
lg.add('NOT', r'NOT')
# WORD: the first character may not be a quote/paren/operator/quick
# prefix; subsequent characters are less restricted.
lg.add('WORD', r'[^:"\'()\s=~<>\-#@/$][^:)\s=~<>]*')
# Single- or double-quoted string with backslash escapes; the
# backreference \1 matches the same quote that opened the string.
lg.add('STRING', r'([\'"])(?:(?!\1|\\).|\\.)*\1')
lg.add('MINUS', r'-')
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('OP', r'[:=<>~]')
lg.add('QUICK', r'[#@/]')
lg.add('DOLLAR', r'\$')

lg.ignore(r'\s+')

pg = ParserGenerator([rule.name for rule in lg.rules],
Esempio n. 52
0
from rply import LexerGenerator

lexer_gen = LexerGenerator()

# BUG FIX: the original '#.*$' only ignored a comment at the very end
# of the input, because '$' without re.MULTILINE does not match at each
# line end; '#[^\n]*' ignores a '#' comment on any line.
lexer_gen.ignore(r"([\s\f\t\n\r\v]+)|#[^\n]*")


def get_tokens():
    return [
        # Keywords
        ("IF", r"if\b"),
        ("PRINT", r"print\b"),
        ("FN", r"fn\b"),
        ("WHILE", r"while\b"),
        ("RETURN", r"return\b"),
        ("LET", r"let\b"),
        ("BREAK", r"break\b"),
        ("CONTINUE", r"continue\b"),
        ("ASYNC", r"async\b"),
        # Channel Operators
        ("CHAN_OUT", r"<:"),
        ("CHAN_IN", r"<-"),
        # Arithmetic Operators
        ("MUL", r"\*"),
        ("DIV", r"/"),
        ("MOD", r"%"),
        ("PLUS", r"\+"),
        ("MINUS", r"-"),
        # Logical Operators
        ("AND", r"and\b"),
        ("OR", r"or\b"),
Esempio n. 53
0
 def __init__(self) -> None:
     """Create the rply lexer generator; token rules are added elsewhere."""
     self.lg = LexerGenerator()
Esempio n. 54
0
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

from rply import LexerGenerator


lg = LexerGenerator()


# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote = r'(?![\s\)\]\}])'


# Bracket tokens.
lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
# Quote operators only match when not immediately followed by a
# terminator (the negative lookahead in end_quote).
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
Esempio n. 55
0
""":mod:`stencil_lang.matrix.lexer` -- Matrix scanner
"""

from rply import LexerGenerator

from stencil_lang.matrix.tokens import TOKENS, IGNORES

lg = LexerGenerator()

for rule_name, regex in TOKENS.iteritems():
    lg.add(rule_name, regex)

for regex in IGNORES:
    lg.ignore(regex)

# This has to be called outside a function because the parser must be generated
# in Python during translation, not in RPython during runtime.
_lexer = lg.build()
"""This intepreter's lexer instance."""


def lex(text):
    """Tokenize *text* with the module-level generated lexer.

    :param text: source string to scan
    :type text: :class:`str`
    :return: lazy token stream
    :rtype: :class:`rply.lexer.LexerStream`
    """
    token_stream = _lexer.lex(text)
    return token_stream
Esempio n. 56
0
class Lexer():
    """Lexer for a DSL with imports and amino-acid sequence literals."""

    def __init__(self):
        self.lexer = LexerGenerator()

    def _add_tokens(self):
        # Reserved Words
        self.lexer.add('FROM', r'from')
        self.lexer.add('IMPORT', r'import')
        # NOTE(review): AS requires a trailing space ('as ') unlike the
        # other keywords -- confirm this asymmetry is intentional.
        self.lexer.add('AS', r'as\ ')
        self.lexer.add('LET', r'let')

        # Other characters
        self.lexer.add('OPEN_PAREN', r'\(')
        self.lexer.add('CLOSE_PAREN', r'\)')
        self.lexer.add('OPEN_CURLY_BRACKET', r'\{')
        self.lexer.add('CLOSE_CURLY_BRACKET', r'\}')
        self.lexer.add('OPEN_BRACKET', r'\[')
        self.lexer.add('CLOSE_BRACKET', r'\]')
        self.lexer.add('FORWARD_SLASH', r'\/')
        self.lexer.add('COLON', r'\:')
        self.lexer.add('SEMICOLON', r'\;')
        self.lexer.add('COMMA', r'\,')
        self.lexer.add('PERIOD', r'\.')

        self.lexer.add('EQUAL', r'=')
        self.lexer.add('TILDE', r'~')
        self.lexer.add('EXCLAMATION', r'!')
        self.lexer.add('NUMBER', r'-?\d+')
        # '$' followed by one-letter amino-acid codes or '*' (stop).
        self.lexer.add('AMINO_ACID_SEQUENCE', r'\$[ACDEFGHIKLMNPQRSTVWY\*]+')
        self.lexer.add('IDENTIFIER', r'\w[\w\d\_\-]*')

        # Ignore whitespace (raw string avoids the invalid '\s' escape).
        self.lexer.ignore(r'\s+')

        # Comments - Ignore multiline comments in c syntax
        # Example: /* This is a comment! */
        self.lexer.ignore(r'/\*([\s\S]*?)\*/\s*')

        # BUG FIX: '#.*\n' required a trailing newline, so a '#' comment
        # on the final line (no newline at EOF) broke lexing; '#.*'
        # stops before the newline, which the whitespace rule skips.
        self.lexer.ignore(r'#.*')

    def get_lexer(self):
        self._add_tokens()
        return self.lexer.build()
Esempio n. 57
0
from __future__ import print_function

import re
import ast
import collections

from transit.transit_types import Keyword, Symbol, TaggedValue, List, Vector
import transit.transit_types

transit_true = transit.transit_types.true
transit_false = transit.transit_types.false

from rply import ParserGenerator, LexerGenerator

lg = LexerGenerator()

# EDN-style symbol: an initial symbol character followed by symbol
# characters (':' and '#' allowed after the first character).
SYMBOL_RE = r"[\.\*\+\!\-\_\?\$%&=a-zA-Z][\.\*\+\!\-\_\?\$%&=a-zA-Z0-9:#]*"
# Namespaced symbol: "ns/name".
NS_SYMBOL = SYMBOL_RE + "/" + SYMBOL_RE

# NOTE(review): 'boolean' and 'nil' also match as prefixes of longer
# symbols (rply takes the first matching rule) -- confirm the grammar
# handles e.g. "trueish" as intended.
lg.add("boolean", r"(true|false)")
lg.add("nil", r"nil")
# 'float' is registered before 'number' so "1.5" is not lexed as two
# integers around a dot.
lg.add("float", r"\d+\.\d+")
lg.add("number", r"[-+]?\d+")
lg.add("olist", r"\(")
lg.add("clist", r"\)")
lg.add("omap", r"{")
lg.add("cmap", r"}")
lg.add("ovec", r"\[")
lg.add("cvec", r"\]")
lg.add("oset", r"#{")
lg.add("colon", r":")
Esempio n. 58
0
from rply import LexerGenerator

lg = LexerGenerator()

# NOTE(review): 'number' consumes a leading '-', so "5-3" lexes as
# number(5) number(-3) rather than number sub number (rply takes the
# first matching rule) -- confirm negative literals are intended here.
lg.add('number', r'\-?[0-9]+')
lg.add('add', r'\+')
lg.add('sub', r'\-')
lg.add('ioperator', r'(==|\>=|\<=|\!=|>|<)')
lg.add('equals', r'\=')
lg.add('private', r'local')
lg.add('if', r'if')
lg.add('for', r'for')
lg.add('def', r'def')
lg.add('class', r'class')
lg.add('new', r'new')
lg.add('leftbracket', r'\{')
lg.add('rightbracket', r'\}')
lg.add('end', r'end')
# NOTE(review): this paren pattern stops at the first ')' or
# whitespace run -- verify against the parser's expectations.
lg.add('parenth', r'\(.*?(\)|\s)+')
lg.add('variablenam', r'[A-Za-z0-9_.:]+')
# BUG FIX: '".+"' was greedy, so '"a" "b"' lexed as ONE string spanning
# both literals; the non-greedy '".+?"' stops at the first closing quote.
lg.add('string', r'".+?"')
lg.ignore(r'\s+')

l = lg.build()
Esempio n. 59
0
from rply import LexerGenerator


lg = LexerGenerator()

lg.add("LPAREN", r"\(")
lg.add("RPAREN", r"\)")
# lg.add('LBRACKET', r'\[')
# lg.add('RBRACKET', r'\]')

# An identifier is any run of characters that is not whitespace, a
# bracket/paren/brace, or the comment marker '#'.
lg.add("IDENTIFIER", r"[^()\[\]{}\s#]+")

# Comments run from '#' up to (but not including) the line end; the
# lookahead leaves the newline for the whitespace rule below, and '$'
# handles a comment on the final line with no trailing newline.
lg.ignore(r"#.*(?=\r|\n|$)")
lg.ignore(r"\s+")

lexer = lg.build()
Esempio n. 60
0
# Copyright 2019 the authors.
# This file is part of Hy, which is free software licensed under the Expat
# license. See the LICENSE.

from rply import LexerGenerator

lg = LexerGenerator()

# A regexp for something that should end a quoting/unquoting operator
# i.e. a space or a closing brace/paren/curly
end_quote_set = r'\s\)\]\}'
end_quote = r'(?![%s])' % end_quote_set

# An identifier: any run of characters that is not a bracket, quote,
# whitespace, or comment character.
identifier = r'[^()\[\]{}\'"\s;]+'

lg.add('LPAREN', r'\(')
lg.add('RPAREN', r'\)')
lg.add('LBRACKET', r'\[')
lg.add('RBRACKET', r'\]')
lg.add('LCURLY', r'\{')
lg.add('RCURLY', r'\}')
lg.add('HLCURLY', r'#\{')
# Reader-macro operators only lex when not immediately followed by a
# terminator (the negative lookahead in end_quote).
lg.add('QUOTE', r'\'%s' % end_quote)
lg.add('QUASIQUOTE', r'`%s' % end_quote)
lg.add('UNQUOTESPLICE', r'~@%s' % end_quote)
lg.add('UNQUOTE', r'~%s' % end_quote)
lg.add('ANNOTATION', r'\^(?![=%s])' % end_quote_set)
lg.add('DISCARD', r'#_')
lg.add('HASHSTARS', r'#\*+')
lg.add(
    'BRACKETSTRING', r'''(?x)