Example #1
 def __init__(self):
     self.lexer = Lexer()
     self.token_generator = None
     self.current_token = None
     self.current_level = -1
     self.table = SymTable()
     self.pcode = PCodeManager()
Example #2
 def _test_lex_impl(self, buf, expected_tokens):
     self.lexer = Lexer(buf)
     while True:
         tok = self.lexer.lex_token()
         self.tokens.append(tok)
         if tok.type == TokenType.EOF:
             break
     self.assertEqual(len(self.tokens), len(expected_tokens))
     for t, exp in zip(self.tokens, expected_tokens):
         self.assertEqual(t,
                          exp,
                          msg="Got=({0}), Expected=({1})".format(t, exp))
Example #3
def main():
    with open(sys.argv[1], 'r') as f:
        data = f.read()
        lexer = Lexer(data, sys.argv[1])
        while True:
            try:
                line, kind, token = lexer.get_next_token()
                print('[%2d] [%-10s] %s' %
                      (line, kind_to_category(kind), token))
                if kind == TokenKind.EOF:
                    break
            except Exception as e:
                sys.exit(e)
def compile_code(code, output_type="exe", compiler_opts=None):
    """
    supported_output_types = [
        "exe",
        "ll",
        "wasm",
        "ast",
        "sema",
        "lowered_ast",
        "tokens",
    ]
    """
    # Use a fresh CompilerOptions per call; the "ll" and "wasm" branches
    # mutate it, so a shared mutable default argument would leak state.
    if compiler_opts is None:
        compiler_opts = CompilerOptions()

    if output_type == "tokens":
        tokens = Lexer(code, compiler_opts).lex()
        result = json_dumps(tokens)

    elif output_type == "ast":
        ast = Parser.from_code(code, compiler_opts).parse()
        result = json_dumps(ast)

    elif output_type == "sema":
        tokens = Lexer(code, compiler_opts).lex()
        ast = Parser(tokens, compiler_opts).parse()
        semantic_info = SemanticAnalyzer(ast, tokens, compiler_opts).analyze()
        result = json_dumps(semantic_info)

    elif output_type == "ll":
        compiler_opts.target_code = "llvm"
        tokens = Lexer(code, compiler_opts).lex()
        ast = Parser(tokens, compiler_opts).parse()
        semantic_info = SemanticAnalyzer(ast, tokens, compiler_opts).analyze()
        llvm = LLVMCodegen(ast, semantic_info).generate()
        result = llvm.dumps()

    elif output_type == "wasm":
        compiler_opts.target_code = "wasm"
        tokens = Lexer(code, compiler_opts).lex()
        ast = Parser(tokens, compiler_opts).parse()
        semantic_info = SemanticAnalyzer(ast, tokens, compiler_opts).analyze()
        result = json_dumps(semantic_info)

    else:
        click.echo("Unimplemented Output Type!")
        return

    click.echo(result)
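
# A minimal usage sketch for compile_code above; hedged: it assumes this
# function's dependencies (CompilerOptions, Lexer, Parser, click, ...) are
# importable in the same module, and the source string below is hypothetical.
def _compile_code_demo():
    source = 'print("hello")'                   # hypothetical source program
    compile_code(source, output_type="tokens")  # echo the token stream as JSON
    compile_code(source, output_type="ll")      # echo the generated LLVM IR
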
def main():
    with open('example.gg', 'r') as f:
        text_input = f.read()

    lexer = Lexer().get_lexer()
    tokens = lexer.lex(text_input)

    cg = CodeGen()
    pg = Parser(cg)
    pg.parse()
    parser = pg.get_parser()
    parser.parse(tokens, state=ParserState()).generate()

    cg.create_ir()
    cg.save_ir('output/output.ll')
    print(cg.run(False))
 def compile(chunk, chunk_name):
     parser = Parser()
     lexer = Lexer(chunk, chunk_name)
     ast = parser.parse_block(lexer)
     # print(ast)
     proto = Codegen.gen_proto(ast)
     # proto.print_code()
     LuaCompiler.set_source(proto, chunk_name)
     return proto
Example #7
 def _test_parse_impl(self, buf, expected_exprs):
     lexer = Lexer(buf)
     tokens = list()
     while True:
         tok = lexer.lex_token()
         tokens.append(tok)
         if tok.type == TokenType.EOF:
             break
     self.parser = Parser(tokens)
     while True:
         expr = self.parser.parse_top_level_expr()
         if expr is None:
             break
         self.exprs.append(expr)
     for e in self.exprs:
         print(e)
     self.assertEqual(len(self.exprs), len(expected_exprs))
     for e, exp in zip(self.exprs, expected_exprs):
         self.assertEqual(e, exp)
def run():
    lexer = Lexer('primeiro_portugolo.ptgl')
    parser = Parser(lexer)
    # token = lexer.next_token()
    #
    # while token and token.tag != Tag.END_OF_FILE:
    #     print(str(token))
    #     token = lexer.next_token()
    #
    # print("\n\n\nSymbol Table:")
    # lexer.print_symbol_table()

    parser.compilador()
Example #9
    def parse_input(self, path: str):
        with open(path, 'r') as f:
            InputHandler.set_input_text(f.read())
        Logger.info(
            '-------------------------------------------------------------')
        Logger.info(
            '------------------- Starting parsing phase ------------------')
        Logger.info(
            '-------------------------------------------------------------')
        lexer = Lexer()
        Logger.info('* Starting lexing')
        tokens = lexer.lex_input()
        Logger.info('- Lexing DONE')

        Logger.debug('*** Printing lexed tokens: ***')
        for t in tokens:
            Logger.debug('{token_type}::{value}'.format(
                token_type=t.token_type, value=t.value))
        if len(lexer.lex_errors) > 0:
            for e in lexer.lex_errors:
                Logger.error(e)
            sys.exit(1)

        tr = TokenReader(tokens)
        parser = Parser(tr)
        Logger.info('* Starting parsing')
        ast = parser.parse_spl()
        Logger.info('- Parsing DONE')
        Logger.info('*** Pretty printing AST: ***')
        Logger.info('\n' + ast.indented_print())

        if len(parser.errors) > 0:
            for e in parser.errors:
                Logger.error(e)
            sys.exit(1)
        return ast
Example #10
    def test_lexer_on_inputfile(self):
        path = self.__get_path("example.nl")
        with open(path, "r") as f:
            lexer = Lexer(inFile=f)
        expected_tokens = [
            Token('ID', 'x', 1),
            Token('ASSIGN', '=', 1),
            Token('NUMBER', 4, 1),
            Token('SEMICOLON', ';', 1),
            Token('FUNC', 'func', 2),
            Token('ID', 'random', 2),
            Token('LPAREN', '(', 2),
            Token('RPAREN', ')', 2),
            Token('INT', 'int', 2),
            Token('LBRACE', '{', 2),
            Token('RETURN', 'return', 3),
            Token('NUMBER', '5', 3),
            Token('SEMICOLON', ';', 3),
            Token('RBRACE', '}', 4)
        ]

        self.assertEqual(len(lexer.tokens), len(expected_tokens))
        for expected, actual in zip(expected_tokens, lexer.tokens):
            self.assertEqual(expected, actual)
Example #11
from compiler.lexer import Lexer
from compiler.parser import Parser, ParserState
from compiler.JSONparsedTree import Node, write
from compiler.codegen import CodeGen
from rply.lexer import LexerStream
from copy import copy
from pprint import pprint
import traceback
import json

with open('input.code') as f:
    input_file = f.read()

lexer = Lexer().build()
tokens: LexerStream
try:
    tokens = lexer.lex(input_file)
    tokenType = map(lambda x: x.gettokentype(), copy(tokens))
    tokenName = map(lambda x: x.getstr(), copy(tokens))
    pprint(list(copy(tokens)))
except BaseException:
    traceback.print_exc()
finally:
    print("\n\nCompile log:")

codegen = CodeGen()
module = codegen.module
builder = codegen.builder
printf = codegen.printf

SymbolTable = ParserState()
syntaxRoot: Node
Example #12
from compiler.lexer import Lexer

if __name__ == '__main__':
    lexer = Lexer('hello world')
    print(lexer.lex())
Example #13
class Parser:
    """ Parser for PL/0 grammar
    You need to create an instance of parser for each program
    """
    def __init__(self):
        self.lexer = Lexer()
        self.token_generator = None
        self.current_token = None
        self.current_level = -1
        self.table = SymTable()
        self.pcode = PCodeManager()

    def load_program(self, program):
        self.lexer.load_program(program)
        self.token_generator = self.lexer.get_symbol()

    def analyze(self):
        try:
            self._program()
            # print('Compile Successful!')
            for ln, line in enumerate(self.pcode):
                # print('[%d]' % ln, line)
                print(line)
            return self.pcode.get()
        except CompilerError as e:
            e.pos = self.lexer.pos
            print('[%d] %s' % (e.pos[0], self.lexer.get_line(e.pos[0])),
                  file=sys.stderr)
            print('*** %s at %s' % (e.message, str(e.pos)), file=sys.stderr)

    def _program(self):
        """ The following is rec-descent parser.
        Each handler will move forward 1 token before return, therefore self.current_token is assigned at
        the beginning of each function.
        """
        self._forward()
        self.table.enter(Record())
        self._block(3)
        self._expect(Token(None, '.'))

    def _block(self, dx):
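        # dx is the next free data offset in the current block's stack frame.
        # Callers pass 3 because, in the classic PL/0 layout, the first three
        # slots are reserved for control information; the INT instruction
        # emitted below allocates dx cells for the frame.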
        def _const():
            def _const_decl():
                self._expect(Token('IDENTIFIER', None))
                record.name = self.current_token.value
                self._forward()
                self._expect(Token(None, '='))
                self._forward()
                self._expect(Token('NUMBER', None))
                record.value = int(self.current_token.value)
                self.table.enter(record)
                self._forward()

            record = Record('const', None, None, 0)
            self._expect(Token(None, 'const'))
            self._forward()
            while True:
                _const_decl()
                if self.current_token.value == ',':
                    self._forward()
                else:
                    break
            self._expect(Token(None, ';'))
            self._forward()

        def _var():
            def _var_decl():
                nonlocal dx
                self._expect(Token('IDENTIFIER', None))
                record.name = self.current_token.value
                record.address = dx
                dx += 1
                self.table.enter(record)
                self._forward()

            record = Record('var', None, None, self.current_level)
            self._expect(Token(None, 'var'))
            self._forward()
            while True:
                _var_decl()
                if self.current_token.value == ',':
                    self._forward()
                else:
                    break
            self._expect(Token(None, ';'))
            self._forward()
            return dx

        def _procedure():
            record = Record('procedure', None, None, self.current_level)
            while self.current_token.value == 'procedure':
                self._forward()
                self._expect(Token('IDENTIFIER', None))
                record.name = self.current_token.value
                self.table.enter(record)
                self._forward()
                self._expect(Token(None, ';'))
                self._forward()
                self._block(3)
                self._expect(Token(None, ';'))
                self._forward()

        self.current_level += 1
        tx0 = len(self.table) - 1  # should be a procedure record
        code1 = len(self.pcode)  # record the JMP's index for back-patching below
        self.pcode.gen(OpCode.JMP, 0, 0)

        if self.current_token.value == 'const':
            _const()
        if self.current_token.value == 'var':
            _var()
        if self.current_token.value == 'procedure':
            _procedure()
        self.pcode[code1].a = len(self.pcode)  # fill back the JMP inst
        self.table[tx0].address = len(
            self.pcode)  # this value will be used by call
        self.pcode.gen(OpCode.INT, 0, dx)
        self._statement()
        self.pcode.gen(OpCode.OPR, 0, 0)
        self.current_level -= 1
        self.table[tx0 + 1:] = []

    def _statement(self):
        if self.current_token.type == 'IDENTIFIER':
            record = self.table.get(self.current_token.value, 'var')
            self._forward()
            self._expect(Token(None, ':='))
            self._forward()
            self._expression()
            self.pcode.gen(OpCode.STO, self.current_level - record.level,
                           record.address)

        elif self.current_token.value == 'if':
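            # The JPC emitted at code1 skips the then-branch when the condition
            # is false; the JMP emitted at code2 skips the else-branch after the
            # then-branch runs. Both jump targets are back-patched below once
            # their destinations are known.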
            self._forward()
            self._condition()
            self._expect(Token(None, 'then'))
            self._forward()
            code1 = len(self.pcode)
            self.pcode.gen(OpCode.JPC, 0, 0)
            self._statement()  # then statement
            code2 = len(self.pcode)
            self.pcode.gen(OpCode.JMP, 0, 0)
            if self.current_token.value == 'else':
                self._forward()
                self.pcode[code1].a = len(self.pcode)
                self._statement()  # else statement
            else:
                self.pcode[code1].a = len(self.pcode)
            self.pcode[code2].a = len(self.pcode)

        elif self.current_token.value == 'while':
            code1 = len(self.pcode)
            self._forward()
            self._condition()
            code2 = len(self.pcode)
            self.pcode.gen(OpCode.JPC, 0, 0)
            self._expect(Token(None, 'do'))
            self._forward()
            self._statement()
            self.pcode.gen(OpCode.JMP, 0, code1)
            self.pcode[code2].a = len(self.pcode)

        elif self.current_token.value == 'call':
            self._forward()
            self._expect(Token('IDENTIFIER', None))
            record = self.table.get(self.current_token.value, 'procedure')
            self.pcode.gen(OpCode.CAL, self.current_level - record.level,
                           record.address)
            self._forward()

        elif self.current_token.value == 'begin':
            self._forward()
            self._statement()
            while self.current_token.value == ';':
                self._forward()
                self._statement()
            self._expect(Token(None, 'end'))
            self._forward()

        elif self.current_token.value == 'repeat':
            self._forward()
            code1 = len(self.pcode)
            self._statement()
            while self.current_token.value == ';':
                self._forward()
                self._statement()
            self._expect(Token(None, 'until'))
            self._forward()
            self._condition()
            self.pcode.gen(OpCode.JPC, 0, code1)

        elif self.current_token.value == 'read':
            self._forward()
            self._expect(Token(None, '('))
            self._forward()
            while True:
                self._expect(Token('IDENTIFIER', None))
                record = self.table.get(self.current_token.value, 'var')
                self.pcode.gen(OpCode.RED, self.current_level - record.level,
                               record.address)
                self._forward()
                if self.current_token.value != ',':
                    break
                else:
                    self._forward()
            self._expect(Token(None, ')'))
            self._forward()

        elif self.current_token.value == 'write':
            self._forward()
            self._expect(Token(None, '('))
            self._forward()
            while True:
                self._expression()
                self.pcode.gen(OpCode.WRT, 0, 0)
                if self.current_token.value != ',':
                    break
                else:
                    self._forward()
            self._expect(Token(None, ')'))
            self._forward()

    def _condition(self):
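        # OPR 0,n encodes a comparison whose boolean result is left on the
        # stack (6 = odd, 7 = '=', 8 = '<>', 9 = '<', 10 = '>=', 11 = '>',
        # 12 = '<='); callers such as _statement consume it with a JPC.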
        if self.current_token.value == 'odd':
            self._forward()
            self._expression()
            self.pcode.gen(OpCode.OPR, 0, 6)
        else:
            self._expression()
            self._expect(Token('RELATIONAL_OPERATOR', None))
            op = self.current_token.value
            self._forward()
            self._expression()
            if op == '=':
                self.pcode.gen(OpCode.OPR, 0, 7)
            elif op == '<>':
                self.pcode.gen(OpCode.OPR, 0, 8)
            elif op == '<':
                self.pcode.gen(OpCode.OPR, 0, 9)
            elif op == '>=':
                self.pcode.gen(OpCode.OPR, 0, 10)
            elif op == '>':
                self.pcode.gen(OpCode.OPR, 0, 11)
            elif op == '<=':
                self.pcode.gen(OpCode.OPR, 0, 12)

    def _expression(self):
        if self.current_token.type == 'PLUS_OPERATOR':  # unary operator
            op = self.current_token.value
            self._forward()
            self._term()
            if op == '-':
                self.pcode.gen(OpCode.OPR, 0, 1)
        else:
            self._term()
        while self.current_token.type == 'PLUS_OPERATOR':  # binary operator
            op = self.current_token.value
            self._forward()
            self._term()
            if op == '+':
                self.pcode.gen(OpCode.OPR, 0, 2)
            else:
                self.pcode.gen(OpCode.OPR, 0, 3)

    def _term(self):
        self._factor()
        while self.current_token.type == 'MULTIPLY_OPERATOR':
            op = self.current_token.value
            self._forward()
            self._factor()
            if op == '*':
                self.pcode.gen(OpCode.OPR, 0, 4)
            else:
                self.pcode.gen(OpCode.OPR, 0, 5)

    def _factor(self):
        if self.current_token.type == 'IDENTIFIER':
            record = self.table.get(self.current_token.value)
            if record.type == 'const':
                self.pcode.gen(OpCode.LIT, 0, record.value)
            elif record.type == 'var':
                self.pcode.gen(OpCode.LOD, self.current_level - record.level,
                               record.address)
            elif record.type == 'procedure':
                raise ParserError('Wrong variable type')
            self._forward()
        elif self.current_token.type == 'NUMBER':
            self.pcode.gen(OpCode.LIT, 0, int(self.current_token.value))
            self._forward()
        else:
            self._expect(Token(None, '('))
            self._forward()
            self._expression()
            self._expect(Token(None, ')'))
            self._forward()

    def _forward(self):
        try:
            self.current_token = next(self.token_generator)
        except StopIteration:
            raise ParserError('unexpected end of program', self.lexer.pos)

    def _expect(self, token: Token):
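        # A Token with type=None is matched on value only, one with value=None
        # is matched on type only; otherwise both fields must match the
        # current token exactly.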
        if token.type is None:
            b = token.value == self.current_token.value
        elif token.value is None:
            b = token.type == self.current_token.type
        else:
            b = token == self.current_token
        if not b:
            raise ParserError(
                'Expecting "%s" but current token is "%s"' %
                (str(token.value), str(self.current_token.value)),
                self.lexer.pos)
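
# A minimal usage sketch for the PL/0 Parser above; hedged: the source string
# is illustrative, and it assumes Lexer, SymTable and PCodeManager are
# available in this module as the class expects.
def _pl0_demo():
    program = """
    var x;
    begin
        x := 1;
        write(x)
    end.
    """
    p = Parser()            # one Parser instance per program, per the docstring
    p.load_program(program)
    return p.analyze()      # prints the generated p-code and returns it
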
    def ret(self):
        self.cur_sub, self.instructions = self.return_stack.pop()

    def load_sub(self, name):
        self.cur_sub = name
        self.load_chunk('__top__')

    def load_chunk(self, name):
        self.instructions = list(self.subs[self.cur_sub][name])


if __name__ == '__main__':
    import argparse
    from compiler.compiler import Compiler
    from compiler.preprocessor import Preprocessor
    from compiler.lexer import Lexer
    from compiler.parser_ import Parser

    parser = argparse.ArgumentParser()
    parser.add_argument('file', help="C File", type=argparse.FileType('r'))

    args = parser.parse_args()

    with args.file as f:
        pre = Preprocessor(f.read(), f.name)
        parser = Parser(Lexer(pre.transform()))
        compiler = Compiler()
        assembly = compiler.compile_program(parser.parse_program())
        e = Emulator(assembly)
        e.run()
Example #15
 def setUp(self):
     self.lexer = Lexer()
    parser.add_argument('--debug', action='store_true', help="Enable debug output")
    parser.add_argument('--stack', help="Stack size", type=int, default=8)
    parser.add_argument('--arg', help="ASM file arguments", action='append')
    parser.add_argument('--place-location', default="~1,~,~1",
                        help="Location to place command blocks")
    parser.add_argument('--enable-sync', help="Enable SYNC opcode", action='store_true')
    parser.add_argument('--page-size', type=int, default=64, help="Memory page size")
    parser.add_argument('--dump-asm', action='store_true', help="Dump generated ASM")

    args = parser.parse_args()

    compiler = Compiler()
    with args.file as f:
        pre = Preprocessor(f.read(), f.name)
        code = pre.transform()
        parser = Parser(Lexer(code))
        assembly = compiler.compile_program(parser.parse_program())

    if args.dump_asm:
        print(assembly)

    assembler = ExtendedAssembler()
    assembler.enable_sync = args.enable_sync
    assembler.parse(assembly)

    sargs = {}
    if args.arg:
        for arg in args.arg:
            k, v = arg.split('=', 1)
            sargs[k] = v
    parse_pos = lambda p: Rel(int(p[1:]) if p[1:] else 0) if p[0] == '~' else int(p)
Example #17
def test_parser(chunk, chunkname):
    parser = Parser()
    lexer = Lexer(chunk, chunkname)
    ast = parser.parse_block(lexer)
    print(ast)
Example #18
from compiler.lexer import Lexer
from compiler.parser import Parser
from compiler.codegen import LLVMCodegenVisitor

code = '''
def add(a: int, b: int) -> int:
    return a + b
'''

tokens = Lexer(code).lex()
ast = Parser(tokens).parse()
module = LLVMCodegenVisitor(ast).start_visit()

print(module)
Example #19
class LexerTests(unittest.TestCase):
    def setUp(self):
        self.lexer = Lexer()

    def expected_token_len(self, tokens, num):
        self.assertEqual(len(tokens), num, f'Token list should contain {num} token(s)')

    def expected_token(self, token: Token, token_type: TokenType, token_value: str):
        self.assertEqual(token.token_type, token_type, f'Token should be type {token_type}')
        self.assertEqual(token.value, token_value, f'Token should have value "{token_value}"')

    def test_lex_empty(self):
        self.lexer.input_text = ''
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 1)
        self.assertEqual(tokens[0].token_type, TokenType.EOF, 'Only token should be EOF token')

    def test_lex_whitespaces(self):
        self.lexer.input_text = '  \t     5          60 '
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 3)
        self.expected_token(tokens[0], TokenType.INT, '5')
        self.expected_token(tokens[1], TokenType.INT, '60')

    def test_lex_comments(self):
        self.lexer.input_text = '// Single line comment \n 1 /* multi line \n still comment \t  ccccc \n */ 2 /* to end'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 3)
        self.expected_token(tokens[0], TokenType.INT, '1')
        self.expected_token(tokens[1], TokenType.INT, '2')

    def test_lex_brackets(self):
        self.lexer.input_text = '[]{}()'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 7)
        self.expected_token(tokens[0], TokenType.BLOCK_OPEN, '[')
        self.expected_token(tokens[1], TokenType.BLOCK_CLOSE, ']')
        self.expected_token(tokens[2], TokenType.CURLY_OPEN, '{')
        self.expected_token(tokens[3], TokenType.CURLY_CLOSE, '}')
        self.expected_token(tokens[4], TokenType.PAREN_OPEN, '(')
        self.expected_token(tokens[5], TokenType.PAREN_CLOSE, ')')

    def test_lex_punctuation(self):
        self.lexer.input_text = '.,;'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 4)
        self.expected_token(tokens[0], TokenType.DOT, '.')
        self.expected_token(tokens[1], TokenType.COMMA, ',')
        self.expected_token(tokens[2], TokenType.SEMICOLON, ';')

    def test_lex_fun_decl(self):
        self.lexer.input_text = '::   ->'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 3)
        self.expected_token(tokens[0], TokenType.DOUBLECOLON, '::')
        self.expected_token(tokens[1], TokenType.ARROW, '->')

    def test_lex_operators(self):
        self.lexer.input_text = ': : - > <+*/==<=!>= > = != ! = &&||%'
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 21)
        self.expected_token(tokens[0], TokenType.OPERATOR, ':')
        self.expected_token(tokens[1], TokenType.OPERATOR, ':')
        self.expected_token(tokens[2], TokenType.OPERATOR, '-')
        self.expected_token(tokens[3], TokenType.OPERATOR, '>')
        self.expected_token(tokens[4], TokenType.OPERATOR, '<')
        self.expected_token(tokens[5], TokenType.OPERATOR, '+')
        self.expected_token(tokens[6], TokenType.OPERATOR, '*')
        self.expected_token(tokens[7], TokenType.OPERATOR, '/')
        self.expected_token(tokens[8], TokenType.OPERATOR, '==')
        self.expected_token(tokens[9], TokenType.OPERATOR, '<=')
        self.expected_token(tokens[10], TokenType.OPERATOR, '!')
        self.expected_token(tokens[11], TokenType.OPERATOR, '>=')
        self.expected_token(tokens[12], TokenType.OPERATOR, '>')
        self.expected_token(tokens[13], TokenType.OPERATOR, '=')
        self.expected_token(tokens[14], TokenType.OPERATOR, '!=')
        self.expected_token(tokens[15], TokenType.OPERATOR, '!')
        self.expected_token(tokens[16], TokenType.OPERATOR, '=')
        self.expected_token(tokens[17], TokenType.OPERATOR, '&&')
        self.expected_token(tokens[18], TokenType.OPERATOR, '||')
        self.expected_token(tokens[19], TokenType.OPERATOR, '%')

    def test_lex_char(self):
        self.lexer.input_text = "a'b'c '\n' '\t'"
        tokens = self.lexer.lex_input()
        self.expected_token_len(tokens, 6)
        self.expected_token(tokens[0], TokenType.IDENTIFIER, 'a')
        self.expected_token(tokens[1], TokenType.CHAR, 'b')
        self.expected_token(tokens[2], TokenType.IDENTIFIER, 'c')
        self.expected_token(tokens[3], TokenType.CHAR, '\n')
        self.expected_token(tokens[4], TokenType.CHAR, '\t')
Example #20
class LexerTestCase(unittest.TestCase):
    def setUp(self) -> None:
        self.lexer = None
        self.tokens = list()

    def _test_lex_impl(self, buf, expected_tokens):
        self.lexer = Lexer(buf)
        while True:
            tok = self.lexer.lex_token()
            self.tokens.append(tok)
            if tok.type == TokenType.EOF:
                break
        self.assertEqual(len(self.tokens), len(expected_tokens))
        for t, exp in zip(self.tokens, expected_tokens):
            self.assertEqual(t,
                             exp,
                             msg="Got=({0}), Expected=({1})".format(t, exp))

    def test_function(self):
        buf = """
        fn foo() -> int {
          print("blah");
        };
        """
        tokens = [
            Token(TokenType.FUNCTION, "fn"),
            Token(TokenType.IDENTIFIER, "foo"),
            Token(TokenType.L_BRACKET, "("),
            Token(TokenType.R_BRACKET, ")"),
            Token(TokenType.ARROW, "->"),
            Token(TokenType.INT, "int"),
            Token(TokenType.L_BRACE, "{"),
            Token(TokenType.IDENTIFIER, "print"),
            Token(TokenType.L_BRACKET, "("),
            Token(TokenType.STRING_LITERAL, "blah"),
            Token(TokenType.R_BRACKET, ")"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.R_BRACE, "}"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.EOF, str()),
        ]
        self._test_lex_impl(buf, tokens)

    def test_function_w_args(self):
        buf = """
        fn foo(int one, string two) -> int {
          print("blah");
        };
        """
        tokens = [
            Token(TokenType.FUNCTION, "fn"),
            Token(TokenType.IDENTIFIER, "foo"),
            Token(TokenType.L_BRACKET, "("),
            Token(TokenType.INT, "int"),
            Token(TokenType.IDENTIFIER, "one"),
            Token(TokenType.COMMA, ","),
            Token(TokenType.STRING, "string"),
            Token(TokenType.IDENTIFIER, "two"),
            Token(TokenType.R_BRACKET, ")"),
            Token(TokenType.ARROW, "->"),
            Token(TokenType.INT, "int"),
            Token(TokenType.L_BRACE, "{"),
            Token(TokenType.IDENTIFIER, "print"),
            Token(TokenType.L_BRACKET, "("),
            Token(TokenType.STRING_LITERAL, "blah"),
            Token(TokenType.R_BRACKET, ")"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.R_BRACE, "}"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.EOF, str()),
        ]
        self._test_lex_impl(buf, tokens)

    def test_struct(self):
        buf = """
        struct Foo {
          int id = 3;
          string name;
          fn foo() -> int {
            return 3;
          }
        };
        """
        tokens = [
            Token(TokenType.STRUCTURE, "struct"),
            Token(TokenType.IDENTIFIER, "Foo"),
            Token(TokenType.L_BRACE, "{"),
            Token(TokenType.INT, "int"),
            Token(TokenType.IDENTIFIER, "id"),
            Token(TokenType.ASSIGN, "="),
            Token(TokenType.NUMBER_LITERAL, "3"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.STRING, "string"),
            Token(TokenType.IDENTIFIER, "name"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.FUNCTION, "fn"),
            Token(TokenType.IDENTIFIER, "foo"),
            Token(TokenType.L_BRACKET, "("),
            Token(TokenType.R_BRACKET, ")"),
            Token(TokenType.ARROW, "->"),
            Token(TokenType.INT, "int"),
            Token(TokenType.L_BRACE, "{"),
            Token(TokenType.RETURN, "return"),
            Token(TokenType.NUMBER_LITERAL, "3"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.R_BRACE, "}"),
            Token(TokenType.R_BRACE, "}"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.EOF, str()),
        ]
        self._test_lex_impl(buf, tokens)

    def test_control_flow(self):
        buf = """
        if (x == 3) {
          let y = 3;
        }
        """
        tokens = [
            Token(TokenType.IF, "if"),
            Token(TokenType.L_BRACKET, "("),
            Token(TokenType.IDENTIFIER, "x"),
            Token(TokenType.EQUALS, "=="),
            Token(TokenType.NUMBER_LITERAL, "3"),
            Token(TokenType.R_BRACKET, ")"),
            Token(TokenType.L_BRACE, "{"),
            Token(TokenType.LET, "let"),
            Token(TokenType.IDENTIFIER, "y"),
            Token(TokenType.ASSIGN, "="),
            Token(TokenType.NUMBER_LITERAL, "3"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.R_BRACE, "}"),
            Token(TokenType.EOF, str()),
        ]
        self._test_lex_impl(buf, tokens)

    def test_dot_operator(self):
        buf = """
        print(foo.bar);
        """
        tokens = [
            Token(TokenType.IDENTIFIER, "print"),
            Token(TokenType.L_BRACKET, "("),
            Token(TokenType.IDENTIFIER, "foo"),
            Token(TokenType.DOT, "."),
            Token(TokenType.IDENTIFIER, "bar"),
            Token(TokenType.R_BRACKET, ")"),
            Token(TokenType.SEMICOLON, ";"),
            Token(TokenType.EOF, str()),
        ]
        self._test_lex_impl(buf, tokens)
Example #21
function main() {
    let i = input("Please input the number: ");
    if (i > 0) {
        print("-> Call User Defined Function !");
        userDefined();
    } else {
        print();
        print("Input value equal to or less than 0 !");
    }
}

main();
"""

lexer = Lexer().build()  # Build the lexer using LexerGenerator
tokens: LexerStream
try:
    tokens = lexer.lex(call_declared_functions)  # Stream the input through lexical analysis
    tokenType = map(lambda x: x.gettokentype(), copy(tokens))
    tokenName = map(lambda x: x.getstr(), copy(tokens))
    pprint(list(copy(tokens)))
    # pprint(list(copy(tokenType)))
    # pprint(list(copy(tokenName)))
except BaseException:
    traceback.print_exc()
finally:
    print("Finish lexical analysis !")

SymbolTable = ParserState()
syntaxRoot: Node
Example #22
import argparse

from graphviz import Digraph

import compiler.tree_printer
from compiler.lexer import Lexer
from compiler.names import Scope
from compiler.parser import Parser

lexer = Lexer()
lexer.build()
parser = Parser(lexer.tokens)
parser.build()
scope = Scope()


def print_tokens(code):
    lexer.lexer.input(code)
    tok = lexer.lexer.token()
    while tok:
        print(tok)
        tok = lexer.lexer.token()


def run(code, opt, ast_file_name=None, repl_mode=False):
    if repl_mode and code[-1] != ";":
        code += ";"

    res = parser.parse(lexer, code)

    if res is not None: