Exemple #1
0
    def test_compile_func_call(self):
        """Compile a function declaration followed by a call and check the C output.

        The scope_begin and scope_over opcodes are exercised too, because
        they delimit the declared function's body.
        """
        out_path = "testing.c"

        program = [
            OpCode("func_decl", "hello---", ""),
            OpCode("scope_begin", "", ""),
            OpCode("print", '"World"', None),
            OpCode("scope_over", "", ""),
            OpCode("func_call", "hello---", ""),
        ]
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["hello", "var", "function"],
            2: ['"World"', "string", "constant"],
        }

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting, so a failed
        # assertion does not leave the file behind.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = [
            "#include <stdio.h>",
            "",
            "void hello(void) {",
            '\tprintf("World");',
            "}",
            "\thello();",
            "",
        ]
        self.assertEqual(lines, expected)
Exemple #2
0
    def test_compile_while(self):
        """Compile a while loop with a print in its body and check the C output."""
        out_path = "testing.c"

        program = [
            OpCode("var_assign", "i---0", "int"),
            OpCode("while", "i < 10", None),
            OpCode("scope_begin", "", ""),
            OpCode("print", '"%d", i', None),
            OpCode("scope_over", "", ""),
        ]
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["i", "int", "variable"],
            2: ["0", "int", "constant"],
            3: ["10", "int", "constant"],
        }

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = [
            "#include <stdio.h>",
            "\tint i = 0;",
            "\twhile(i < 10) {",
            '\tprintf("%d", i);',
            "}",
            "",
        ]
        self.assertEqual(lines, expected)
Exemple #3
0
    def test_compile_for(self):
        """Compile a for-loop opcode with a print in its body and check the C output."""
        out_path = "testing.c"

        program = [
            OpCode("for", "i&&&1&&&10&&&+&&&<&&&1", None),
            OpCode("scope_begin", "", ""),
            OpCode("print", '"%d", i', None),
            OpCode("scope_over", "", ""),
        ]
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["i", "int", "variable"],
            2: ["1", "int", "constant"],
            3: ["10", "int", "constant"],
            4: ["1", "int", "constant"],
        }

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = [
            "#include <stdio.h>",
            "\tfor(int i = 1; i < 10; i+=1) {",
            '\tprintf("%d", i);',
            "}",
            "",
        ]
        self.assertEqual(lines, expected)
Exemple #4
0
    def test_compile_single_multi_line_comments(self):
        """Compile one single-line and one multi-line comment opcode and check the output."""
        out_path = "testing.c"

        # The multi-line literal is whitespace-sensitive: its indentation is
        # part of the value and shows up again in the expected output.
        program = [
            OpCode("single_line_comment", " single line", ""),
            OpCode(
                "multi_line_comment",
                """
                    Multi line
                    """,
                "",
            ),
        ]
        symbols = SymbolTable()

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = [
            "",
            "\t//  single line ",
            "/* ",
            "                    Multi line",
            "                    */",
            "",
        ]
        self.assertEqual(lines, expected)
def unary_statement(tokens, i, table, func_ret_type):
    """
    Parse unary statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Pending function return type information; forwarded to
                                  expression() in the postfix form and handed back to the caller

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the unary code, the index after parsing
                                unary statement and the function return type information

    Grammar
    =======
    unary_statement -> id operator | operator id
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> ++ | --
    """

    if tokens[i].type not in ["increment", "decrement"]:
        # Postfix form: the current token is not an operator, so the operator
        # must be the next token
        check_if(
            tokens[i + 1].type,
            ["increment", "decrement"],
            "Expected unary operator after identifier",
            tokens[i + 1].line_num,
        )
        # Let expression() build the opcode value starting at the identifier
        op_value, _, i, func_ret_type = expression(
            tokens,
            i,
            table,
            "",
            accept_empty_expression=True,
            expect_paren=False,
            func_ret_type=func_ret_type,
        )
        # Return the opcode and i (the token after unary statement)
        return OpCode("unary", op_value), i, func_ret_type

    # Prefix form: the operator comes first and must be followed by an identifier
    check_if(
        tokens[i + 1].type,
        "id",
        "Expected identifier after unary operator",
        tokens[i + 1].line_num,
    )
    op_value = "++ --- " if tokens[i].type == "increment" else "-- --- "

    # Only the identifier's name is needed here. The second field returned by
    # get_by_id is the symbol's datatype and must not overwrite func_ret_type,
    # which is handed back to the caller unchanged.
    value, _, _ = table.get_by_id(tokens[i + 1].val)
    op_value += str(value)

    # Skip the operator and the identifier
    return OpCode("unary", op_value), i + 2, func_ret_type
Exemple #6
0
    def test_check_include(self):
        """check_include returns the stdio include only when a print opcode is present."""
        with_print = [OpCode("print", ""), OpCode("var_assign", "")]
        self.assertEqual(check_include(with_print), "#include <stdio.h>")

        without_print = [OpCode("var_assign", "")]
        self.assertEqual(check_include(without_print), "")
Exemple #7
0
class TestOpCodeClass(unittest.TestCase):
    """Unit tests for the OpCode class's string form and opcode2dig lookup."""

    def setUp(self):
        # Fresh opcode for every test
        self.opcode = OpCode("var_assign", "a---1 + 2", "int")

    def test__str__(self):
        expected = "OpCode('var_assign', 'a---1 + 2', 'int')"
        self.assertEqual(str(self.opcode), expected)

    def test_opcode2dig(self):
        # The last name is not a known opcode and is expected to map to 0
        cases = (("var_assign", 2), ("unary", 12), ("hello", 0))
        for opcode_name, digit in cases:
            self.assertEqual(self.opcode.opcode2dig(opcode_name), digit)
Exemple #8
0
    def test_compile_main_end_main(self):
        """Compile the MAIN / END_MAIN opcode pair and check the generated main function."""
        out_path = "testing.c"

        program = [OpCode("MAIN", "", ""), OpCode("END_MAIN", "", "")]
        symbols = SymbolTable()

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["", "", "int main() {", "", "\treturn 0;", "}"]
        self.assertEqual(lines, expected)
Exemple #9
0
    def test_compile_continue_break(self):
        """Compile continue and break opcodes and check the generated statements."""
        out_path = "testing.c"

        program = [OpCode("continue", "", ""), OpCode("break", "", "")]
        symbols = SymbolTable()

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["", "\tcontinue;", "\tbreak;", ""]
        self.assertEqual(lines, expected)
Exemple #10
0
def switch_statement(tokens, i, table, func_ret_type):
    """
    Parse switch statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token (the token expected to be the opening parenthesis)
    table         (SymbolTable) = Symbol table, forwarded to expression()
    func_ret_type               = Forwarded to expression() and returned as updated by it

    Returns
    =======
    OpCode, int, func_ret_type: The switch opcode (expression text without its last character),
                                the index returned by expression() plus one, and func_ret_type
    """
    opening = tokens[i]
    check_if(opening.type, "left_paren", "Expected ( after switch",
             opening.line_num)

    # Parse the parenthesised expression that is switched on
    op_value, _, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside switch statement",
        func_ret_type=func_ret_type,
    )

    closing = tokens[i - 1]
    check_if(
        closing.type,
        "right_paren",
        "Expected ) after expression in switch",
        closing.line_num,
    )

    # The brace is looked up one token ahead, while a failure is reported on
    # the line of the current token
    check_if(
        tokens[i + 1].type,
        "left_brace",
        "Expected { after switch statement",
        tokens[i].line_num,
    )

    switched_on = op_value[:-1]
    return OpCode("switch", switched_on, ""), i + 1, func_ret_type
Exemple #11
0
    def test_compile_assign(self):
        """Compile a declaration followed by an assignment and check the C output."""
        out_path = "testing.c"

        program = [
            OpCode("var_no_assign", "a", None),
            OpCode("assign", "a---=---3.14159", ""),
        ]
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["a", "float", "variable"],
            2: ["3.14159", "float", "constant"],
        }

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["", "\tfloat a;", "\ta = 3.14159;", ""]
        self.assertEqual(lines, expected)
Exemple #12
0
    def test_compile_unary(self):
        """Compile a variable assignment plus a postfix increment and check the C output."""
        out_path = "testing.c"

        program = [
            OpCode("var_assign", "a---1", "int"),
            OpCode("unary", "a ++ ", None),
        ]
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["a", "int", "variable"],
            2: ["1", "int", "constant"],
        }

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["", "\tint a = 1;", "\ta++;", ""]
        self.assertEqual(lines, expected)
Exemple #13
0
def exit_statement(tokens, i, table, func_ret_type):
    """
    Parse exit statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Forwarded to expression() and returned as updated by it

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the exit code (expression text without its last
                                character), the index returned by expression() and func_ret_type

    Grammar
    =======
    exit_statement -> exit(expr)
    expr            -> number
    number          -> [0-9]+

    """
    # Check if ( follows exit statement
    check_if(
        tokens[i].type,
        "left_paren",
        "Expected ( after exit statement",
        tokens[i].line_num,
    )

    # Check if number follows ( in exit statement; a failure is reported on
    # the line of the opening parenthesis
    check_if(
        tokens[i + 1].type,
        "number",
        "Expected number after ( in exit statement",
        tokens[i].line_num,
    )

    # Parse the expression that follows ( in exit statement
    op_value, _, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside exit statement",
        func_ret_type=func_ret_type,
    )

    # Check if ) follows expression in exit statement
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after expression in exit statement",
        tokens[i - 1].line_num,
    )

    return OpCode("exit", op_value[:-1]), i, func_ret_type
Exemple #14
0
    def test_compile_ptr_assign(self):
        """Compile a variable assignment plus a pointer assignment and check the C output."""
        out_path = "testing.c"

        program = [
            OpCode("var_assign", "a---1", "int"),
            OpCode("ptr_assign", "n---&a---1", "int"),
        ]
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["a", "int", "variable"],
            2: ["1", "int", "constant"],
            3: ["n", "int", "variable"],
        }

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["", "\tint a = 1;", "\tint *n = &a;", ""]
        self.assertEqual(lines, expected)
Exemple #15
0
    def test_compile_return(self):
        """Compile a function whose body is a single return and check the C output."""
        out_path = "testing.c"

        program = [
            OpCode("func_decl", "hello---", ""),
            OpCode("scope_begin", "", ""),
            OpCode("return", "1", ""),
            OpCode("scope_over", "", ""),
        ]
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["hello", "int", "function"],
            2: ["1", "int", "constant"],
        }

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["", "", "int hello(void) {", "", "\treturn 1;", "}", ""]
        self.assertEqual(lines, expected)
Exemple #16
0
    def test_compile_exit(self):
        """Compile a single exit opcode and check the generated exit call."""
        out_path = "testing.c"

        program = [OpCode("exit", "0", None)]
        symbols = SymbolTable()
        symbols.symbol_table = {1: ["0", "int", "constant"]}

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["", "\texit(0);", ""]
        self.assertEqual(lines, expected)
Exemple #17
0
    def test_compile_print(self):
        """Compile a single print opcode and check the include line and printf call."""
        out_path = "testing.c"

        program = [OpCode("print", '"%d", 1')]
        symbols = SymbolTable()

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["#include <stdio.h>", '\tprintf("%d", 1);', ""]
        self.assertEqual(lines, expected)
Exemple #18
0
    def test_compile_var_no_assign(self):
        """Compile a declaration without a value and check the generated line."""
        out_path = "testing.c"

        program = [OpCode("var_no_assign", "a", None)]
        symbols = SymbolTable()
        symbols.symbol_table = {1: ["a", "declared", "variable"]}

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = ["", "\tdeclared a;", ""]
        self.assertEqual(lines, expected)
Exemple #19
0
def case_statement(tokens, i, table, func_ret_type):
    """
    Parse case statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token (start of the case expression)
    table         (SymbolTable) = Symbol table, forwarded to expression()
    func_ret_type               = Forwarded to expression() and returned as updated by it

    Returns
    =======
    OpCode, int, func_ret_type: The case opcode holding the expression text, the index after
                                the colon and func_ret_type
    """

    # The case expression is not wrapped in parentheses
    op_value, _, i, func_ret_type = expression(
        tokens,
        i,
        table,
        "Expected expression after case",
        expect_paren=False,
        func_ret_type=func_ret_type,
    )

    # Check if : follows the case expression
    check_if(
        tokens[i].type,
        "colon",
        "Expected : after case in switch statement",
        tokens[i].line_num,
    )

    return OpCode("case", op_value, ""), i + 1, func_ret_type
Exemple #20
0
def print_statement(tokens, i, table, func_ret_type):
    """
    Parse print statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token (the token expected to be the opening parenthesis)
    table         (SymbolTable) = Symbol table, forwarded to expression()
    func_ret_type               = Forwarded to expression() and returned as updated by it

    Returns
    =======
    OpCode, int, func_ret_type: The print opcode, the index returned by expression() plus one,
                                and func_ret_type

    Grammar
    =======
    print_statement -> print(expr)
    expr            -> string | number | id | operator
    string          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote           -> "
    number          -> [0-9]+
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> + | - | * | /
    """

    # print must be followed by (
    opening = tokens[i]
    check_if(
        opening.type,
        "left_paren",
        "Expected ( after print statement",
        opening.line_num,
    )

    # Parse whatever is being printed
    op_value, op_type, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside print statement",
        func_ret_type=func_ret_type,
    )

    # Format specifier prefix selected by the type code reported by expression()
    format_prefix = {
        0: "",
        1: '"%s", ',
        2: '"%c", ',
        3: '"%d", ',
        4: '"%f", ',
        5: '"%lf", ',
    }[op_type]
    # The last character of the expression text is dropped
    printf_args = format_prefix + op_value[:-1]

    # The token before the returned index must be the closing )
    closing = tokens[i - 1]
    check_if(
        closing.type,
        "right_paren",
        "Expected ) after expression in print statement",
        closing.line_num,
    )

    return OpCode("print", printf_args), i + 1, func_ret_type
Exemple #21
0
    def test_compile_if_else_if_else(self):
        """Compile an if / else if / else chain and check the generated C output."""
        out_path = "testing.c"

        program = [
            OpCode("var_assign", "i---0", "int"),
            OpCode("if", "i == 1", None),
            OpCode("scope_begin", "", ""),
            OpCode("print", '"%d", 1', None),
            OpCode("scope_over", "", ""),
            OpCode("else_if", "i == 2", None),
            OpCode("scope_begin", "", ""),
            OpCode("print", '"%d", 2', None),
            OpCode("scope_over", "", ""),
            OpCode("else", "", ""),
            OpCode("scope_begin", "", ""),
            OpCode("print", '"Else"', None),
            OpCode("scope_over", "", ""),
        ]
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["i", "int", "variable"],
            2: ["0", "int", "constant"],
            3: ["1", "int", "constant"],
            4: ["1", "int", "constant"],
            5: ["2", "int", "constant"],
            6: ["2", "int", "constant"],
            7: ['"Else"', "string", "constant"],
        }

        compile(program, out_path, symbols)

        # Read the generated file and delete it before asserting.
        with open(out_path, "r") as generated:
            lines = generated.read().split("\n")
        os.remove(out_path)

        expected = [
            "#include <stdio.h>",
            "\tint i = 0;",
            "\tif(i == 1) {",
            '\tprintf("%d", 1);',
            "}",
            "\telse if(i == 2) {",
            '\tprintf("%d", 2);',
            "}",
            "\telse {",
            '\tprintf("Else");',
            "}",
            "",
        ]
        self.assertEqual(lines, expected)
Exemple #22
0
def _for_counts_down(starting_val, ending_val):
    """Return True when the loop's starting value is numerically above its ending value."""
    try:
        # Symbol table values may be stored as text; comparing text would be
        # lexicographic ("2" > "10"), so compare as numbers
        return float(starting_val) > float(ending_val)
    except (TypeError, ValueError):
        # Not convertible to a number: fall back to comparing the raw values
        return starting_val > ending_val


def for_statement(tokens, i, table, func_ret_type):
    """
    Parse for for_loop

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Returned to the caller unchanged

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the for loop code, i + 1 and func_ret_type.
                                The opcode value joins variable name, starting value, ending value,
                                operator, comparison sign and change value with "&&&"

    Grammar
    =======
    for_loop    -> for id in number to number by operator number
    number      -> [0-9]+
    id          -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator    -> + | - | * | /

    """

    word_to_op = {"plus": "+", "minus": "-", "multiply": "*", "divide": "/"}

    # Expected token type at each offset from i, with the error to report
    header_layout = (
        (0, "id", "Expected variable name"),
        (1, "in", "Expected in keyword"),
        (2, "number", "Expected starting value"),
        (3, "to", "Expected to keyword"),
        (4, "number", "Expected ending value"),
        (5, "by", "Expected by keyword"),
        # Validate the operator here so that a wrong token is reported as a
        # syntax error instead of failing the word_to_op lookup below
        (6, list(word_to_op), "Expected operator after by keyword"),
        (7, "number", "Expected value for change"),
    )
    for offset, expected_type, message in header_layout:
        token = tokens[i + offset]
        check_if(token.type, expected_type, message, token.line_num)

    # Get required values
    var_name, _, _ = table.get_by_id(tokens[i].val)
    # The loop variable is always given the int datatype
    table.symbol_table[tokens[i].val][1] = "int"
    starting_val, _, _ = table.get_by_id(tokens[i + 2].val)
    ending_val, _, _ = table.get_by_id(tokens[i + 4].val)
    operator_type = word_to_op[tokens[i + 6].type]
    change_val, _, _ = table.get_by_id(tokens[i + 7].val)

    # To determine the > or < sign
    sign_needed = ">" if _for_counts_down(starting_val, ending_val) else "<"

    op_value = "&&&".join([
        str(var_name),
        str(starting_val),
        str(ending_val),
        str(operator_type),
        sign_needed,
        str(change_val),
    ])

    return OpCode("for", op_value), i + 1, func_ret_type
Exemple #23
0
def if_statement(tokens, i, table, func_ret_type):
    """
    Parse if statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Forwarded to expression() and returned as updated by it

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the if code (condition text without its last
                                character), the index of the opening brace and func_ret_type

    Grammar
    =======
    if_statement -> if(condition) { body }
    condition       -> expr
    expr            -> string | number | id | operator
    string          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote           -> "
    number          -> [0-9]+
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> + | - | * | /
    """
    # Check if ( follows if statement
    check_if(
        tokens[i].type,
        "left_paren",
        "Expected ( after if statement",
        tokens[i].line_num,
    )

    # Parse the condition that follows ( in if statement
    op_value, _, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside if statement",
        func_ret_type=func_ret_type,
    )

    # Check if ) follows expression in if statement
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after expression in if statement",
        tokens[i - 1].line_num,
    )

    # If \n follows ) then skip all the \n characters
    if tokens[i + 1].type == "newline":
        i += 1
        while tokens[i].type == "newline":
            i += 1
        i -= 1

    # Check if { follows ) in if statement
    check_if(
        tokens[i + 1].type,
        "left_brace",
        "Expected { before if body",
        tokens[i + 1].line_num,
    )

    # Scan forward from the first body token until a } is reached
    ret_idx = i + 2
    i = ret_idx
    while i < len(tokens) and tokens[i].type != "right_brace":
        i += 1

    # Running off the end means no } was found. tokens[i] does not exist in
    # that case, so report the error on the last token's line.
    if i == len(tokens):
        error("Expected } after if body", tokens[-1].line_num)

    return OpCode("if", op_value[:-1]), ret_idx - 1, func_ret_type
Exemple #24
0
def function_call_statement(tokens, i, table, func_ret_type):
    """
    Parse function calling statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token (the function name)
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type (dict)        = If return type of function is not figured yet

    Returns
    =======
    OpCode, int, dict: The opcode for the function call, index after parsing function calling
                       statement and function return type

    Grammar
    =======
    function_call_statement   -> id([actual_params,]*)
    actual_params             -> expr
    body                      -> statement
    expr                      -> string | number | id | operator
    string                    -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote                     -> "
    number                    -> [0-9]+
    id                        -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator                  -> + | - | * | /
    """

    # Look the function up in the symbol table
    func_name, _, metadata = table.get_by_id(tokens[i].val)

    # Formal parameters are stored in the typedata as <id>---<param 1>--- . . . ---<param n>;
    # [")"] stands for an empty parameter list
    if "---" in metadata:
        params = metadata.split("---")[1:]
    else:
        params = [")"]
    num_formal_params = 0 if params == [")"] else len(params)

    # Parse the actual parameters, starting after the opening parenthesis
    op_value, _, i, func_ret_type = expression(
        tokens,
        i + 2,
        table,
        "",
        True,
        True,
        expect_paren=True,
        func_ret_type=func_ret_type,
    )
    op_value_list = op_value.replace(" ", "").split(",")
    if not (len(op_value_list) > 0 and len(op_value_list[0]) > 0):
        op_value_list = []
    num_actual_params = 0 if op_value_list == [")"] else len(op_value_list)

    # The call must pass as many parameters as the definition declares
    if num_formal_params != num_actual_params:
        error(
            "Expected %d parameters but got %d parameters in function %s" %
            (num_formal_params, num_actual_params, func_name),
            tokens[i].line_num,
        )

    # Give every formal parameter the datatype of its actual parameter
    for j, formal in enumerate(params):
        # Placeholder for an empty parameter list
        if formal == ")":
            continue

        actual = op_value_list[j].replace(")", "")
        _, dtype, _ = table.get_by_id(table.get_by_symbol(actual))
        table.symbol_table[table.get_by_symbol(formal)][1] = dtype

    # Resolve a return type that was left pending for this function
    if func_name in func_ret_type.keys():
        _, op_type, _, _ = expression(tokens, func_ret_type[func_name], table,
                                      "")

        #  Map datatype to appropriate datatype in C
        prec_to_type = {
            0: "char*",
            1: "char*",
            2: "char",
            3: "int",
            4: "float",
            5: "double",
        }

        func_entry = table.symbol_table[table.get_by_symbol(func_name)]
        func_entry[1] = prec_to_type[op_type]
        del func_ret_type[func_name]

    # The last character of the joined parameter text is dropped
    call_value = func_name + "---" + "&&&".join(op_value_list)[:-1]
    return OpCode("func_call", call_value, ""), i + 1, func_ret_type
Exemple #25
0
def function_definition_statement(tokens, i, table, func_ret_type):
    """
    Parse function definition statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Forwarded to expression() and returned as updated by it

    Returns
    =======
    OpCode, int, string, func_ret_type: The opcode for the function declaration, the index of the
                                        opening brace, the name of the function and func_ret_type

    Grammar
    =======
    function_definition_statement   -> fun id([formal_params,]*) { body }
    formal_params                   -> expr
    body                            -> statement
    expr                            -> string | number | id | operator
    string                          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\]+ quote
    quote                           -> "
    number                          -> [0-9]+
    id                              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator                        -> + | - | * | /
    """

    # Check if identifier follows fun
    check_if(tokens[i].type, "id", "Expected function name",
             tokens[i].line_num)

    # Store the id of function name in symbol table
    func_idx = tokens[i].val

    # Get function name
    func_name, _, _ = table.get_by_id(func_idx)

    # Check if ( follows id in function
    check_if(
        tokens[i + 1].type,
        "left_paren",
        "Expected ( after function name",
        tokens[i + 1].line_num,
    )

    # Parse the formal parameter list that follows (
    op_value, _, i, func_ret_type = expression(
        tokens, i + 2, table, "", True, True, func_ret_type=func_ret_type)
    op_value_list = op_value.replace(" ", "").replace(")", "").split(",")

    # Check if ) follows expression in function
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after function params list",
        tokens[i - 1].line_num,
    )

    # If \n follows ) then skip all the \n characters
    if tokens[i + 1].type == "newline":
        i += 1
        while tokens[i].type == "newline":
            i += 1
        i -= 1

    # Check if { follows ) in function
    check_if(
        tokens[i + 1].type,
        "left_brace",
        "Expected { before function body",
        tokens[i + 1].line_num,
    )

    # Scan forward from the first body token until a } is reached
    ret_idx = i + 2
    i = ret_idx
    while i < len(tokens) and tokens[i].type != "right_brace":
        i += 1

    # Running off the end means no } was found. tokens[i] does not exist in
    # that case, so report the error on the last token's line.
    if i == len(tokens):
        error("Expected } after function body", tokens[-1].line_num)

    # Add the parameter names to function's typedata
    if op_value_list and op_value_list[0]:
        typedata = "function---" + "---".join(op_value_list)
    else:
        typedata = "function"
    table.symbol_table[func_idx][2] = typedata

    return (
        OpCode("func_decl", func_name + "---" + "&&&".join(op_value_list), ""),
        ret_idx - 1,
        func_name,
        func_ret_type,
    )
Exemple #26
0
def parse(tokens, table):
    """
    Parse tokens and generate opcodes

    Params
    ======
    tokens (list)        = List of tokens
    table  (SymbolTable) = Symbol table holding information about identifiers and constants

    Returns
    =======
    list: The list of opcodes

    Grammar
    =======
    statement -> print_statement | var_statement | assign_statement | function_definition_statement
    """

    # List of opcodes
    op_codes = []

    # Current function's name
    func_name = ""

    # Do while started or not
    in_do = False

    # Count main functions
    main_fn_count = 0

    # Count if conditions
    if_count = 0

    # Brace count
    brace_count = 0

    # If function return type could not be figured out during return then do it while calling
    func_ret_type = {}

    # Loop through all the tokens
    i = 0
    while i <= len(tokens) - 1:
        # If token is of type print then generate print opcode
        if tokens[i].type == "print":
            print_opcode, i, func_ret_type = print_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(print_opcode)
        # If token is of type var then generate var opcode
        elif tokens[i].type == "var":
            var_opcode, i, func_ret_type = var_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(var_opcode)
        # If token is of type id then generate assign opcode
        elif tokens[i].type == "id":
            # If '(' follows id then it is function calling else variable assignment
            if tokens[i + 1].type == "left_paren":
                fun_opcode, i, func_ret_type = function_call_statement(
                    tokens, i, table, func_ret_type)
                op_codes.append(fun_opcode)
            elif tokens[i + 1].type in ["increment", "decrement"]:
                unary_opcode, i, func_ret_type = unary_statement(
                    tokens, i, table, func_ret_type)
                op_codes.append(unary_opcode)
            else:
                assign_opcode, i, func_ret_type = assign_statement(
                    tokens, i + 1, table, func_ret_type)
                op_codes.append(assign_opcode)
        # If token is of type fun then generate function opcode
        elif tokens[i].type == "fun":
            fun_opcode, i, func_name, func_ret_type = function_definition_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(fun_opcode)
        # If token is of type left_brace then generate scope_begin opcode
        elif tokens[i].type == "left_brace":
            op_codes.append(OpCode("scope_begin", "", ""))
            brace_count += 1
            i += 1
        # If token is of type right_brace then generate scope_over opcode
        elif tokens[i].type == "right_brace":
            op_codes.append(OpCode("scope_over", "", ""))
            brace_count -= 1

            if brace_count < 0:
                error(
                    "Closing brace doesn't match any previous opening brace",
                    tokens[i].line_num,
                )
            i += 1
        # If token is of type MAIN then generate MAIN opcode
        elif tokens[i].type == "MAIN":
            op_codes.append(OpCode("MAIN", "", ""))
            main_fn_count += 1
            if main_fn_count > 1:
                error("Presence of two MAIN in a single file",
                      tokens[i].line_num)
            i += 1
        # If token is of type END_MAIN then generate END_MAIN opcode
        elif tokens[i].type == "END_MAIN":
            op_codes.append(OpCode("END_MAIN", "", ""))
            main_fn_count -= 1
            i += 1
        # If token is of type for then generate for code
        elif tokens[i].type == "for":
            for_opcode, i, func_ret_type = for_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(for_opcode)
        # If token is of type do then generate do_while code
        elif tokens[i].type == "do":
            check_if(
                tokens[i + 1].type,
                "left_brace",
                "Expected { after do statement",
                tokens[i + 1].line_num,
            )
            # Remember that the next while belongs to this do block
            in_do = True
            op_codes.append(OpCode("do", "", ""))
            i += 1
        # If token is of type while then generate while opcode
        elif tokens[i].type == "while":
            while_opcode, i, func_ret_type = while_statement(
                tokens, i + 1, table, in_do, func_ret_type)
            if in_do:
                in_do = False
            op_codes.append(while_opcode)
        # If token is of type if then generate if opcode
        elif tokens[i].type == "if":
            if_opcode, i, func_ret_type = if_statement(tokens, i + 1, table,
                                                       func_ret_type)
            op_codes.append(if_opcode)

            # Increment if count on encountering if
            if_count += 1
        # If token is of type exit then generate exit opcode
        elif tokens[i].type == "exit":
            exit_opcode, i, func_ret_type = exit_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(exit_opcode)
        # If token is of type else then check whether it is else if or else
        elif tokens[i].type == "else":
            # If the next token is if, then it is else if
            if tokens[i + 1].type == "if":
                if_opcode, i, func_ret_type = if_statement(
                    tokens, i + 2, table, func_ret_type)
                if_opcode.type = "else_if"
                op_codes.append(if_opcode)
            # Otherwise it is else
            elif tokens[i + 1].type == "left_brace":
                op_codes.append(OpCode("else", "", ""))

                # Decrement if count on encountering else, to make sure there aren't extra else conditions
                if_count -= 1

                # If if_count is negative then the current else is extra
                if if_count < 0:
                    error("Else does not match any if!", tokens[i].line_num)

                i += 1
            # Neither if nor { follows else: report it and move on. Without
            # this branch the index never advanced and the loop never ended.
            else:
                error("Expected { or if after else", tokens[i].line_num)
                # Advance even if error() returns, so progress is guaranteed
                i += 1
        # If token is of type return then generate return opcode
        elif tokens[i].type == "return":
            # Index of the first token after return, stored when the type is unknown
            beg_idx = i + 1
            if tokens[i + 1].type not in ["id", "number", "string"]:
                # Nothing returnable follows: treat as a void return (6 maps to "void")
                op_value = ""
                op_type = 6
                i += 2
            else:
                op_value, op_type, i, func_ret_type = expression(
                    tokens,
                    i + 1,
                    table,
                    "Expected expression after return",
                    True,
                    True,
                    expect_paren=False,
                    func_ret_type=func_ret_type,
                )
            if func_name == "":
                error("Return statement outside any function",
                      tokens[i].line_num)
            else:
                #  Map datatype to appropriate datatype in C
                prec_to_type = {
                    -1: "not_known",
                    0: "char*",
                    1: "char*",
                    2: "char",
                    3: "int",
                    4: "float",
                    5: "double",
                    6: "void",
                }

                # Type unknown for now: remember where the returned expression starts
                if op_type == -1:
                    func_ret_type[func_name] = beg_idx

                # Change return type of function
                table.symbol_table[table.get_by_symbol(
                    func_name)][1] = prec_to_type[op_type]

                # Set func_name to an empty string after processing
                func_name = ""
            op_codes.append(OpCode("return", op_value, ""))
        # If token is of type break then generate break opcode
        elif tokens[i].type == "break":
            op_codes.append(OpCode("break", "", ""))
            i += 1
        # If token is of type continue then generate continue opcode
        elif tokens[i].type == "continue":
            op_codes.append(OpCode("continue", "", ""))
            i += 1
        # If token is of type single_line_comment then generate single_line_comment opcode
        elif tokens[i].type == "single_line_comment":
            op_codes.append(OpCode("single_line_comment", tokens[i].val, ""))
            i += 1
        # If token is of type multi_line_comment then generate multi_line_comment opcode
        elif tokens[i].type == "multi_line_comment":
            op_codes.append(OpCode("multi_line_comment", tokens[i].val, ""))
            i += 1
        # If token is of type switch then generate switch opcode
        elif tokens[i].type == "switch":
            switch_opcode, i, func_ret_type = switch_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(switch_opcode)
        # If token is of type case then generate case opcode
        elif tokens[i].type == "case":
            case_opcode, i, func_ret_type = case_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(case_opcode)
        # If token is of type default then generate default opcode
        elif tokens[i].type == "default":
            check_if(
                tokens[i + 1].type,
                "colon",
                "Expected : after default statement in switch",
                tokens[i + 1].line_num,
            )
            op_codes.append(OpCode("default", "", ""))
            i += 2
        # If token is the type increment or decrement then generate unary_opcode
        elif tokens[i].type in ["increment", "decrement"]:
            unary_opcode, i, func_ret_type = unary_statement(
                tokens, i, table, func_ret_type)
            op_codes.append(unary_opcode)

        # Otherwise increment the index
        else:
            i += 1

    # Errors that may occur after parsing loop
    if main_fn_count != 0:
        error("MAIN not ended with END_MAIN", tokens[i - 1].line_num + 1)

    # Return opcodes
    return op_codes
Exemple #27
0
def var_statement(tokens, i, table, func_ret_type):
    """
    Parse variable declaration [/initialization] statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type information, handed to expression() and returned back

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the var_assign/var_no_assign (or ptr_assign/ptr_no_assign)
                                code, the index after parsing var statement and the func_ret_type value

    Grammar
    =======
    var_statement   -> var id [= expr]?
    expr            -> string | number | id | operator
    string          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\\]+ quote
    quote           -> "
    number          -> [0-9]+
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> + | - | * | /
    """

    # Pointer information is resolved first; check_ptr also gives back the index to continue from
    is_ptr, count_ast, i = check_ptr(tokens, i)

    # An identifier has to come right after var
    check_if(tokens[i].type, "id", "Expected id after var keyword",
             tokens[i].line_num)

    # Type of the token following the identifier (None when the identifier is the last token)
    next_type = tokens[i + 1].type if i + 1 < len(tokens) else None

    # Declaration together with initialization
    if next_type == "assignment":
        # Remember where the identifier is, i is overwritten by expression()
        id_idx = i

        # An expression has to follow = in var statement
        op_value, op_type, i, func_ret_type = expression(
            tokens,
            i + 2,
            table,
            "Required expression after assignment operator",
            expect_paren=False,
            func_ret_type=func_ret_type,
        )

        # Datatype name stored for each type code returned by expression()
        prec_to_type = {
            0: "string",
            1: "string",
            2: "char",
            3: "int",
            4: "float",
            5: "double",
        }
        var_type = prec_to_type[op_type]

        # Store the datatype of the identifier in the symbol table
        entry = table.symbol_table[tokens[id_idx].val]
        entry[1] = var_type

        # Pointers additionally carry their depth in the opcode value
        if is_ptr:
            opcode = OpCode(
                "ptr_assign",
                entry[0] + "---" + op_value + "---" + str(count_ast),
                var_type,
            )
        else:
            opcode = OpCode("var_assign", entry[0] + "---" + op_value,
                            var_type)

        # Return the opcode and i (the token after var statement)
        return opcode, i, func_ret_type

    # Tokens that are not accepted after declaration of a variable
    rejected_after_decl = (
        "plus_equal",
        "minus_equal",
        "divide_equal",
        "multiply_equal",
        "plus",
        "minus",
        "divide",
        "multiply",
        "modulus",
        "modulus_equal",
        "equal",
        "not_equal",
    )
    if next_type in rejected_after_decl:
        error("Invalid Syntax for declaration", tokens[i].line_num)
        return None

    # Plain declaration: fetch what the symbol table knows about the identifier
    value, declared_type, _ = table.get_by_id(tokens[i].val)

    # Any of these types means the variable has been declared before
    known_types = (
        "declared",
        "int",
        "char",
        "float",
        "double",
        "string",
        "char *",
        "char*",
    )
    if declared_type in known_types:
        error("Variable %s already declared" % value, tokens[i].line_num)

    # Set declared
    table.symbol_table[tokens[i].val][1] = "declared"

    # Return the opcode and i+1 (the token after var statement)
    opcode_name = "ptr_no_assign" if is_ptr else "var_no_assign"
    return OpCode(opcode_name, value), i + 1, func_ret_type
Exemple #28
0
 def setUp(self):
     """Build a var_assign OpCode and store it on self.opcode."""
     # Positional arguments handed to OpCode, in order
     opcode_args = ("var_assign", "a---1 + 2", "int")
     self.opcode = OpCode(*opcode_args)
Exemple #29
0
def assign_statement(tokens, i, table, func_ret_type):
    """
    Parse assignment statement

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Index of the assignment operator token (the identifier is at i - 1)
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type               = Function return type information, handed to expression() and returned back

    Returns
    =======
    OpCode, int, func_ret_type: The opcode for the assign/ptr_only_assign code, the index after
                                parsing assign statement and the func_ret_type value

    Grammar
    =======
    assign_statement -> [*]* id assign_op expr
    assign_op        -> = | += | -= | *= | /= | %=
    expr             -> string | number | id | operator
    string           -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\\]+ quote
    quote            -> "
    number           -> [0-9]+
    id               -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator         -> + | - | * | /
    """

    # Count the * tokens directly in front of the identifier (depth of pointer).
    # The scan is bounded at index 0: a negative index would silently wrap
    # around to the end of the token list instead of stopping.
    count_ast = 0
    k = i - 2
    while k >= 0 and tokens[k].type == "multiply":
        count_ast += 1
        k -= 1

    # The identifier is a pointer if at least one * precedes it
    is_ptr = count_ast > 0

    # Check if variable is declared or not
    value, id_type, _ = table.get_by_id(tokens[i - 1].val)

    if id_type == "var":
        error("Variable %s used before declaration" % value,
              tokens[i - 1].line_num)

    # Dictionary to convert tokens to their corresponding assignment types
    assignment_type = {
        "assignment": "=",
        "plus_equal": "+=",
        "minus_equal": "-=",
        "multiply_equal": "*=",
        "divide_equal": "/=",
        "modulus_equal": "%=",
    }
    # Check if assignment operator follows identifier name
    check_if(
        tokens[i].type,
        [
            "assignment",
            "plus_equal",
            "minus_equal",
            "multiply_equal",
            "divide_equal",
            "modulus_equal",
        ],
        "Expected assignment operator after identifier",
        tokens[i].line_num,
    )
    # Convert the token to respective symbol
    converted_type = assignment_type[tokens[i].type]
    # Store the index of identifier
    id_idx = i - 1

    # Check if expression follows = in assign statement
    op_value, op_type, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Required expression after assignment operator",
        expect_paren=False,
        func_ret_type=func_ret_type,
    )
    #  Map datatype to appropriate datatype in C
    prec_to_type = {
        0: "string",
        1: "string",
        2: "char",
        3: "int",
        4: "float",
        5: "double",
    }
    # Opcode value carries the operator symbol followed by the expression
    op_value = converted_type + "---" + op_value
    # Modify datatype of the identifier
    table.symbol_table[tokens[id_idx].val][1] = prec_to_type[op_type]
    # Check if a pointer is being assigned
    if is_ptr:
        return (
            OpCode(
                "ptr_only_assign",
                table.symbol_table[tokens[id_idx].val][0] + "---" + op_value +
                "---" + str(count_ast),
                "",
            ),
            i,
            func_ret_type,
        )

    # Return the opcode and i (the token after assign statement)
    return (
        OpCode("assign",
               table.symbol_table[tokens[id_idx].val][0] + "---" + op_value,
               ""),
        i,
        func_ret_type,
    )
Exemple #30
0
    def test_compile_switch_case_default(self):
        """Compile switch, case and default opcodes and compare the generated C lines."""
        # Opcodes for a switch on `a` with one case and a default branch
        program = [
            OpCode("var_assign", "a---1", "int"),
            OpCode("switch", "a", ""),
            OpCode("scope_begin", "", ""),
            OpCode("case", "1", ""),
            OpCode("scope_begin", "", ""),
            OpCode("print", '"Hello"', None),
            OpCode("scope_over", "", ""),
            OpCode("default", "", ""),
            OpCode("scope_begin", "", ""),
            OpCode("print", '"Bye"', None),
            OpCode("scope_over", "", ""),
            OpCode("scope_over", "", ""),
        ]

        # Symbol table entries for the identifier and constants used above
        symbols = SymbolTable()
        symbols.symbol_table = {
            1: ["a", "int", "variable"],
            2: ["1", "int", "constant"],
            3: ["1", "int", "constant"],
            4: ['"Hello"', "string", "constant"],
            5: ['"Bye"', "string", "constant"],
        }

        # Lines the generated C file is expected to contain
        expected_lines = [
            "#include <stdio.h>",
            "\tint a = 1;",
            "\tswitch(a) {",
            "\tcase 1:",
            "{",
            '\tprintf("Hello");',
            "}",
            "\tdefault:",
            "{",
            '\tprintf("Bye");',
            "}",
            "}",
            "",
        ]

        out_path = "testing.c"
        compile(program, out_path, symbols)

        # Read the generated file back, then remove it before asserting
        with open(out_path, "r") as generated:
            produced_lines = generated.read().split("\n")

        os.remove(out_path)

        self.assertEqual(produced_lines, expected_lines)