Ejemplo n.º 1
0
def numeric_val(source_code, i, table, scanner_obj):
    """
    Processes numeric values in the source code
    Params
    ======
    source_code (str)
        : The string containing simc source code
    i           (int)
        : The current index in the source code
    table       (SymbolTable)
        : Symbol table constructed holding information about identifiers and constants
    scanner_obj (Scanner)
        : Instance of Scanner class (only its line_num is read here)
    Returns
    =======
    (Token)
        : The token generated for the numeric constant
    (int)
        : Current position in source code (index of the first character not consumed)
    """

    # Consume characters for as long as is_digit() accepts them. The bounds
    # check lets a literal that is the very last thing in the source end the
    # scan cleanly instead of indexing past the end of the string.
    start = i
    while i < len(source_code) and is_digit(source_code[i]):
        i += 1
    numeric_constant = source_code[start:i]

    # If a numeric constant contains more than 1 decimal point (.) then that is invalid
    if numeric_constant.count(".") > 1:
        error(
            "Invalid numeric constant, cannot have more than one decimal point in a"
            " number!",
            scanner_obj.line_num,
        )

    # Number of characters after the first "." decides between float and double
    if "." in numeric_constant:
        fraction_len = len(numeric_constant.split(".")[1])
    else:
        fraction_len = 0

    # No fractional digits -> int, up to 7 -> float, more than 7 -> double
    if fraction_len == 0:
        const_type = "int"
    elif fraction_len <= 7:
        const_type = "float"
    else:
        const_type = "double"

    # Make entry in symbol table
    symbol_id = table.entry(numeric_constant, const_type, "constant")

    # Return number token and current index in source code
    return Token("number", symbol_id, scanner_obj.line_num), i
Ejemplo n.º 2
0
def string_val(source_code, i, table, line_num, start_char='"'):
    """
    Processes string values in the source code

    Params
    ======
    source_code (string) = The string containing simc source code
    i           (int)    = The current index in the source code (index of the opening quote)
    table       (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    line_num    (int)         = Line number
    start_char  (str) (Optional) = Character with which string starts (and must end)

    Returns
    =======
    Token, int: The token generated for the string constant and the current position in source code
                (index just past the closing quote), this is done only if there is no error in the
                string constant
    """

    # Skip the opening quote so that the scan starts on the first character of the literal
    i += 1
    start = i

    # Loop until the closing quote is reached; a "\0" before it means the
    # source ended inside the literal
    while i < len(source_code) and source_code[i] != start_char:
        if source_code[i] == "\0":
            error("Unterminated string!", line_num)
        i += 1

    # Running off the end of the source without a sentinel is also an
    # unterminated literal (previously this surfaced as an IndexError)
    if i >= len(source_code):
        error("Unterminated string!", line_num)

    string_constant = source_code[start:i]

    # Skip the closing quote so that it does not loop back to this function incorrectly
    i += 1

    # Exactly one character is a char; anything else is a string. An empty
    # literal must be a string because '' is not a valid C character constant.
    const_type = "char" if len(string_constant) == 1 else "string"

    # Put appropriate quote
    if const_type == "string":
        string_constant = '"' + string_constant + '"'
    else:
        string_constant = "'" + string_constant + "'"

    # Make entry in symbol table
    symbol_id = table.entry(string_constant, const_type, "constant")

    # Return string token and current index in source code
    return Token("string", symbol_id, line_num), i
Ejemplo n.º 3
0
def string_val(source_code, i, table, scanner_obj, start_char='"'):
    """
    Processes string values in the source code
    Params
    ======
    source_code (str)
        : The string containing simc source code
    i           (int)
        : The current index in the source code (index of the opening quote)
    table       (SymbolTable)
        : Symbol table constructed holding information about identifiers and constants
    scanner_obj (Scanner)
        : Instance of Scanner class (only its line_num is read here)
    start_char  (str) (Optional)
        : Character with which string starts (and must end)
    Returns
    =======
    (Token)
        : The token generated for the string constant
    (int)
        : Current position in source code (index just past the closing quote)
    """

    # Skip the opening "/' so that the scan starts on the first character of the literal
    i += 1
    start = i

    # Loop until the closing quote is reached; a "\0" before it means the
    # source ended inside the literal
    while i < len(source_code) and source_code[i] != start_char:
        if source_code[i] == "\0":
            error("Unterminated string!", scanner_obj.line_num)
        i += 1

    # Running off the end of the source without a sentinel is also an
    # unterminated literal (previously this surfaced as an IndexError)
    if i >= len(source_code):
        error("Unterminated string!", scanner_obj.line_num)

    # Skip the closing "/' character so that it does not loop back to this function incorrectly
    closing = i
    i += 1

    # The constant is always stored with double quotes, whatever start_char was
    string_constant = '"' + source_code[start:closing] + '"'

    # Make entry in symbol table
    symbol_id = table.entry(string_constant, "string", "constant")

    # Return string token and current index in source code
    return Token("string", symbol_id, scanner_obj.line_num), i
Ejemplo n.º 4
0
def check_if(given_type, should_be_types, msg, line_num):
    """
    Check if type matches what it should be otherwise report an error

    Params
    ======
    given_type      (string)      = Type of token to be checked
    should_be_types (string/list) = Type(s) to be compared with
    msg             (string)      = Error message to print in case some case fails
    line_num        (int)         = Line number

    Returns
    =======
    None: error(msg, line_num) is called when given_type is not one of should_be_types
    """

    # A single type name is wrapped in a list; isinstance also covers str
    # subclasses, which would otherwise fall through to a substring test below
    if isinstance(should_be_types, str):
        should_be_types = [should_be_types]

    # If the given_type is not part of should_be_types then report the error
    if given_type not in should_be_types:
        error(msg, line_num)
Ejemplo n.º 5
0
def assign_statement(tokens, i, table, func_ret_type):
    """
    Parse assignment statement

    Params
    ======
    tokens        (list) = List of tokens
    i             (int)  = Index of the assignment operator token (the identifier is at i - 1)
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type (dict) = Passed through to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, dict: The opcode for the assign code ("assign" or "ptr_only_assign"), the index
                       after parsing assign statement and the func_ret_type returned by expression()

    Grammar
    =======
    assign_statement -> [*]* id assign_op expr
    assign_op        -> = | += | -= | *= | /= | %=
    expr             -> string | number | id | operator
    string           -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\\]+ quote
    quote            -> "
    number           -> [0-9]+
    id               -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator         -> + | - | * | /
    """

    # Count the run of '*' tokens directly in front of the identifier; that
    # count is the pointer depth. The k >= 0 guard keeps the walk from
    # wrapping around to the end of the token list via negative indexing.
    count_ast = 0
    k = i - 2
    while k >= 0 and tokens[k].type == "multiply":
        count_ast += 1
        k -= 1
    is_ptr = count_ast > 0

    # Check if variable is declared or not
    value, var_type, _ = table.get_by_id(tokens[i - 1].val)

    if var_type == "var":
        error("Variable %s used before declaration" % value,
              tokens[i - 1].line_num)

    # Dictionary to convert tokens to their corresponding assignment types
    assignment_type = {
        "assignment": "=",
        "plus_equal": "+=",
        "minus_equal": "-=",
        "multiply_equal": "*=",
        "divide_equal": "/=",
        "modulus_equal": "%=",
    }

    # Check if assignment operator follows identifier name; the accepted
    # token types are exactly the keys of the mapping above
    check_if(
        tokens[i].type,
        list(assignment_type),
        "Expected assignment operator after identifier",
        tokens[i].line_num,
    )

    # Convert the token to respective symbol
    converted_type = assignment_type[tokens[i].type]

    # Store the index of identifier (i is overwritten by expression() below)
    id_idx = i - 1

    # Check if expression follows the assignment operator
    op_value, op_type, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Required expression after assignment operator",
        expect_paren=False,
        func_ret_type=func_ret_type,
    )

    # Map the type code returned by expression() to a datatype name
    prec_to_type = {
        0: "string",
        1: "string",
        2: "char",
        3: "int",
        4: "float",
        5: "double",
    }
    op_value = converted_type + "---" + op_value

    # Modify datatype of the identifier
    table.symbol_table[tokens[id_idx].val][1] = prec_to_type[op_type]
    target = table.symbol_table[tokens[id_idx].val][0]

    # A pointer assignment additionally carries the pointer depth
    if is_ptr:
        return (
            OpCode(
                "ptr_only_assign",
                target + "---" + op_value + "---" + str(count_ast),
                "",
            ),
            i,
            func_ret_type,
        )

    # Return the opcode and i (the token after assign statement)
    return (
        OpCode("assign", target + "---" + op_value, ""),
        i,
        func_ret_type,
    )
Ejemplo n.º 6
0
def var_statement(tokens, i, table, func_ret_type):
    """
    Parse a variable declaration, optionally followed by an initialization

    Params
    ======
    tokens        (list) = List of tokens
    i             (int)  = Current index in token
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type (string) = Function return type, passed through expression() and returned

    Returns
    =======
    OpCode, int, func_ret_type: The opcode (ptr_assign / var_assign / ptr_no_assign / var_no_assign),
                                the index after parsing var statement and the function return type.
                                Nothing is returned on the "Invalid Syntax for declaration" path.

    Grammar
    =======
    var_statement   -> var id [= expr]?
    expr            -> string | number | id | operator
    string          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\\]+ quote
    quote           -> "
    number          -> [0-9]+
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> + | - | * | /
    """

    # check_ptr yields the pointer flag, the asterisk count and the index to continue from
    is_ptr, count_ast, i = check_ptr(tokens, i)

    # An identifier has to come next
    check_if(tokens[i].type, "id", "Expected id after var keyword",
             tokens[i].line_num)

    # Operators that may not directly follow the declared name
    rejected_followers = (
        "plus_equal",
        "minus_equal",
        "divide_equal",
        "multiply_equal",
        "plus",
        "minus",
        "divide",
        "multiply",
        "modulus",
        "modulus_equal",
        "equal",
        "not_equal",
    )

    has_follower = i + 1 < len(tokens)

    # --- Declaration with initialization -------------------------------
    if has_follower and tokens[i + 1].type == "assignment":
        # Remember where the identifier sits; i is overwritten below
        name_idx = i

        # The initializer starts right after the '='
        op_value, op_type, i, func_ret_type = expression(
            tokens,
            name_idx + 2,
            table,
            "Required expression after assignment operator",
            expect_paren=False,
            func_ret_type=func_ret_type,
        )

        # Translate the type code of the initializer into a datatype name
        c_type = {
            0: "string",
            1: "string",
            2: "char",
            3: "int",
            4: "float",
            5: "double",
        }[op_type]

        # Record the datatype on the identifier, then read its name back
        symbol_key = tokens[name_idx].val
        table.symbol_table[symbol_key][1] = c_type
        name = table.symbol_table[symbol_key][0]

        if is_ptr:
            opcode = OpCode(
                "ptr_assign",
                name + "---" + op_value + "---" + str(count_ast),
                c_type,
            )
        else:
            opcode = OpCode("var_assign", name + "---" + op_value, c_type)

        # i is the token after the var statement
        return opcode, i, func_ret_type

    # --- Declaration followed by a stray operator ----------------------
    if has_follower and tokens[i + 1].type in rejected_followers:
        error("Invalid Syntax for declaration", tokens[i].line_num)
        return None

    # --- Plain declaration ----------------------------------------------
    value, current_type, _ = table.get_by_id(tokens[i].val)

    # Any of these types means the name has been declared before
    if current_type in (
            "declared",
            "int",
            "char",
            "float",
            "double",
            "string",
            "char *",
            "char*",
    ):
        error("Variable %s already declared" % value, tokens[i].line_num)

    # Mark the identifier as declared
    table.symbol_table[tokens[i].val][1] = "declared"

    # i + 1 is the token after the var statement
    opcode_kind = "ptr_no_assign" if is_ptr else "var_no_assign"
    return OpCode(opcode_kind, value), i + 1, func_ret_type
Ejemplo n.º 7
0
def if_statement(tokens, i, table, func_ret_type):
    """
    Parse if statement

    Params
    ======
    tokens        (list) = List of tokens
    i             (int)  = Current index in token (expected to be the "(" after if)
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type (dict) = Passed through to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, dict: The "if" opcode carrying the condition, the index of the "{" that opens
                       the body, and the func_ret_type returned by expression()

    Grammar
    =======
    if_statement -> if(condition) { body }
    condition       -> expr
    expr            -> string | number | id | operator
    string          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\\]+ quote
    quote           -> "
    number          -> [0-9]+
    id              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator        -> + | - | * | /
    """
    # Check if ( follows if statement
    check_if(
        tokens[i].type,
        "left_paren",
        "Expected ( after if statement",
        tokens[i].line_num,
    )

    # check if expression follows ( in if statement
    op_value, _, i, func_ret_type = expression(
        tokens,
        i + 1,
        table,
        "Expected expression inside if statement",
        func_ret_type=func_ret_type,
    )

    # check if ) follows expression in if statement
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after expression in if statement",
        tokens[i - 1].line_num,
    )

    # If \n follows ) then skip all the \n characters
    if tokens[i + 1].type == "newline":
        i += 1
        while tokens[i].type == "newline":
            i += 1
        i -= 1

    # Check if { follows ) in if statement
    check_if(
        tokens[i + 1].type,
        "left_brace",
        "Expected { before if body",
        tokens[i + 1].line_num,
    )

    # Scan forward from the first body token until a } is reached
    i += 2
    ret_idx = i
    while i < len(tokens) and tokens[i].type != "right_brace":
        i += 1

    # Running off the end means no } was found. i is one past the last token
    # here, so the line number is taken from the last token instead of
    # indexing out of range.
    if i >= len(tokens):
        error("Expected } after if body", tokens[-1].line_num)

    # op_value[:-1] drops the final character of the condition text
    # (presumably the closing parenthesis); ret_idx - 1 is the index of the {
    return OpCode("if", op_value[:-1]), ret_idx - 1, func_ret_type
Ejemplo n.º 8
0
def function_call_statement(tokens, i, table, func_ret_type):
    """
    Parse a function call and propagate argument types to the callee's parameters

    Params
    ======
    tokens        (list)        = List of tokens
    i             (int)         = Current index in token (the function name)
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type (dict)        = Functions whose return type is not figured out yet

    Returns
    =======
    OpCode, int, dict: The "func_call" opcode, the index after parsing the call and the
                       (possibly shrunk) func_ret_type

    Grammar
    =======
    function_call_statement   -> id([actual_params,]*)
    actual_params             -> expr
    body                      -> statement
    expr                      -> string | number | id | operator
    string                    -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\\]+ quote
    quote                     -> "
    number                    -> [0-9]+
    id                        -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator                  -> + | - | * | /
    """

    # Look the callee up in the symbol table
    func_name, _, metadata = table.get_by_id(tokens[i].val)

    # Typedata is stored as <id>---<param 1>---...---<param n>; [")"] is the
    # marker for "no formal parameters"
    if "---" in metadata:
        params = metadata.split("---")[1:]
    else:
        params = [")"]
    num_formal_params = 0 if params == [")"] else len(params)

    # Parse the actual parameters (they start after "id" and "(")
    op_value, op_type, i, func_ret_type = expression(
        tokens,
        i + 2,
        table,
        "",
        True,
        True,
        expect_paren=True,
        func_ret_type=func_ret_type,
    )

    # Split the argument text on commas; an empty first piece means no arguments
    actual_args = op_value.replace(" ", "").split(",")
    if not actual_args or not actual_args[0]:
        actual_args = []
    num_actual_params = 0 if actual_args == [")"] else len(actual_args)

    # Formal and actual parameter counts have to agree
    if num_formal_params != num_actual_params:
        error(
            "Expected %d parameters but got %d parameters in function %s" %
            (num_formal_params, num_actual_params, func_name),
            tokens[i].line_num,
        )

    # Copy the datatype of every actual parameter onto its formal parameter
    for position, formal in enumerate(params):
        # Marker for an empty parameter list
        if formal == ")":
            continue

        actual_symbol = actual_args[position].replace(")", "")
        _, dtype, _ = table.get_by_id(table.get_by_symbol(actual_symbol))

        table.symbol_table[table.get_by_symbol(formal)][1] = dtype

    # A pending return type is resolved by re-parsing the stored expression position
    if func_name in func_ret_type:
        _, op_type, _, _ = expression(tokens, func_ret_type[func_name], table,
                                      "")

        # Type code -> C datatype
        code_to_c_type = {
            0: "char*",
            1: "char*",
            2: "char",
            3: "int",
            4: "float",
            5: "double",
        }

        table.symbol_table[table.get_by_symbol(
            func_name)][1] = code_to_c_type[op_type]
        del func_ret_type[func_name]

    # The joined argument text loses its final character before being stored
    call_payload = func_name + "---" + "&&&".join(actual_args)[:-1]
    return OpCode("func_call", call_payload, ""), i + 1, func_ret_type
Ejemplo n.º 9
0
def expression(
    tokens,
    i,
    table,
    msg,
    accept_unkown=False,
    accept_empty_expression=False,
    expect_paren=True,
    func_ret_type=None,
):
    """
    Parse an expression from tokens

    Params
    ======
    tokens                  (list)        = List of tokens
    i                       (int)         = Current index in list of tokens
    table                   (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    msg                     (string)      = Error message to print in case some case fails
    accept_unkown           (bool)        = Accept unknown type for variable or not
    accept_empty_expression (bool)        = Accept empty expression or not
    expect_paren            (bool)        = Stop at a ")" that is followed by a newline or "{"
    func_ret_type           (dict)        = Functions whose return type is pending; a fresh dict
                                            is used when omitted

    Returns
    =======
    string, int, int, dict: The expression text, the type code of the expression
                            (-1 unknown, 0/1 string, 2 char, 3 int, 4 float, 5 double),
                            the current index in tokens after parsing, and func_ret_type
    """

    # A None default avoids sharing one dict between unrelated calls
    if func_ret_type is None:
        func_ret_type = {}

    # Initial values
    op_value = ""
    op_type = -1

    # Mapping for precedence checking (double > float > int > char > char*)
    type_to_prec = {"char*": 1, "char": 2, "int": 3, "float": 4, "double": 5}

    # printf format specifier for each datatype usable inside an f-string
    type_to_fs = {
        "char": "%c",
        "string": "%s",
        "int": "%d",
        "float": "%f",
        "double": "%lf",
    }

    # Token types that may appear inside an expression
    # NOTE(review): "exit" is accepted here but has no entry in word_to_op,
    # so reaching it raises KeyError - confirm the intended handling
    expression_tokens = frozenset((
        "number",
        "input",
        "string",
        "id",
        "plus",
        "minus",
        "multiply",
        "divide",
        "comma",
        "equal",
        "not_equal",
        "greater_than",
        "less_than",
        "greater_than_equal",
        "less_than_equal",
        "modulus",
        "increment",
        "decrement",
        "plus_equal",
        "minus_equal",
        "multiply_equal",
        "divide_equal",
        "modulus_equal",
        "and",
        "or",
        "left_paren",
        "exit",
        "right_paren",
        "newline",
        "call_end",
        "address_of",
        "right_shift",
        "left_shift",
    ))

    # Text emitted for every operator-like token
    word_to_op = {
        "plus": " + ",
        "minus": " - ",
        "multiply": " * ",
        "divide": " / ",
        "equal": " == ",
        "not_equal": " != ",
        "greater_than": " > ",
        "less_than": " < ",
        "greater_than_equal": " >= ",
        "less_than_equal": " <= ",
        "input": " scanf ",
        "modulus": " % ",
        "increment": " ++ ",
        "decrement": " -- ",
        "plus_equal": " += ",
        "minus_equal": " -= ",
        "multiply_equal": " *= ",
        "divide_equal": " /= ",
        "modulus_equal": " %= ",
        "and": " && ",
        "or": " || ",
        "comma": ",",
        "left_paren": "(",
        "right_paren": ")",
        "address_of": "&",
        "left_shift": " << ",
        "right_shift": " >> ",
    }

    # Loop until expression is not parsed completely
    while i < len(tokens) and tokens[i].type in expression_tokens:
        token = tokens[i]
        # Lookahead is only valid when a following token exists
        has_next = i + 1 < len(tokens)

        # Check for function call
        if (token.type == "id" and has_next
                and tokens[i + 1].type == "left_paren"):
            fun_opcode, i, func_ret_type = function_call_statement(
                tokens, i, table, func_ret_type)
            val = fun_opcode.val.split("---")
            params = val[1].split("&&&")
            op_value += val[0] + "(" + ", ".join(params) + ")"
            # The call's type is the callee's recorded return type
            op_type = type_to_prec[table.get_by_id(table.get_by_symbol(
                val[0]))[1]]
            # Compensate for the unconditional i += 1 at the end of the loop
            i -= 1
        # If token is identifier or constant
        elif token.type in ("number", "string", "id"):
            # Fetch information from symbol table
            value, dtype, typedata = table.get_by_id(token.val)

            if dtype == "string":
                # If { in string then it is a f-string
                if "{" in value:
                    names = []
                    current = ""
                    inside = False

                    # Collect the variable names; current holds "{name" until
                    # the closing brace, so [1:] strips the leading "{"
                    for char in value:
                        if char == "{":
                            inside = True
                        elif char == "}":
                            names.append(current[1:])
                            current = ""
                            inside = False

                        if inside:
                            current += char

                    # Swap each {name} for its format specifier and append the
                    # name as an argument. Only the braced placeholder is
                    # replaced, so other text containing the same letters is
                    # left alone.
                    for var in names:
                        _, var_type, _ = table.get_by_id(
                            table.get_by_symbol(var))
                        if var_type == "var":
                            error("Unknown variable %s" % var, token.line_num)
                        value = value.replace("{" + var + "}",
                                              type_to_fs[var_type])
                        value += ", " + var

                    # Remove any braces that are left over
                    value = value.replace("{", "").replace("}", "")

                op_value += value
                op_type = 0 if typedata == "constant" else 1
            elif dtype == "char":
                op_value += value
                op_type = 2
            elif dtype in ("int", "float", "double"):
                # Numeric types only ever widen the expression type
                op_value += str(value)
                op_type = max(op_type, type_to_prec[dtype])
            elif dtype in ("var", "declared") and not accept_unkown:
                error("Cannot find the type of %s" % value, token.line_num)
            elif dtype == "var" and accept_unkown:
                op_value += str(value)
        elif token.type in ("newline", "call_end"):
            break
        else:
            # A ")" that closes a construct (followed by newline or "{") ends
            # the expression without being emitted
            if (expect_paren and token.type == "right_paren" and has_next
                    and tokens[i + 1].type in ("newline", "left_brace")):
                break

            op_value += word_to_op[token.type]

        i += 1

    # If expression is empty then throw an error; i may be one past the last
    # token here, so clamp it before reading the line number
    if op_value == "" and not accept_empty_expression:
        error(msg, tokens[min(i, len(tokens) - 1)].line_num)

    # Check if statement is of type input
    if " scanf " in op_value:

        # Check if there exists a prompt message
        if '"' in op_value:
            i1 = op_value.index('"') + 1
            i2 = op_value.index('"', i1)
            # Extracting the prompt
            p_msg = op_value[i1:i2]
            # Checking if dtype is mentioned (single-quoted, after the prompt)
            if "'" in op_value[i2 + 1:]:
                i1 = op_value.index("'", i2 + 1) + 1
                i2 = op_value.index("'", i1)
                dtype = op_value[i1:i2]
            else:
                # default dtype is string
                dtype = "s"
        else:
            p_msg = ""
            dtype = "s"
        dtype_to_prec = {"i": 3, "f": 4, "d": 5, "s": 1}
        op_value = str(p_msg) + "---" + str(dtype)
        op_type = dtype_to_prec[dtype]

    # Return the expression, type of expression, current index in tokens and func_ret_type
    return op_value, op_type, i, func_ret_type
Ejemplo n.º 10
0
def function_definition_statement(tokens, i, table, func_ret_type):
    """
    Parse function definition statement

    Params
    ======
    tokens        (list) = List of tokens
    i             (int)  = Current index in token (expected to be the function name)
    table         (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    func_ret_type (dict) = Passed through to expression() and handed back to the caller

    Returns
    =======
    OpCode, int, string, dict: The "func_decl" opcode, the index of the "{" that opens the body,
                               the name of the function and the func_ret_type returned by expression()

    Grammar
    =======
    function_definition_statement   -> fun id([formal_params,]*) { body }
    formal_params                   -> expr
    body                            -> statement
    expr                            -> string | number | id | operator
    string                          -> quote [a-zA-Z0-9`~!@#$%^&*()_-+={[]}:;,.?/|\\]+ quote
    quote                           -> "
    number                          -> [0-9]+
    id                              -> [a-zA-Z_]?[a-zA-Z0-9_]*
    operator                        -> + | - | * | /
    """

    # Check if identifier follows fun
    check_if(tokens[i].type, "id", "Expected function name",
             tokens[i].line_num)

    # Store the id of function name in symbol table
    func_idx = tokens[i].val

    # Get function name
    func_name, _, _ = table.get_by_id(func_idx)

    # Check if ( follows id in function
    check_if(
        tokens[i + 1].type,
        "left_paren",
        "Expected ( after function name",
        tokens[i + 1].line_num,
    )

    # Parse the formal parameter list; unknown types and an empty list are both accepted
    op_value, _, i, func_ret_type = expression(
        tokens, i + 2, table, "", True, True, func_ret_type=func_ret_type)
    op_value_list = op_value.replace(" ", "").replace(")", "").split(",")

    # Check if ) follows expression in function
    check_if(
        tokens[i - 1].type,
        "right_paren",
        "Expected ) after function params list",
        tokens[i - 1].line_num,
    )

    # If \n follows ) then skip all the \n characters
    if tokens[i + 1].type == "newline":
        i += 1
        while tokens[i].type == "newline":
            i += 1
        i -= 1

    # Check if { follows ) in function
    check_if(
        tokens[i + 1].type,
        "left_brace",
        "Expected { before function body",
        tokens[i + 1].line_num,
    )

    # Scan forward from the first body token until a } is reached
    i += 2
    ret_idx = i
    while i < len(tokens) and tokens[i].type != "right_brace":
        i += 1

    # Running off the end means no } was found. i is one past the last token
    # here, so the line number is taken from the last token instead of
    # indexing out of range.
    if i >= len(tokens):
        error("Expected } after function body", tokens[-1].line_num)

    # Add the parameter names to function's typedata; a function without
    # parameters is stored as plain "function"
    if op_value_list and op_value_list[0]:
        table.symbol_table[func_idx][2] = (
            "function---" + "---".join(op_value_list))
    else:
        table.symbol_table[func_idx][2] = "function"

    # ret_idx - 1 is the index of the { that opens the body
    return (
        OpCode("func_decl", func_name + "---" + "&&&".join(op_value_list), ""),
        ret_idx - 1,
        func_name,
        func_ret_type,
    )
Ejemplo n.º 11
0
def parse(tokens, table):
    """
    Parse tokens and generate opcodes

    Walks the token list once, dispatching on the type of the current token.
    Each statement helper returns the opcode it built together with the index
    at which parsing should resume. Problems are reported through error().

    Params
    ======
    tokens (list)        = List of tokens
    table  (SymbolTable) = Symbol table constructed holding information about identifiers and constants

    Returns
    =======
    list: The list of opcodes

    Grammar
    =======
    statement -> print_statement | var_statement | assign_statement | function_definition_statement
    """

    # List of opcodes
    op_codes = []

    # Name of the function whose body is currently being parsed ("" when outside any function)
    func_name = ""

    # Do while started or not
    in_do = False

    # Count main functions
    main_fn_count = 0

    # Count if conditions
    if_count = 0

    # Brace count
    brace_count = 0

    # If function return type could not be figured out during return then do it while calling
    func_ret_type = {}

    # Map the type code returned by expression() to the C datatype of a function's
    # return value (built once, it does not change between iterations)
    prec_to_type = {
        -1: "not_known",
        0: "char*",
        1: "char*",
        2: "char",
        3: "int",
        4: "float",
        5: "double",
        6: "void",
    }

    # Loop through all the tokens
    i = 0
    while i < len(tokens):
        token_type = tokens[i].type

        # If token is of type print then generate print opcode
        if token_type == "print":
            print_opcode, i, func_ret_type = print_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(print_opcode)
        # If token is of type var then generate var opcode
        elif token_type == "var":
            var_opcode, i, func_ret_type = var_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(var_opcode)
        # If token is of type id then it is a call, a unary statement or an assignment
        elif token_type == "id":
            # If '(' follows id then it is function calling
            if tokens[i + 1].type == "left_paren":
                fun_opcode, i, func_ret_type = function_call_statement(
                    tokens, i, table, func_ret_type)
                op_codes.append(fun_opcode)
            # If ++ or -- follows id then it is a postfix unary statement
            elif tokens[i + 1].type in ["increment", "decrement"]:
                unary_opcode, i, func_ret_type = unary_statement(
                    tokens, i, table, func_ret_type)
                op_codes.append(unary_opcode)
            # Otherwise it is variable assignment
            else:
                assign_opcode, i, func_ret_type = assign_statement(
                    tokens, i + 1, table, func_ret_type)
                op_codes.append(assign_opcode)
        # If token is of type fun then generate function opcode
        elif token_type == "fun":
            fun_opcode, i, func_name, func_ret_type = function_definition_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(fun_opcode)
        # If token is of type left_brace then generate scope_begin opcode
        elif token_type == "left_brace":
            op_codes.append(OpCode("scope_begin", "", ""))
            brace_count += 1
            i += 1
        # If token is of type right_brace then generate scope_over opcode
        elif token_type == "right_brace":
            op_codes.append(OpCode("scope_over", "", ""))
            brace_count -= 1

            if brace_count < 0:
                error(
                    "Closing brace doesn't match any previous opening brace",
                    tokens[i].line_num,
                )
            i += 1
        # If token is of type MAIN then generate MAIN opcode
        elif token_type == "MAIN":
            op_codes.append(OpCode("MAIN", "", ""))
            main_fn_count += 1
            if main_fn_count > 1:
                error("Presence of two MAIN in a single file",
                      tokens[i].line_num)
            i += 1
        # If token is of type END_MAIN then generate END_MAIN opcode
        elif token_type == "END_MAIN":
            op_codes.append(OpCode("END_MAIN", "", ""))
            main_fn_count -= 1
            i += 1
        # If token is of type for then generate for code
        elif token_type == "for":
            for_opcode, i, func_ret_type = for_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(for_opcode)
        # If token is of type do then generate do_while code
        elif token_type == "do":
            check_if(
                tokens[i + 1].type,
                "left_brace",
                "Expected { after do statement",
                tokens[i + 1].line_num,
            )
            in_do = True
            op_codes.append(OpCode("do", "", ""))
            i += 1
        # If token is of type while then generate while opcode
        elif token_type == "while":
            while_opcode, i, func_ret_type = while_statement(
                tokens, i + 1, table, in_do, func_ret_type)
            # A while statement always clears a pending do
            in_do = False
            op_codes.append(while_opcode)
        # If token is of type if then generate if opcode
        elif token_type == "if":
            if_opcode, i, func_ret_type = if_statement(tokens, i + 1, table,
                                                       func_ret_type)
            op_codes.append(if_opcode)

            # Increment if count on encountering if
            if_count += 1
        # If token is of type exit then generate exit opcode
        elif token_type == "exit":
            exit_opcode, i, func_ret_type = exit_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(exit_opcode)
        # If token is of type else then check whether it is else if or else
        elif token_type == "else":
            # If the next token is if, then it is else if
            if tokens[i + 1].type == "if":
                if_opcode, i, func_ret_type = if_statement(
                    tokens, i + 2, table, func_ret_type)
                if_opcode.type = "else_if"
                op_codes.append(if_opcode)
            # If the next token is {, then it is a plain else
            elif tokens[i + 1].type == "left_brace":
                op_codes.append(OpCode("else", "", ""))

                # Decrement if count on encountering else, to make sure there aren't extra else conditions
                if_count -= 1

                # If if_count is negative then the current else is extra
                if if_count < 0:
                    error("Else does not match any if!", tokens[i].line_num)

                i += 1
            # Anything else after else is malformed; without this branch the
            # index would never advance and the loop would not terminate
            else:
                error("Expected { or if after else", tokens[i].line_num)
                i += 1
        # If token is of type return then generate return opcode
        elif token_type == "return":
            # Remember where the returned expression starts
            beg_idx = i + 1
            if tokens[i + 1].type not in ["id", "number", "string"]:
                # No expression follows return, treat it as a void return
                op_value = ""
                op_type = 6
                i += 2
            else:
                op_value, op_type, i, func_ret_type = expression(
                    tokens,
                    i + 1,
                    table,
                    "Expected expression after return",
                    True,
                    True,
                    expect_paren=False,
                    func_ret_type=func_ret_type,
                )
            if func_name == "":
                error("Return statement outside any function",
                      tokens[i].line_num)
            else:
                # Type could not be determined here, record where the expression
                # starts so it can be figured out while calling
                if op_type == -1:
                    func_ret_type[func_name] = beg_idx

                # Change return type of function
                table.symbol_table[table.get_by_symbol(
                    func_name)][1] = prec_to_type[op_type]

                # Set func_name to an empty string after processing
                func_name = ""
            op_codes.append(OpCode("return", op_value, ""))
        # If token is of type break then generate break opcode
        elif token_type == "break":
            op_codes.append(OpCode("break", "", ""))
            i += 1
        # If token is of type continue then generate continue opcode
        elif token_type == "continue":
            op_codes.append(OpCode("continue", "", ""))
            i += 1
        # If token is of type single_line_comment then generate single_line_comment opcode
        elif token_type == "single_line_comment":
            op_codes.append(OpCode("single_line_comment", tokens[i].val, ""))
            i += 1
        # If token is of type multi_line_comment then generate multi_line_comment opcode
        elif token_type == "multi_line_comment":
            op_codes.append(OpCode("multi_line_comment", tokens[i].val, ""))
            i += 1
        # If token is of type switch then generate switch opcode
        elif token_type == "switch":
            switch_opcode, i, func_ret_type = switch_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(switch_opcode)
        # If token is of type case then generate case opcode
        elif token_type == "case":
            case_opcode, i, func_ret_type = case_statement(
                tokens, i + 1, table, func_ret_type)
            op_codes.append(case_opcode)
        # If token is of type default then generate default opcode
        elif token_type == "default":
            check_if(
                tokens[i + 1].type,
                "colon",
                "Expected : after default statement in switch",
                tokens[i + 1].line_num,
            )
            op_codes.append(OpCode("default", "", ""))
            # Skip both the default token and the colon
            i += 2
        # If token is the type increment or decrement then generate unary_opcode
        elif token_type in ["increment", "decrement"]:
            unary_opcode, i, func_ret_type = unary_statement(
                tokens, i, table, func_ret_type)
            op_codes.append(unary_opcode)

        # Otherwise increment the index
        else:
            i += 1

    # Errors that may occur after parsing loop
    if main_fn_count != 0:
        error("MAIN not ended with END_MAIN", tokens[i - 1].line_num + 1)

    # Return opcodes
    return op_codes
Ejemplo n.º 12
0
def lexical_analyze(filename, table):
    """
    Generate tokens from source code

    Reads the whole file, appends a "\\0" sentinel and scans it character by
    character. Characters that do not start any known token are skipped.

    Params
    ======
    filename    (string)      = The string containing simc source code filename
    table       (SymbolTable) = Symbol table constructed holding information about identifiers and constants

    Returns
    ========
    list: A list of tokens of the source code, if the code is lexically correct, otherwise
          presents user with an error
    """

    # Line number (set up first because the extension check below reports it)
    line_num = 1

    # Check if file extension is .simc or not
    if "." not in filename or filename.split(".")[-1] != "simc":
        error("Incorrect file extension", line_num)

    # Read the entire source code as a string, closing the file afterwards
    with open(filename, "r") as source_file:
        source_code = source_file.read()

    # Sentinel marking the end of input, it also makes one character lookahead safe
    source_code += "\0"

    # List of tokens
    tokens = []

    # Parantheses checker for detecting function call
    parantheses_count = 0

    # Loop through the source code character by character
    i = 0
    while source_code[i] != "\0":
        # If a digit appears, call numeric_val function and add the numeric token to list,
        # if it was correct
        if is_digit(source_code[i]):
            token, i = numeric_val(source_code, i, table, line_num)
            tokens.append(token)

        # If double quote appears the value is a string token
        elif source_code[i] == '"':
            token, i = string_val(source_code, i, table, line_num)
            tokens.append(token)

        # If single quote appears the value is a string token
        elif source_code[i] == "'":
            token, i = string_val(source_code,
                                  i,
                                  table,
                                  line_num,
                                  start_char="'")
            tokens.append(token)

        # If alphabet or number appears then it might be either a keyword or an identifier
        elif is_alnum(source_code[i]):
            token, i = keyword_identifier(source_code, i, table, line_num)
            tokens.append(token)

        # Identifying left paren token
        elif source_code[i] == "(":
            # A ( right after an identifier, or nested inside a call, belongs to a function call.
            # tokens may still be empty when ( is the first token of the file.
            if (tokens and tokens[-1].type == "id") or parantheses_count > 0:
                parantheses_count += 1
            tokens.append(Token("left_paren", "", line_num))
            i += 1

        # Identifying right paren token
        elif source_code[i] == ")":
            if parantheses_count > 0:
                parantheses_count -= 1

            tokens.append(Token("right_paren", "", line_num))

            # No call parentheses remain open after this )
            if parantheses_count == 0:
                tokens.append(Token("call_end", "", line_num))

            i += 1

        # Identifying left brace token
        elif source_code[i] == "{":
            tokens.append(Token("left_brace", "", line_num))
            i += 1

        # Identifying right brace token
        elif source_code[i] == "}":
            tokens.append(Token("right_brace", "", line_num))
            i += 1

        # Identifying newline token
        elif source_code[i] == "\n":
            tokens.append(Token("newline", "", line_num))
            line_num += 1
            i += 1

        # Identifying assignment token or equivalence token
        elif source_code[i] == "=":
            if source_code[i + 1] != "=":
                tokens.append(Token("assignment", "", line_num))
                i += 1
            else:
                tokens.append(Token("equal", "", line_num))
                i += 2

        # Identifying plus_equal, increment or plus token
        elif source_code[i] == "+":
            if source_code[i + 1] == "=":
                tokens.append(Token("plus_equal", "", line_num))
                i += 2
            elif source_code[i + 1] == "+":
                tokens.append(Token("increment", "", line_num))
                i += 2
            else:
                tokens.append(Token("plus", "", line_num))
                i += 1

        # Identifying minus_equal, decrement or minus token
        elif source_code[i] == "-":
            if source_code[i + 1] == "=":
                tokens.append(Token("minus_equal", "", line_num))
                i += 2
            elif source_code[i + 1] == "-":
                tokens.append(Token("decrement", "", line_num))
                i += 2
            else:
                tokens.append(Token("minus", "", line_num))
                i += 1

        # Identifying multiply_equal or multiply token
        elif source_code[i] == "*":
            if source_code[i + 1] == "=":
                tokens.append(Token("multiply_equal", "", line_num))
                i += 2
            else:
                tokens.append(Token("multiply", "", line_num))
                i += 1

        # Identifying 'address of' token
        elif source_code[i] == "&":
            tokens.append(Token("address_of", "", line_num))
            i += 1

        # Identifying divide_equal, comment or divide token
        elif source_code[i] == "/":
            if source_code[i + 1] == "=":
                tokens.append(Token("divide_equal", "", line_num))
                i += 2
            # to check if it is a single line comment
            elif source_code[i + 1] == "/":
                i += 2
                comment_start = i
                # Stop at the sentinel too, the last line may lack a trailing newline
                while source_code[i] not in ("\n", "\0"):
                    i += 1
                tokens.append(
                    Token("single_line_comment", source_code[comment_start:i],
                          line_num))
            # to check if it is a multi line comment
            elif source_code[i + 1] == "*":
                i += 2
                # The comment runs up to the first "*/" pair, not the first * or /
                comment_end = source_code.find("*/", i)
                if comment_end == -1:
                    error("Unterminated multi-line comment", line_num)
                    # Consume everything up to the sentinel so the scan ends
                    comment_end = len(source_code) - 1
                    resume_at = comment_end
                else:
                    # Resume right after the closing */
                    resume_at = comment_end + 2
                comment_str = source_code[i:comment_end]
                tokens.append(
                    Token("multi_line_comment", comment_str, line_num))
                # Keep line numbers of later tokens correct
                line_num += comment_str.count("\n")
                i = resume_at
            else:
                tokens.append(Token("divide", "", line_num))
                i += 1

        # Identifying modulus_equal or modulus token
        elif source_code[i] == "%":
            if source_code[i + 1] == "=":
                tokens.append(Token("modulus_equal", "", line_num))
                i += 2
            else:
                tokens.append(Token("modulus", "", line_num))
                i += 1

        # Identifying comma token
        elif source_code[i] == ",":
            tokens.append(Token("comma", "", line_num))
            i += 1

        # Identifying not_equal token
        elif source_code[i] == "!" and source_code[i + 1] == "=":
            tokens.append(Token("not_equal", "", line_num))
            i += 2

        # Identifying greater_than, greater_than_equal or right_shift token
        elif source_code[i] == ">":
            if source_code[i + 1] == "=":
                tokens.append(Token("greater_than_equal", "", line_num))
                i += 2
            elif source_code[i + 1] == ">":
                tokens.append(Token("right_shift", "", line_num))
                i += 2
            else:
                tokens.append(Token("greater_than", "", line_num))
                i += 1

        # Identifying less_than, less_than_equal or left_shift token
        elif source_code[i] == "<":
            if source_code[i + 1] == "=":
                tokens.append(Token("less_than_equal", "", line_num))
                i += 2
            elif source_code[i + 1] == "<":
                tokens.append(Token("left_shift", "", line_num))
                i += 2
            else:
                tokens.append(Token("less_than", "", line_num))
                i += 1

        # Identifying colon token
        elif source_code[i] == ":":
            tokens.append(Token("colon", "", line_num))
            i += 1

        # Otherwise increment the index
        else:
            i += 1

    # Return the generated tokens
    return tokens
Ejemplo n.º 13
0
def keyword_identifier(source_code, i, table, line_num):
    """
    Scan one keyword or identifier starting at index i

    Params
    ======
    source_code (string)      = The string containing simc source code
    i           (int)         = Index of the first character of the word
    table       (SymbolTable) = Symbol table constructed holding information about identifiers and constants
    line_num    (int)         = Line number stored in the generated token

    Returns
    =======
    Token, int: A token named after the keyword, or an "id" token carrying the
                symbol table id, followed by the index just past the word
    """

    # Walk over the run of alphanumeric characters, then slice the word out
    start = i
    while is_alnum(source_code[i]):
        i += 1
    lexeme = source_code[start:i]

    # A simc keyword becomes a token whose type is the keyword itself
    if is_keyword(lexeme):
        return Token(lexeme, "", line_num), i

    # Look the word up in the symbol table
    symbol_id = table.get_by_symbol(lexeme)

    # Words reserved by C that must not be used as identifiers
    reserved_in_c = (
        "break",
        "case",
        "char",
        "const",
        "continue",
        "default",
        "do",
        "else",
        "enum",
        "extern",
        "float",
        "for",
        "goto",
        "if",
        "long",
        "register",
        "return",
        "short",
        "signed",
        "sizeof",
        "static",
        "switch",
        "typedef",
        "union",
        "unsigned",
        "void",
        "volatile",
        "while",
    )

    # Report identifiers that collide with a C keyword
    if lexeme in reserved_in_c:
        error("A keyword cannot be an identifier - %s" % lexeme, line_num)

    # First sighting of this identifier: register it with the placeholder datatype var
    if symbol_id == -1:
        symbol_id = table.entry(lexeme, "var", "variable")

    # Hand back the id token and the position to resume scanning from
    return Token("id", symbol_id, line_num), i