def generate_assignment(assignment):
    """Generate the Statement/Expr node tree for an assignment statement.

    Walks the assignment AST, building up `root` from the assign variable,
    trailers and any extended assignment expression, then wraps the result
    for the `await` / `new` prefix operators if present.

    Fixes vs. original:
    - renamed the `await` local: `await` is a reserved keyword (Python 3.7+)
      and made this function a SyntaxError on modern interpreters;
    - parenthesized the conditional inside the %-format: `%` binds tighter
      than `... if ... else ...`, so the original selected the whole
      formatted string or the bare string 'new' instead of formatting
      either operator name in.
    """
    # is_await = if 'await' is present, same for is_new, root is root variable
    is_await, is_new, root = False, False, None
    for item in assignment.content:
        # NOTE assumes everything is an AST
        # mark await if it is encountered
        if item.name == 'await':
            is_await = True
        # mark new if it is encountered
        elif item.name == 'new':
            is_new = True
        # generate the assignment variable if the given AST is encountered
        elif item.name == 'assign_var':
            root = generate_assign_var(item)
        # add trailer if it detected
        elif item.name == 'trailer':
            root = add_trailer(root, item)
        # handle extended assignment expression
        elif item.name == 'assignment_expr':
            # await and new are invalid on assignment
            if is_await or is_new:
                errormodule.throw('semantic_error',
                                  'Invalid operands for %s operator' % ('await' if is_await else 'new'),
                                  item)
            root = generate_assignment_expr(root, item)
    # check await
    if is_await:
        # if it is not returning an Incomplete Type, an async function was not called
        if not isinstance(root.data_type, types.Future):
            errormodule.throw('semantic_error', 'Unable to await object', assignment)
        root = StatementNode('Expr', ExprNode('Await', root.data_type.data_type, root))
    # check new
    if is_new:
        expr = None
        # if it is not a structure of a group
        if root.data_type != types.DataTypes.MODULE:
            # if it is a data type
            if isinstance(root.data_type, types.DataTypeLiteral):
                # get new pointer type
                dt = copy(root.data_type)
                dt.pointers += 1
                # return memory allocation with size of type
                expr = ExprNode('Malloc', dt,
                                ExprNode('SizeOf', types.DataType(types.DataTypes.INT, 1), root))
            else:
                # if it is not an integer
                if root.data_type != types.DataType(types.DataTypes.INT, 0):
                    # all tests failed, not allocatable
                    errormodule.throw('semantic_error',
                                      'Unable to dynamically allocate memory for object', assignment)
                else:
                    # malloc for just int size
                    expr = ExprNode('Malloc', types.VOID_PTR, root)
        else:
            dt = copy(root.data_type)
            dt.instance = True
            # return object instance
            expr = ExprNode('CreateObjectInstance', dt, root)
        root = StatementNode('Expr', expr)
    # return compiled root
    return root
def generate_id_type(id_type):
    """Resolve an id_type AST leaf to either the current instance or an Identifier.

    A 'THIS' token resolves via Module.get_instance(); anything else is
    looked up in the symbol table (throwing a semantic error when the
    symbol is missing) and wrapped in an Identifier carrying its constness.
    """
    leaf = id_type.content[0]
    # 'this' token: delegate to Module.get_instance()
    if leaf.type == 'THIS':
        return get_instance()
    # otherwise resolve the name through the symbol table
    sym = util.symbol_table.look_up(leaf.value)
    if not sym:
        errormodule.throw('semantic_error', 'Variable \'%s\' not defined' % leaf.value, id_type)
    # build the identifier, flagging it constant when the symbol is
    return Identifier(sym.name, sym.data_type, Modifiers.CONSTANT in sym.modifiers)
def run_parser(self, table, grammar):
    """Run a table-driven LL(1) parse over self.input_buffer.

    table   -- parse table mapping nonterminal -> {lookahead type -> production}
    grammar -- grammar object providing start_symbol and nonterminals
    Returns the root ASTNode (the start symbol's tree).

    Uses two stacks: `stack` for grammar symbols (with a sentinel "queue"
    marker that closes the current AST level) and `sem_stack` for the
    ASTs being built.
    """
    # position in input
    pos = 0
    # stack declaration; "$" is the bottom-of-stack / end-of-input marker
    stack = ["$", grammar.start_symbol]
    # stack for holding building AST
    sem_stack = [ASTNode(grammar.start_symbol)]
    # append an end-of-input token reusing the last token's index
    self.input_buffer.append(Token("$", "$", self.input_buffer[-1].ndx))
    # enter cycle
    while len(stack) > 0:
        if stack[len(stack) - 1] == "queue":
            # handles closing of ASTs
            # ensures that the it is not an empty AST
            if len(sem_stack[-1].content) > 0:
                sem_stack[-2].content.append(sem_stack[-1])
            sem_stack.pop()
            stack.pop()
            continue
        # handles non terminals
        elif stack[len(stack) - 1] in grammar.nonterminals:
            nt = stack.pop()
            sem_stack.append(ASTNode(nt))
            # no table entry for this lookahead -> syntax error with expected set
            if self.input_buffer[pos].type not in table[nt]:
                er.throw("syntax_error", "Unexpected Token", [
                    self.input_buffer[pos],
                    [str(x) for x in table[nt].keys()]
                ])
            # push the production (reversed, with a closing "queue" marker),
            # unless it is the ["$"] end production
            if table[nt][self.input_buffer[pos].type] != ["$"]:
                stack += reversed(table[nt][self.input_buffer[pos].type] + ["queue"])
        # handles epsilon
        elif stack[-1] == "&":
            stack.pop()
            # an epsilon-only production closes its (empty) AST level immediately
            if stack[-1] == "queue":
                stack.pop()
                sem_stack.pop()
        # handles terminals
        else:
            if stack[len(stack) - 1] == self.input_buffer[pos].type:
                # record the matched token (except the "$" sentinel)
                if self.input_buffer[pos].type != "$":
                    sem_stack[-1].content.append(self.input_buffer[pos])
                stack.pop()
            else:
                er.throw("syntax_error", "Unexpected Token",
                         [self.input_buffer[pos], stack[-1]])
            # advance the input after consuming (or erroring on) a terminal
            pos += 1
    return sem_stack[0]
def add_final_variable():
    """Finalize the in-progress `variable` dict into the `variables` map.

    NOTE(review): reads the free variables `variable` (the dict being
    built) and `variables` (the output map) from the enclosing scope —
    confirm against the surrounding module; neither is defined here.
    Synthesizes an ad-hoc object via type('Object', (), final_variable).
    """
    # holding dictionary
    final_variable = {}
    # add extension if it exists
    if 'extension' in variable:
        final_variable['data_type'] = variable['extension']
    # if the variable has an initializer
    if 'initializer' in variable:
        # if it has a data type, type check the initializer
        if 'data_type' in final_variable:
            if not types.coerce(final_variable['data_type'], variable['initializer'].data_type):
                errormodule.throw('semantic_error',
                                  'Variable type extension and initializer data types do not match',
                                  variable['name'])
        else:
            # else infer from initializer
            final_variable['data_type'] = variable['initializer'].data_type
        # add initializer to variable
        final_variable['initializer'] = variable['initializer']
        # add constexpr designation
        final_variable['constexpr'] = variable['constexpr']
    # add synthesized object to variables
    variables[variable['name']] = type('Object', (), final_variable)
def generate_delete(stmt):
    """Compile a delete statement into a StatementNode('Delete', ...).

    Collects every identifier named in the statement, removes each from
    the symbol table (throwing a semantic error when a symbol does not
    exist), and emits one Identifier per deleted symbol.
    """
    # the first named variable always sits at content[1]
    names = [stmt.content[1]]
    # a trailing ASTNode means more variables follow; pull every
    # IDENTIFIER token out of its unparsed form
    tail = stmt.content[-1]
    if isinstance(tail, ASTNode):
        names.extend(tok for tok in unparse(tail) if tok.type == 'IDENTIFIER')
    # resolve and delete each name, collecting Identifier nodes
    deleted = []
    for tok in names:
        sym = util.symbol_table.look_up(tok.value)
        # deletion failing means the symbol never existed
        if not util.symbol_table.delete(tok.value):
            errormodule.throw('semantic_error', 'Unable to delete non-existent symbol', tok)
        deleted.append(Identifier(sym.name, sym.data_type, Modifiers.CONSTANT in sym.modifiers))
    return StatementNode('Delete', *deleted)
def load_package(include_stmt, extern=False):
    """Load (and cache) a package named by an include statement.

    include_stmt -- the include AST; yields the package name, an optional
                    'use' marker (anonymous inclusion) and optional alias.
    extern       -- whether the inclusion carries the external modifier.
    Returns a util.Package, or None when the package file is not found
    (after reporting a package_error).
    """
    # package name
    name = ''
    # if it is anonymous
    used = False
    # alias if necessary
    alias = None
    for item in include_stmt.content:
        if isinstance(item, ASTNode):
            # update name to be end of suffix
            if item.name == 'dot_id':
                name = unparse(item)[-1].value
            # if this sub tree exists, that means the inclusion is used
            elif item.name == 'use':
                used = True
            # if there is a rename, that means an alias was used
            elif item.name == 'rename':
                # strip the surrounding quote characters from the alias token
                alias = item.content[0].value[1:-1]
                # NOTE(review): re.match only anchors at the start, so a name
                # with trailing junk (e.g. 'foo-bar') still passes — confirm
                # whether re.fullmatch was intended
                if not re.match(r'[^\d\W]\w*', alias):
                    errormodule.throw('package_error', 'Invalid package name', include_stmt)
        else:
            # get base name
            if item.type == 'IDENTIFIER':
                name = item.value
    # package cannot be used and external
    if used and extern:
        errormodule.throw('package_error', 'Package cannot be both external and anonymous.', include_stmt)
    # if get fails due to file path not being found
    try:
        code, path = get(name)
    except FileNotFoundError:
        errormodule.throw('package_error', 'Unable to locate package by name \'%s\'.' % name, include_stmt)
        return
    # prevent redundant imports
    if name in imports:
        ast = imports[name]
    # if working directory needs to be updated
    elif path:
        # store cwd
        cwd = os.getcwd()
        # chdir to the parent directory of the file to allow for local importing
        os.chdir(path)
        # get the data necessary
        ast = get_ast(code)
        # change back to cwd
        os.chdir(cwd)
    else:
        # just get the ast, no cwd recursion necessary
        ast = get_ast(code)
    # set alias if there was none provided
    if not alias:
        alias = name
    # cache the compiled AST for later includes of the same name
    if name not in imports:
        imports[name] = ast
    return util.Package(alias, extern, used, ast)
def generate_variable_dict(ast):
    """Walk a variable-declaration AST, filling in the shared `variable` dict.

    NOTE(review): mutates the free variable `variable` (and, via
    add_final_variable, `variables`) from the enclosing scope — confirm
    against the surrounding module. Recurs into 'multi_var' subtrees,
    finalizing the current variable before starting the next.
    """
    for item in ast.content:
        # if is an AST
        if isinstance(item, ASTNode):
            # add extension
            if item.name == 'extension':
                variable['extension'] = generate_type(item.content[-1])
            # add initializer and constexpr
            elif item.name == 'initializer':
                variable['initializer'] = generate_expr(item.content[-1])
                # check for constexpr from initializer operator (':=')
                variable['constexpr'] = item.content[0].type == ':='
                # perform constexpr check if is constexpr
                if variable['constexpr']:
                    if not check_constexpr(variable['initializer']):
                        errormodule.throw('semantic_error', 'Expected constexpr', item.content[-1])
            # recur and continue building variable dictionary
            elif item.name == 'multi_var':
                add_final_variable()
                generate_variable_dict(item)
        # otherwise assume token and check if it identifier
        elif item.type == 'IDENTIFIER':
            variable['name'] = item
def generate_assign_var(assign_var):
    """Compile an assign_var AST into an Identifier or Dereference ExprNode.

    A bare id_type resolves directly; otherwise the subtree carries one or
    more dereference operators, so the inner variable is generated first
    and wrapped in a Dereference node with the computed pointer count.

    Fix vs. original: the non-pointer dereference error message read
    'Unable to dereference a non-pointers' — corrected to 'non-pointer'
    to be grammatical and consistent with the identical check in
    generate_unary_atom.
    """
    # generate identifier
    def generate_id_type(id_type):
        # handle this token
        # uses Module.get_instance()
        if id_type.content[0].type == 'THIS':
            return get_instance()
        # otherwise look up symbol and return identifier
        else:
            sym = util.symbol_table.look_up(id_type.content[0].value)
            if not sym:
                errormodule.throw('semantic_error',
                                  'Variable \'%s\' not defined' % id_type.content[0].value,
                                  id_type)
            return Identifier(sym.name, sym.data_type, Modifiers.CONSTANT in sym.modifiers)

    # if there is single ASTNode, assume the it is id_types
    if isinstance(assign_var.content[0], ASTNode):
        # return generate id type
        return generate_id_type(assign_var.content[0])
    # there is a dereference operator
    else:
        # check if there is an id type after the dereference operator
        if isinstance(assign_var.content[-1].content[0], ASTNode):
            root = generate_id_type(assign_var.content[-1].content[0])
        # otherwise generate sub var
        else:
            root = generate_assign_var(assign_var.content[-1].content[1])
        # check for trailer
        if len(assign_var.content[-1].content) > 3:
            root = add_trailer(root, assign_var.content[-1].content[2])
        # calculate the dereference count
        deref_count = 1 if len(assign_var.content) == 2 else len(unparse(assign_var.content[1])) + 1
        # check for non-pointer dereference
        if deref_count > root.data_type.pointers:
            errormodule.throw('semantic_error', 'Unable to dereference a non-pointer', assign_var)
        dt = copy(root.data_type)
        dt.pointers -= deref_count
        return ExprNode('Dereference', dt, deref_count, root)
def generate_parameter_list(decl_params):
    """Generate and validate the parameter list of a function declaration.

    Collects the first parameter plus every trailing 'n_func_params'
    parameter, then enforces ordering rules: an indefinite parameter must
    be last, and a normal parameter may not follow an optional one.
    Returns the list of parameter objects.

    Fix vs. original: the trailing-parameter walk tested `ending.name`
    without re-checking that `ending` was still an ASTNode after
    `ending = ending.content[-1]`; if the grammar ever places a Token
    there, that raised AttributeError. The loop now guards with
    isinstance, which is a no-op when the grammar guarantee holds.
    """
    # holds all known parameters; generate first parameter up front
    params = [generate_parameter(decl_params)]
    ending = decl_params.content[-1]
    # check for trailing parameters
    # func_params_decl -> n_func_params -> n_func_param
    while isinstance(ending, ASTNode) and ending.name == 'n_func_params':
        params.append(generate_parameter(ending.content[1]))
        # update while loop
        ending = ending.content[-1]
    # iterate through params to ensure that they are valid
    # has encountered "optional" parameter
    optional = False
    # has encountered "indefinite" parameter
    indefinite = False
    for param in params:
        # only one indefinite parameter per function and must be the final parameter
        if indefinite:
            errormodule.throw(
                'semantic_error',
                'A function\'s indefinite parameter must be the last parameter',
                decl_params)
        # if it has the property indefinite, it is indefinite
        elif hasattr(param, 'indefinite'):
            indefinite = True
        # if it has a default_value, it is optional
        elif hasattr(param, 'default_value'):
            optional = True
        # if there has been an optional param, and this param is not optional, throw param order exception
        elif optional:
            errormodule.throw(
                'semantic_error',
                'Normal parameter preceded by optional parameter',
                decl_params)
    # return complete parameter list
    return params
def compile_parameters(param_ast):
    """Compile a call's parameter AST into a flat list of argument values.

    Positional arguments become generated ExprNodes; a named parameter
    becomes a (name, ExprNode) tuple built from the preceding 'expr'
    subtree (which must unparse to a single IDENTIFIER token, otherwise a
    semantic error is thrown). Recurs through 'n_param' tails.
    Returns [] when param_ast is not an ASTNode (no parameters).
    """
    params = []
    # the most recent 'expr' subtree; replaced by a (name, value) tuple
    # when a named_param follows it
    expr = ''
    if not isinstance(param_ast, ASTNode):
        return params
    for item in param_ast.content:
        if isinstance(item, ASTNode):
            if item.name == 'expr':
                expr = item
            elif item.name == 'named_param':
                ue = unparse(expr)
                # the name side must be exactly one IDENTIFIER token
                if len(ue) > 1 or isinstance(
                        ue[0], ASTNode) or ue[0].type != 'IDENTIFIER':
                    errormodule.throw(
                        'semantic_error', 'Invalid parameter name',
                        param_ast.content[0 if isinstance(param_ast.content[0], ASTNode) else 1])
                expr = (ue[0].value, generate_expr(item.content[1]))
            elif item.name == 'n_param':
                # recur into the remaining parameters
                params += compile_parameters(item)
    # a pending ASTNode expr is still a positional argument; a tuple is
    # an already-compiled named argument
    return [generate_expr(expr) if isinstance(expr, ASTNode) else expr] + params
def generate_expr(expr):
    """Compile a top-level 'expr' AST into an ExprNode.

    Handles three shapes: a plain logical expression ('or' subtree), an
    inline comparison `cond ? a : b` (returns an InlineCompare node with
    the dominant type of the two branches), and chained null coalescing
    (folds successive operands into NullCoalesce nodes).
    """
    # hold the resulting expr
    root = None
    for item in expr.content:
        # only possible value is ASTNode
        # if it is a root ASTNode, generate it
        if item.name == 'or':
            root = generate_logical(item)
        # if the root has a continued expr
        elif item.name == 'n_expr':
            # if it is InlineIf
            if item.content[0].type == '?':
                # root ? val1 : val2
                val1 = generate_logical(item.content[1])
                val2 = generate_logical(item.content[3])
                # ensure root is boolean
                if not types.boolean(root.data_type):
                    errormodule.throw(
                        'semantic_error',
                        'Comparison expression of inline comparison must be a boolean',
                        expr.content[0])
                # get the dominant resulting type (try both directions)
                dt = types.dominant(val1.data_type, val2.data_type)
                if not dt:
                    dt = types.dominant(val2.data_type, val1.data_type)
                # if neither can be overruled by each other, throw error
                if not dt:
                    errormodule.throw(
                        'semantic_error',
                        'Types of inline comparison must be similar', item)
                return ExprNode('InlineCompare', dt, root, val1, val2)
            # otherwise, it is Null Coalescence
            else:
                # recursive while loop over the n_expr chain
                n_expr = item
                while n_expr.name == 'n_expr':
                    # get root expression
                    logical = generate_logical(n_expr.content[1])
                    # ensure the root and logical are coercible / equivalent
                    if types.coerce(root.data_type, logical.data_type):
                        root = ExprNode('NullCoalesce', root.data_type, root, logical)
                    # otherwise it is an invalid null coalescence
                    else:
                        errormodule.throw(
                            'semantic_error',
                            'Types of null coalescing must be similar', expr)
                    # recur
                    n_expr = n_expr.content[-1]
    # return final expr
    return root
def check_parameters(func, params, ast):
    """Validate a call's arguments against a function's declared parameters.

    func   -- callable whose data_type.parameters is the declared list
    params -- compiled arguments; a tuple is a named (name, value) pair,
              anything else is positional
    ast    -- AST used for error reporting
    Throws semantic errors for: too many/few arguments, unknown or
    duplicated names, explicit values for indefinite parameters, and
    type mismatches.
    """
    # check if a given parameter is mandatory
    def required(parameter):
        return not hasattr(parameter, 'optional') and not hasattr(
            parameter, 'indefinite')

    base_params = func.data_type.parameters
    # names already filled (positionally or by name)
    names = []
    # 1-based count of required parameters satisfied so far
    met_count = 1
    for i in range(len(params)):
        if met_count > len(base_params):
            errormodule.throw('semantic_error',
                              'Too many parameters for function', ast)
        # named argument: (name, value)
        if isinstance(params[i], tuple):
            elems = [x for x in base_params if x.name == params[i][0]]
            if len(elems) == 0:
                errormodule.throw(
                    'semantic_error',
                    'Function has no parameter \'%s\'' % params[i][0], ast)
            elif params[i][0] in names:
                errormodule.throw(
                    'semantic_error',
                    'Multiple values specified for parameter \'%s\'' % params[i][0],
                    ast)
            elif hasattr(elems[0], 'indefinite'):
                errormodule.throw(
                    'semantic_error',
                    'Unable to explicitly specify value for indefinite parameter',
                    ast)
            elif not dominant(elems[0].data_type, params[i][1].data_type):
                errormodule.throw('semantic_error',
                                  'Parameter data types don\'t match', ast)
            if required(elems[0]):
                met_count += 1
            names.append(elems[0].name)
        # positional argument: matched against the next unmet parameter
        else:
            met_ndx = met_count - 1
            if not dominant(base_params[met_ndx].data_type,
                            params[i].data_type):
                errormodule.throw('semantic_error',
                                  'Parameter data types don\'t match', ast)
            names.append(base_params[met_ndx].name)
            if required(base_params[met_ndx]):
                met_count += 1
    # every required parameter must have been satisfied
    if met_count - 1 < len([x for x in base_params if required(x)]):
        errormodule.throw('semantic_error',
                          'Too few parameters for function call', ast)
def check_unmatched(code_str):
    """Report every non-whitespace character the lexer failed to match.

    Each leftover character produces a lex_error carrying the character
    and its position within code_str. A whitespace-only (or empty) string
    produces no errors.
    """
    for leftover in re.finditer(r"[^\s]", code_str):
        er.throw("lex_error", "Invalid identifier name",
                 [leftover.group(0), leftover.start()])
def check_char(char, ndx):
    """Validate a char literal's body, reporting a lex_error when invalid.

    Multi-character bodies are only legal when they are one of the
    recognized escape sequences; single characters are always accepted.
    ndx is the literal's position, passed through to the error report.
    """
    # the recognized two-character escape sequences
    slash_chars = ["\\n", "\\t", "\\r", "\\\\", "\\\'", "\\\""]
    # a single character is always a valid literal body
    if len(char) > 1 and char not in slash_chars:
        er.throw("lex_error", "Invalid char literal", [char, ndx])
def generate_logical(logical):
    """Compile a logical ('or'/'and'/'xor' level) AST into an ExprNode.

    Delegates 'comparison' subtrees downward. For real logical trees it
    flattens the right-recursive 'n_*' chain, then folds operands left to
    right: bool op bool -> a boolean operator node, int-coercible types ->
    a Bitwise* node, custom types -> an operator-overload call.
    """
    # if it is a comparison, pass it on to the next generator
    if logical.name == 'comparison':
        return generate_comparison(logical)
    # unpack tree if it exists
    if len(logical.content) > 1:
        # hold the unpacked tree
        unpacked_tree = logical.content[:]
        # semi-recursive for loop to unpack tree (the appended content is
        # itself visited, flattening the whole right-recursive chain)
        for item in unpacked_tree:
            # if it is an ASTNode
            if isinstance(item, ASTNode):
                # and falls into the 'n' categories
                if item.name.startswith('n'):
                    # unpack it
                    unpacked_tree += unpacked_tree.pop().content
        # root holds the next level downward, op = operator being used
        root, op = generate_logical(unpacked_tree.pop(0)), None
        # iterate through unpacked tree
        for item in unpacked_tree:
            # main tree
            if isinstance(item, ASTNode):
                # get next comparison tree if it is a comparison otherwise get next logical tree
                tree = generate_comparison(
                    item) if item.name == 'comparison' else generate_logical(
                        item)
                # if both are simple data types
                if isinstance(tree.data_type, types.DataType) and isinstance(
                        root.data_type, types.DataType):
                    # check booleans and generate boolean operators
                    if tree.data_type.data_type == types.DataTypes.BOOL and tree.data_type.pointers == 0 and \
                            root.data_type.data_type == types.DataTypes.BOOL and root.data_type.pointers == 0:
                        root = ExprNode(
                            op, types.DataType(types.DataTypes.BOOL, 0), root,
                            tree)
                        continue
                    # generate bitwise operators
                    else:
                        # extract dominant type and if there is not one, throw error
                        dom = types.dominant(root.data_type, tree.data_type)
                        if dom:
                            if not types.coerce(
                                    types.DataType(types.DataTypes.INT, 0), dom):
                                errormodule.throw(
                                    'semantic_error',
                                    'Unable to apply bitwise %s to object' % op.lower(),
                                    logical)
                            root = ExprNode('Bitwise' + op, dom, root, tree)
                        else:
                            if not types.coerce(
                                    types.DataType(types.DataTypes.INT, 0),
                                    tree.data_type):
                                errormodule.throw(
                                    'semantic_error',
                                    'Unable to apply bitwise %s to object' % op.lower(),
                                    logical)
                            root = ExprNode('Bitwise' + op, tree.data_type, root, tree)
                # handle operator overloading
                elif isinstance(root.data_type, types.CustomType):
                    # get and check method
                    # NOTE(review): check_parameters is called before the
                    # `if method:` None-check below — if get_property returns
                    # None this dereferences it; confirm intended order
                    method = modules.get_property(tree.data_type, '__%s__' % op.lower())
                    functions.check_parameters(method, [tree], item)
                    if method:
                        root = ExprNode('Call', method.data_type.return_type, method, tree)
                    else:
                        errormodule.throw(
                            'semantic_error',
                            'Object has no method \'__%s__\'' % op.lower(),
                            logical)
                else:
                    errormodule.throw(
                        'semantic_error',
                        'Unable to apply bitwise operator to object', logical)
            # only token is operator; capitalize its type name (e.g. 'or' -> 'Or')
            else:
                name = item.type.lower()
                op = name[0].upper() + name[1:]
        return root
    # otherwise recur to next level down
    else:
        return generate_logical(logical.content[0])
def generate_unary_atom(u_atom):
    """Compile a unary-prefixed atom ('-', '&', '*') into an ExprNode.

    With no prefix the inner atom is returned unchanged. Packages,
    tuples, data-type literals and templates reject all unary operators.
    '-' negates (or calls __invert__ on custom types), '&' references,
    '*' dereferences with a computed pointer count.
    """
    # generate hold atom
    atom = generate_atom(u_atom.content[-1])
    if len(u_atom.content) > 1:
        # check for packages
        if isinstance(atom, Package):
            errormodule.throw('semantic_error',
                              'Unable to apply operator to package', u_atom)
            return
        # check for tuples
        if isinstance(atom.data_type, types.Tuple):
            errormodule.throw('semantic_error',
                              'Unable to apply operator to multiple values', u_atom)
            return
        # check data type literal
        if isinstance(atom.data_type, types.DataTypeLiteral):
            errormodule.throw('semantic_error',
                              'Unable to apply operator to Data Type literal', u_atom)
            return
        # check for template
        if isinstance(atom.data_type, types.Template):
            errormodule.throw('semantic_error',
                              'Unable to apply operator to template', u_atom)
            return
        prefix = u_atom.content[0].content[0]
        # handle sine change (sign negation; 'sine' is the codebase's spelling)
        if prefix.type == '-':
            # test for numericality
            if types.numeric(atom.data_type):
                # handle modules (custom types overload via __invert__)
                if isinstance(atom.data_type, types.CustomType):
                    invert_method = modules.get_property(
                        atom.data_type, '__invert__')
                    return ExprNode('Call', invert_method.data_type.return_type,
                                    invert_method)
                # change the sine of an element
                else:
                    return ExprNode('ChangeSine', atom.data_type, atom)
            else:
                # throw error
                errormodule.throw(
                    'semantic_error',
                    'Unable to change sine on non-numeric type.', u_atom)
        # handle reference op
        elif prefix.type == 'AMP':
            dt = copy(atom.data_type)
            # create pointer
            dt.pointers += 1
            # reference pointer
            return ExprNode('Reference', dt, atom)
        # handle deref op
        elif prefix.type == '*':
            # dereference count: extra '*' tokens beyond the first add to it
            do = len(unparse(u_atom.content[0].content[1])) + 1 if len(
                u_atom.content[0].content) > 1 else 1
            # handle pointer error
            # < because that means there is more dereferencing than there are references to dereference
            if atom.data_type.pointers < do:
                errormodule.throw('semantic_error',
                                  'Unable to dereference a non-pointer',
                                  u_atom.content[0])
            elif isinstance(atom.data_type, types.VoidPointer):
                if atom.data_type.pointers <= do:
                    errormodule.throw('semantic_error',
                                      'Unable to dereference void pointer',
                                      u_atom.content[0])
                # NOTE(review): only 1 is subtracted from the pointer count
                # here while the node still records `do` dereferences —
                # confirm this asymmetry with the plain-pointer branch below
                vp = copy(atom.data_type)
                vp.pointers -= 1
                return ExprNode('Dereference', vp, do, atom)
            else:
                dt = copy(atom.data_type)
                dt.pointers -= do
                # return dereference with count
                return ExprNode('Dereference', dt, do, atom)
    else:
        return atom
def check_operands(dt1, dt2, operator, ast):
    """Type-check a binary arithmetic operator and return the result type.

    dt1/dt2  -- operand data types (dt1 is the left operand)
    operator -- the operator lexeme ('+', '*', or any other arithmetic op)
    ast      -- AST used for error reporting
    Returns the resulting data type; throws a semantic error on any
    invalid combination. '+' additionally allows enumerable concatenation
    and array/list merging; '*' additionally allows string repetition.
    """
    # check for custom type mismatch (custom on the right only)
    if not isinstance(dt1, types.CustomType) and isinstance(
            dt2, types.CustomType):
        errormodule.throw('semantic_error',
                          'Invalid type match up for numeric operator', ast)
    # check for invalid pointer arithmetic: pointer op non-int is illegal
    if dt1.pointers > 0:
        if isinstance(dt2, types.DataType):
            if dt2.data_type == types.DataTypes.INT and dt2.pointers == 0:
                return dt1
        errormodule.throw(
            'semantic_error',
            'Pointer arithmetic can only be performed between an integer and a pointer',
            ast)
    # check addition operator
    if operator == '+':
        # numeric addition; result is the coerced-to type
        if types.numeric(dt1) and types.numeric(dt2):
            if types.coerce(dt1, dt2):
                return dt2
            return dt1
        # list / string concatenation
        elif types.enumerable(dt1) and types.enumerable(dt2):
            if dt1 == dt2:
                return dt1
            # check arrays and lists (mixed array/list with same element type)
            elif (isinstance(dt1, types.ArrayType) or isinstance(dt1, types.ListType)) and \
                    (isinstance(dt2, types.ArrayType) or isinstance(dt2, types.ListType)):
                if dt1.element_type == dt2.element_type:
                    return dt1
            errormodule.throw(
                'semantic_error',
                'Unable to apply operator to dissimilar enumerable types', ast)
        errormodule.throw('semantic_error',
                          'Invalid type(s) for operator \'%s\'' % operator, ast)
    # check multiply operator
    elif operator == '*':
        # numeric multiplication
        if types.numeric(dt1) and types.numeric(dt2):
            if types.coerce(dt1, dt2):
                return dt2
            return dt1
        # string multiplication (string * int)
        if isinstance(dt1, types.DataType) and isinstance(dt2, types.DataType):
            # we can assume val1's pointers (checked above)
            if dt2.pointers == 0:
                if dt1.data_type == types.DataTypes.STRING and dt2.data_type == types.DataTypes.INT:
                    return dt1
        errormodule.throw('semantic_error',
                          'Invalid type(s) for operator \'%s\'' % operator, ast)
    # check all other operators (numeric only)
    else:
        if types.numeric(dt1) and types.numeric(dt2):
            if types.coerce(dt1, dt2):
                return dt2
            return dt1
        errormodule.throw('semantic_error',
                          'Invalid type(s) for operator \'%s\'' % operator, ast)
def generate_shift(shift):
    """Compile a binary-shift AST ('<<', '>>', '>>>') into an ExprNode.

    With no shift operator present, delegates to the arithmetic
    generator. Otherwise flattens the 'n_shift' chain and folds left to
    right; the left operand may be a plain DataType or a CustomType with
    an operator-overload method, the right operand must be a plain int.
    """
    # if there is a shift performed
    if len(shift.content) > 1:
        # extract operator-expr list (flatten the right-recursive chain)
        unpacked_tree = shift.content[:]
        while unpacked_tree[-1].name == 'n_shift':
            unpacked_tree += unpacked_tree.pop().content
        # get first expression
        root = generate_arithmetic(unpacked_tree.pop(0))
        # check for overload flag
        overload = False
        # check root data type
        if not isinstance(root.data_type, types.DataType):
            if isinstance(root.data_type, types.CustomType):
                overload = True
            else:
                errormodule.throw(
                    'semantic_error',
                    'Invalid type for left operand of binary shift',
                    shift.content[0])
        # operator used
        op = ''
        for item in unpacked_tree:
            # ast node => arithmetic expr
            if isinstance(item, ASTNode):
                # generate next tree element
                tree = generate_arithmetic(item)
                # check for overloads
                if overload:
                    method = modules.get_property(root.data_type,
                                                  '__%s__' % op.lower())
                    if not method:
                        errormodule.throw(
                            'semantic_error',
                            'Invalid type for left operand of binary shift',
                            shift.content[0])
                    functions.check_parameters(method.data_type, [tree], item)
                    # stay in overload mode only while the return type is custom
                    overload = isinstance(method.data_type.return_type,
                                          types.CustomType)
                    root = ExprNode('Call', method.data_type.return_type,
                                    method, [tree])
                # type check element
                if isinstance(tree.data_type, types.DataType):
                    # ensure it is an integer (binary shifts can only be continued by integers)
                    if tree.data_type.data_type == types.DataTypes.INT and tree.data_type.pointers == 0:
                        root = ExprNode(op, root.data_type, root, tree)
                        continue
                errormodule.throw(
                    'semantic_error',
                    'Invalid type for right operand of binary shift', item)
            # token => operator
            else:
                if item.type == '<<':
                    op = 'Lshift'
                elif item.type == '>>':
                    op = 'ARshift'
                else:
                    op = 'LRshift'
        return root
    # otherwise, pass on to arithmetic parser
    else:
        return generate_arithmetic(shift.content[0])
def generate_comparison(comparison):
    """Compile a comparison AST ('!', relational and equality ops) into an ExprNode.

    'shift' subtrees delegate downward. A 'not' tree produces a Not node
    (or a __not__ overload call for custom types). Otherwise the
    'n_comparison' chain is flattened and folded left to right into
    boolean-typed comparison nodes, with custom types dispatched through
    their dunder comparison methods.
    """
    # generate shift if it is a shift tree (because the comparison generator is recursive)
    if comparison.name == 'shift':
        return generate_shift(comparison)
    # evaluate comparison if there is one
    if len(comparison.content) > 1:
        # generate inversion operator
        if comparison.name == 'not':
            # generate base tree to invert
            tree = generate_shift(comparison.content[1])
            # check for data types
            if isinstance(tree.data_type, types.DataType):
                # unable to ! pointers
                if tree.data_type.pointers > 0:
                    errormodule.throw(
                        'semantic_error',
                        'The \'!\' operator is not applicable to pointer',
                        comparison)
                # generate normal not operator
                return ExprNode('Not', tree.data_type, tree)
            # generate overloaded not operator
            elif isinstance(tree.data_type, types.CustomType):
                # get and check overloaded not method
                # NOTE(review): check_parameters runs before the None-check on
                # not_method, and passes `tree` rather than `[tree]` unlike the
                # other overload sites — confirm both
                not_method = modules.get_property(tree.data_type, '__not__')
                functions.check_parameters(not_method, tree, comparison)
                if not_method:
                    return ExprNode('Call', not_method.data_type.return_type,
                                    not_method, tree)
                else:
                    errormodule.throw('semantic_error',
                                      'Object has no method \'__not__\'',
                                      comparison)
            else:
                errormodule.throw(
                    'semantic_error',
                    'The \'!\' operator is not applicable to object',
                    comparison)
        # otherwise generate normal comparison operator
        else:
            # comparison op method dictionary (op lexeme -> overload name)
            comparison_methods = {
                '<=': '__lteq__',
                '>=': '__gteq__',
                "<": '__lt__',
                '>': '__gt__',
                '==': '__eq__',
                '!=': '__neq__',
                '===': '__seq__',
                '!==': '__sneq__'
            }
            # unpack tree into expressions and operators
            unpacked_tree = comparison.content[:]
            for item in unpacked_tree:
                if item.name == 'n_comparison':
                    unpacked_tree += unpacked_tree.pop().content
            # root = first element in unpacked tree, op = operator
            root, op = generate_comparison(unpacked_tree.pop(0)), None
            for item in unpacked_tree:
                # all elements are ASTs
                # not is base expression
                if item.name == 'not':
                    # extract next operator tree
                    n_tree = generate_comparison(item)
                    # check for overloads
                    if isinstance(root.data_type, types.CustomType):
                        method = modules.get_property(root.data_type,
                                                      comparison_methods[op])
                        if not method:
                            # numeric comparisons on custom types require the overload
                            if op in {'<=', '>=', '<', '>'}:
                                errormodule.throw(
                                    'semantic_error',
                                    'Unable to use numeric comparison with non-numeric type',
                                    comparison)
                        else:
                            functions.check_parameters(method.data_type,
                                                       [n_tree], comparison)
                            root = ExprNode('Call',
                                            method.data_type.return_type,
                                            method, [n_tree])
                    # check numeric comparison
                    if op in {'<=', '>=', '<', '>'}:
                        # check invalid overloads
                        if isinstance(n_tree.data_type, types.CustomType):
                            errormodule.throw(
                                'semantic_error',
                                'Invalid type match up for numeric comparison',
                                comparison)
                        if types.numeric(n_tree.data_type) and types.numeric(
                                root.data_type):
                            root = ExprNode(
                                op, types.DataType(types.DataTypes.BOOL, 0),
                                root, n_tree)
                        else:
                            errormodule.throw(
                                'semantic_error',
                                'Unable to use numeric comparison with non-numeric type',
                                comparison)
                    # generate standard comparison (always boolean-typed)
                    elif op in {'==', '!=', '===', '!=='}:
                        root = ExprNode(
                            op, types.DataType(types.DataTypes.BOOL, 0), root,
                            n_tree)
                # if it is not a base expression, it is an operators
                elif item.name == 'comparison_op':
                    op = item.content[0].value
            return root
    # otherwise recur
    else:
        return generate_comparison(comparison.content[0])
def get_return_type(function_body):
    """Infer a function body's return type and whether it is a generator.

    Scans the body for return_stmt / yield_stmt nodes (yield marks the
    function as a generator) and recurs into every other subtree,
    throwing a semantic error when types are inconsistent.
    Returns a (return_type, generator) pair; return_type is None when the
    body never returns a value.
    """
    def generate_returns(rt_expr):
        # collect the types of a (possibly multi-valued) return expression;
        # returns None for no values, a single type, or a list of types
        # hold return types
        rt_types = []
        for elem in rt_expr.content:
            if isinstance(elem, ASTNode):
                # if it is an expr, assume it is part of return
                if elem.name == 'expr':
                    # add to return types
                    rt_types.append(generate_expr(elem))
                # if there are more/multiple return expression, add those to the return type list as well
                elif elem.name == 'n_rt_expr':
                    # get a set of return types and decide how to add them to the list
                    n_types = generate_returns(elem)
                    if isinstance(n_types, list):
                        rt_types += n_types
                    else:
                        rt_types.append(n_types)
        # if there are no returns, return None
        if len(rt_types) == 0:
            return
        # return a list if there are multiple or just one if there is only 1
        return rt_types if len(rt_types) > 1 else rt_types[0]

    # return type holder
    rt_type = None
    # if it has encountered a return value
    is_rt_type = False
    # if it is a generator
    generator = False
    for item in function_body.content:
        if isinstance(item, ASTNode):
            if item.name == 'return_stmt':
                if len(item.content) > 1:
                    # generate new type from return expr
                    n_type = generate_returns(item.content[1])
                    # if they are not equal and there is a return type
                    if n_type != rt_type and is_rt_type and not coerce(
                            rt_type, n_type):
                        errormodule.throw('semantic_error',
                                          'Inconsistent return type', item)
                    else:
                        rt_type = n_type
                else:
                    # if the there is an rt type and it is not null
                    if is_rt_type and rt_type:
                        errormodule.throw('semantic_error',
                                          'Inconsistent return type', item)
                    # no need to update as it is already null
                is_rt_type = True
            elif item.name == 'yield_stmt':
                if len(item.content) > 1:
                    # generate new type from return expr
                    n_type = generate_returns(item.content[1])
                    # if they are not equal and there is a return type
                    if n_type != rt_type and is_rt_type and not coerce(
                            rt_type, n_type):
                        errormodule.throw('semantic_error',
                                          'Inconsistent return type', item)
                    else:
                        rt_type = n_type
                else:
                    # if the there is an rt type and it is not null
                    if is_rt_type and rt_type:
                        errormodule.throw('semantic_error',
                                          'Inconsistent return type', item)
                    # no need to update as it is already null
                is_rt_type = True
                generator = True
            else:
                # get type from rest of function (recursion returns a
                # (type, generator) tuple; see note below)
                temp_type = get_return_type(item)
                # if the types are inconsistent
                # NOTE(review): temp_type is a tuple here while rt_type may be
                # a bare type — the != comparison and assignment mix the two;
                # the isinstance(tuple) unwrap at the bottom compensates —
                # confirm intended
                if is_rt_type and temp_type != rt_type:
                    errormodule.throw('semantic_error',
                                      'Inconsistent return type', item)
                # otherwise update return type
                else:
                    rt_type = temp_type
    # since rt_type will be evaluated on the basis of not being a direct data type
    # return None is ok
    return rt_type[0].data_type if isinstance(rt_type, tuple) else rt_type, generator
def generate_assignment_expr(root, assign_expr):
    """Compile an assignment expression tail into a StatementNode.

    root        -- the already-generated left-most assign variable
    assign_expr -- the assignment_expr AST: either a (possibly
                   multi-target, multi-value) '=' / compound assignment,
                   or an increment/decrement suffix.
    Returns StatementNode('Assign', op, {var: value, ...}) or
    StatementNode('Increment'/'Decrement', root).

    Fix vs. original: the increment/decrement error message used
    `'...%s...' % 'increment' if increment else 'decrement'` — `%` binds
    tighter than the conditional, so the message was either the
    'increment' formatting or the bare string 'decrement'. The
    conditional is now parenthesized.
    """
    # check for traditional assignment
    if isinstance(assign_expr.content[0], ASTNode):
        # NOTE all upper level values are ASTNodes
        # value is used to determine where to begin generating assignment expr
        is_n_assign = int(assign_expr.content[0].name != 'n_assignment')
        # variables is the collection of variables used in assignment (assign vars)
        # initializers is the matching initializer set
        variables, initializers = [root], [generate_expr(assign_expr.content[2 - is_n_assign])]
        # if there are multiple variables
        if assign_expr.content[0].name == 'n_assignment':
            # holding content used in recursive for loop
            assign_content = assign_expr.content[0].content
            for item in assign_content:
                # ignore commas
                if isinstance(item, ASTNode):
                    # check for the assignment variable
                    if item.name == 'assign_var':
                        # add generated assignment variable
                        variables.append(generate_assign_var(item))
                    elif item.name == 'n_assignment':
                        # recur
                        assign_content = item.content
        # if there are multiple expressions
        if assign_expr.content[-1].name == 'n_list':
            # holding content used in recursive for loop
            expressions = assign_expr.content[-1].content
            for item in expressions:
                # ignore commas
                if isinstance(item, ASTNode):
                    # check for expression (initializer)
                    if item.name == 'expr':
                        # generate initializer expression
                        expr = generate_expr(item)
                        # if it is a tuple (multiple values stored in a single expression)
                        if isinstance(expr.data_type, types.Tuple):
                            # add each value to expression set (de-tuple)
                            for elem in expr.data_type.values:
                                initializers.append(elem)
                        # else add raw expr to list
                        else:
                            initializers.append(expr)
                    elif item.name == 'n_list':
                        # recur
                        expressions = item.content
        # check for matching assignment properties (unmodified variables)
        if len(variables) != len(initializers):
            errormodule.throw('semantic_error',
                              'Assignment value counts don\'t match', assign_expr)
        # get the assignment operator used
        # use offset to calculate it
        op = assign_expr.content[1 - is_n_assign].content[0]
        # iterate through variables and initializers together
        for var, expr in zip(variables, initializers):
            # if the variable is not modifiable
            if not modifiable(var):
                errormodule.throw('semantic_error',
                                  'Unable to modify unmodifiable l-value', assign_expr)
            # if there is a type mismatch
            if not types.coerce(var.data_type, expr.data_type):
                errormodule.throw('semantic_error',
                                  'Variable type and reassignment type do not match', assign_expr)
            # if there is a compound operator
            if op.type != '=' and not types.numeric(var.data_type):
                # all compound operators only work on numeric types
                errormodule.throw('semantic_error',
                                  'Compound assignment operator invalid for non-numeric type', op)
        # return generate statement
        return StatementNode('Assign', op.type, dict(zip(variables, initializers)))
    # else assume increment and decrement
    else:
        # holds whether or not it is increment or decrement
        increment = assign_expr.content[0].type == '+'
        # check if the root is modifiable
        if not modifiable(root):
            errormodule.throw('semantic_error',
                              'Unable to modify unmodifiable l-value', assign_expr)
        # check if the root is numeric (as only numeric types accept these operators)
        if not types.numeric(root.data_type):
            errormodule.throw('semantic_error',
                              'Unable to %s non numeric value' % ('increment' if increment else 'decrement'),
                              assign_expr)
        # generate statement
        return StatementNode('Increment' if increment else 'Decrement', root)
def generate_statement(stmt, context: Context): return { # handle return 'return_stmt': generate_return, # handle yield # yield uses same function, b/c it differentiates 'yield_stmt': generate_return, # generate break statement and check context 'break_stmt': lambda s, a: StatementNode('Break') if context.break_context else errormodule.throw('semantic_error', 'Invalid context for break statement', stmt), # generate continue statement and check context 'continue_stmt': lambda s, a: StatementNode('Continue') if context.continue_context else errormodule.throw('semantic_error', 'Invalid context for continue statement', stmt), # generate throw statement 'throw_stmt': lambda s: StatementNode('Throw', generate_expr(s.content[1])), # generate variable with no modifiers 'variable_declaration': lambda s: generate_variable_declaration(s, []), # generate variable with external modifier 'external_stmt': lambda s: generate_variable_declaration(s.content[1].content[0], [Modifiers.EXTERNAL]), # generate variable with volatile and possibly external modifiers 'lock_stmt': lambda s: generate_variable_declaration(s.content[-1], [Modifiers.LOCK] if s.content[1].name != 'extern' else [Modifiers.LOCK, Modifiers.EXTERNAL]), # generate assignment / function call statement 'assignment': generate_assignment, # generate delete statement 'delete_stmt': generate_delete # subscript to get statement name and then call function, pass in context if necessary }[stmt.name](*([stmt, context] if stmt.name in {'yield_stmt', 'return_stmt', 'break_stmt', 'continue_stmt'} else [stmt]))
def generate_type(ext):
    """Generate a semantic type object from a type-extension AST.

    Handles pointer prefixes (deref_op), the built-in collection/function
    types (array, list, dict, func/async func), pure built-in types, and
    user-declared custom types reached through dotted identifiers.
    """
    pointers = 0
    # handle std types (from extension)
    if ext.name == 'types':
        if ext.content[0].name == 'deref_op':
            # extract data type pointers
            pointers = len(unparse(ext.content[0]))
        # update ext to simple types
        ext = ext.content[-1]  # selects last element (always simple types)
    # if it is token, assume array, list or dict
    if isinstance(ext.content[0], Token):
        # assume array
        if ext.content[0].type == 'ARRAY_TYPE':
            # ext.content[1].content[1] == pure_types -> array_modifier -> types
            et = generate_type(ext.content[1].content[1])
            # extract count value
            # ext.content[1].content[3] == pure_types -> array_modifiers -> expr
            error_ast = ext.content[1].content[3]
            try:
                count = get_array_bound(generate_expr(ext.content[1].content[3]))
            except IndexError:
                errormodule.throw('semantic_error', 'Index out of range', error_ast)
                return
            # falsy non-zero bound means the expression was not a constexpr
            if not count and count != 0:
                errormodule.throw('semantic_error', 'Non-constexpr array bound', error_ast)
            elif type(count) == bool:
                errormodule.throw('semantic_error', 'Invalid value for array bound', error_ast)
            try:
                # the comparison forces a TypeError for non-orderable values
                _ = count < 0
                count = float(count)
            except (ValueError, TypeError):
                errormodule.throw('semantic_error', 'Invalid value for array bound', error_ast)
            # bounds must be whole numbers of at least 1
            if not count.is_integer():
                errormodule.throw('semantic_error', 'Invalid value for array bound', error_ast)
            elif count < 1:
                errormodule.throw('semantic_error', 'Invalid value for array bound', error_ast)
            return types.ArrayType(et, int(count), pointers)
        # assume list
        elif ext.content[0].type == 'LIST_TYPE':
            # ext.content[1].content[1] == pure_types -> list_modifier -> types
            return types.ListType(generate_type(ext.content[1].content[1]), pointers)
        # assume function
        elif ext.content[0].type in {'FUNC', 'ASYNC'}:
            params, return_types = None, None
            for item in ext.content[1].content:
                if not isinstance(item, Token):
                    if item.name == 'func_params_decl':
                        params = generate_parameter_list(item)
                    elif item.name == 'rt_type':
                        return_types = get_return_from_type(item)
            # BUG FIX: the async flag must test the token TYPE (as the guard
            # above does) — '.value' holds the lexeme, so the old
            # ".value == 'ASYNC'" comparison was always False
            return types.Function(params, return_types, 0, ext.content[0].type == 'ASYNC', False)
        # assume dict
        else:
            # ext.content[1].content[1/3] == pure_types -> dict_modifier -> types
            kt, vt = generate_type(ext.content[1].content[1]), generate_type(ext.content[1].content[3])
            # check mutability: mutable types cannot be dictionary keys
            if types.mutable(kt):
                errormodule.throw('semantic_error', 'Invalid key type for dictionary', ext.content[1].content[1])
            # compile dictionary type
            return types.MapType(kt, vt, pointers)
    else:
        if ext.content[0].name == 'pure_types':
            # data type literal
            if ext.content[0].content[0].type == 'DATA_TYPE':
                return types.DataTypeLiteral(types.DataTypes.DATA_TYPE)
            # return matched pure types
            return types.DataType({
                'INT_TYPE': types.DataTypes.INT,
                'BOOL_TYPE': types.DataTypes.BOOL,
                'BYTE_TYPE': types.DataTypes.BYTE,
                'FLOAT_TYPE': types.DataTypes.FLOAT,
                'LONG_TYPE': types.DataTypes.LONG,
                'COMPLEX_TYPE': types.DataTypes.COMPLEX,
                'STRING_TYPE': types.DataTypes.STRING,
                'CHAR_TYPE': types.DataTypes.CHAR,
                'OBJECT_TYPE': types.OBJECT_TEMPLATE,
            }[ext.content[0].content[0].type], 0)
        else:
            # get root symbol token
            # BUG FIX: the original extracted '.value' (a str) and then read
            # '.type' from it, raising AttributeError; test the token's type
            # first, then pass its value to the symbol table
            name_token = ext.content[0].content[0]
            if name_token.type == 'THIS':
                sym = modules.get_instance()
            else:
                sym = util.symbol_table.look_up(name_token.value)
            # hold previous symbol ASTNode for error messages
            prev_sym = ext.content[0].content[0]
            # extract outer symbols if necessary
            if len(ext.content[0].content) > 1:
                content = ext.content[0].content[1:]
                for item in content:
                    if isinstance(item, Token):
                        if item.type == 'IDENTIFIER':
                            # make sure symbol is a custom type
                            # (message typo fixed: was 'is not a valid is a data type')
                            if not isinstance(sym, types.CustomType):
                                errormodule.throw('semantic_error', 'Object is not a valid data type', prev_sym)
                            identifier = item.value
                            # get member for modules
                            if sym.data_type.data_type == types.DataTypes.MODULE:
                                # get and check property
                                prop = modules.get_property(sym.data_type, identifier)
                                if not prop:
                                    errormodule.throw('semantic_error', 'Object has no member \'%s\'' % identifier, item)
                                # update previous symbol
                                prev_sym = item
                                # update symbol
                                sym = prop
                            # invalid get member on interfaces
                            elif sym.data_type.data_type == types.DataTypes.INTERFACE:
                                errormodule.throw('semantic_error', '\'.\' is not valid for this object', item)
                            # assume struct or enum
                            else:
                                for member in sym.data_type.members:
                                    # if there is a match, extract value
                                    if member.name == identifier:
                                        prev_sym = item
                                        sym = member
                                        # break to prevent else condition
                                        break
                                # if there is no match, throw error
                                else:
                                    errormodule.throw('semantic_error', 'Object has no member \'%s\'' % identifier, item)
                    # continue recursive for loop (extending mid-iteration is
                    # deliberate: appended items are still visited)
                    elif item.name == 'dot_id':
                        content.extend(item.content)
            # final check for invalid data types
            if not isinstance(sym, types.CustomType):
                errormodule.throw('semantic_error', 'Object is not a valid data type', prev_sym)
            # add instance marker if necessary
            if sym.data_type.data_type != types.DataTypes.INTERFACE:
                sym.data_type.instance = True
            return sym.data_type
def generate_return(stmt, context): # generate return or yield if possible if context.return_context: return StatementNode('Return' if stmt.name == 'return_stmt' else 'Yield', generate_expr(stmt.content[1])) else: errormodule.throw('semantic_error', 'Invalid context for ' + ('yield statement' if stmt.name == 'yield_stmt' else 'return statement'), stmt)
def generate_variable_declaration(stmt, modifiers):
    """Generate a variable/constant declaration statement.

    stmt is the declaration AST; modifiers is the starting modifier list
    (CONSTANT / CONSTEXPR are appended here as detected).
    NOTE(review): 'modifiers' is mutated in place — callers pass a fresh
    list per call (see generate_statement), so this appears safe; confirm
    if other call sites exist.
    """
    # is constant
    constant = False
    # for multideclaration: holds set of variables
    variables = {}
    # main type extension
    overall_type = None
    # main variable initializer
    initializer = None
    # is marked constexpr
    constexpr = False
    # iterate to generate statement components
    for item in stmt.content:
        if isinstance(item, ASTNode):
            # generate variable if name given
            # NOTE(review): generate_var apparently returns either a dict
            # (multi-declaration) or a single object with .value — the
            # isinstance(variables, dict) branch below relies on this
            if item.name == 'var':
                variables = generate_var(item)
            # set overall type
            elif item.name == 'extension':
                overall_type = generate_type(item.content[1])
            # add initializer
            elif item.name == 'initializer':
                initializer = generate_expr(item.content[1])
                # set constexpr if := operator used
                constexpr = item.content[0].type == ':='
        # set constant if @ operator used instead of $
        elif item.type == '@':
            constant = True
    # add constant to modifiers if variable is marked constant
    if constant:
        modifiers.append(Modifiers.CONSTANT)
    # all constexpr variables must also be constant
    if constexpr and not constant:
        errormodule.throw('semantic_error', 'Declaration of constexpr on non-constant', stmt)
    # if multi-declaration was used
    if isinstance(variables, dict):
        # position in variable set (index into the global tuple initializer)
        pos = 0
        # iterate through variable dictionary
        # k = identifier token, v = generated variable object
        for k, v in variables.items():
            # handle variables marked constexpr in non constant environment
            if v.constexpr and not constant:
                errormodule.throw('semantic_error', 'Declaration of constexpr on non-constant', stmt)
            # if there is a global initializer
            if initializer:
                # if it is tuple based initializer
                if isinstance(initializer.data_type, types.Tuple):
                    # if the variable is still within the domain of the global initializer
                    # NOTE(review): a pos beyond the tuple is silently skipped —
                    # confirm a count-mismatch error is raised elsewhere
                    if pos < len(initializer.data_type.values):
                        # if it doesn't have a data type, add the one provided
                        # by the initializer (mutates the variable object)
                        if not hasattr(v, 'data_type'):
                            setattr(v, 'data_type', initializer.data_type.values[pos].data_type)
                        # handle invalid double initializers, ie $(x = 2, y) = tupleFunc();
                        elif hasattr(v, 'initializer'):
                            errormodule.throw('semantic_error', '%s cannot have two initializers' % ('constant' if constant else 'variable'), k)
                        # if there is a type mismatch between the type extension and the given initializer value
                        elif not types.coerce(v.data_type, initializer.data_type.values[pos].data_type):
                            errormodule.throw('semantic_error', 'Variable type extension and initializer data types do not match', k)
                # otherwise, it is invalid
                else:
                    errormodule.throw('semantic_error', 'Multi-%s declaration cannot have single global initializer' % ('constant' if constant else 'variable'), stmt)
            # constexpr value (constexpr(s) store value so they can be evaluated at compile-time)
            val = None
            if v.constexpr:
                val = v.initializer
            # generate symbol object
            # identifier name, data type, modifiers, value (if constexpr)
            sym = Symbol(k.value, v.data_type if hasattr(v, 'data_type') else overall_type, modifiers + [Modifiers.CONSTEXPR] if v.constexpr else modifiers, value=val)
            # if the symbol lacks a data type
            if not sym.data_type:
                errormodule.throw('semantic_error', 'Unable to infer data type of variable', k)
            # if there is a null declared variable (x = null)
            elif not overall_type and isinstance(sym.data_type, types.DataType) and sym.data_type.data_type == types.DataTypes.NULL:
                errormodule.throw('semantic_error', 'Unable to infer data type of variable', k)
            # add variable to symbol table
            util.symbol_table.add_variable(sym, k)
            pos += 1
        # statement name
        name = 'DeclareConstants' if constant else 'DeclareVariables'
        # if there is an initializer, add it to final statement
        if initializer:
            return StatementNode(name, overall_type, dict(zip([k.value for k in variables.keys()], variables.values())), modifiers, initializer)
        return StatementNode(name, overall_type, dict(zip([k.value for k in variables.keys()], variables.values())), modifiers)
    # if only normal declaration was used
    else:
        # handle null declarations (no type extension or initializer)
        # NOTE(review): the checks below assume errormodule.throw does not
        # return (otherwise initializer.data_type would fault on None)
        if not overall_type and not initializer:
            errormodule.throw('semantic_error', 'Unable to infer data type of variable', stmt)
        # handle null declarations (no type extension and null initializer)
        if not overall_type and isinstance(initializer.data_type, types.DataType) and initializer.data_type.data_type == types.DataTypes.NULL:
            errormodule.throw('semantic_error', 'Unable to infer data type of variable', stmt)
        # check for type extension and initializer mismatch
        if overall_type and initializer and not types.coerce(overall_type, initializer.data_type):
            errormodule.throw('semantic_error', 'Variable type extension and initializer data types do not match', stmt)
        # add constexpr if marked as such
        if constexpr:
            modifiers.append(Modifiers.CONSTEXPR)
            # assume initializer exists (:= implies an initializer was parsed)
            if not check_constexpr(initializer):
                errormodule.throw('semantic_error', 'Expected constexpr', stmt)
        # add to symbol table
        util.symbol_table.add_variable(Symbol(variables.value, overall_type if overall_type else initializer.data_type, modifiers, None if not constexpr else initializer), stmt)
        # return generated statement node
        return StatementNode('DeclareConstant' if constant else 'DeclareVariable', overall_type, variables.value, initializer, modifiers)