def _parse_assert(token_list):
    """Parse the remainder of an ``assert`` statement.

    The caller has already consumed the ``assert`` keyword token.

    Returns a pair ``(expr, exc_obj_expr)``: the asserted expression and the
    expression following the ``,``, or ``None`` for the latter when the
    statement ends right after the asserted expression.
    """
    expr = larc_expr.parse_expr(token_list, True)
    # The statement may also be the very last thing in the token stream, in
    # which case there is nothing left to peek at; guard against that the
    # same way the print/return/expression-statement parsers do.
    if not token_list or token_list.peek().is_indent:
        # No exception object attached.
        return expr, None
    token_list.pop_sym(",")
    exc_obj_expr = larc_expr.parse_expr(token_list, True)
    return expr, exc_obj_expr
def _parse_if(token_list, curr_indent_count, loop_deep):
    """Parse an ``if`` statement whose ``if`` keyword was already consumed.

    Returns ``(if_list, else_stmt_list, global_var_set)`` where ``if_list``
    holds one ``(condition, stmt_list)`` pair per ``if``/``elif`` branch,
    ``else_stmt_list`` is ``None`` when there is no ``else`` part, and
    ``global_var_set`` is the union of the sets returned by
    ``parse_stmt_list`` for every branch body.
    """
    branches = []
    declared_globals = set()
    expecting_branch = True
    while expecting_branch:
        condition = larc_expr.parse_expr(token_list, end_at_comma=True)
        token_list.pop_sym(":")
        body, body_globals = parse_stmt_list(
            token_list, curr_indent_count, loop_deep)
        branches.append((condition, body))
        declared_globals |= body_globals

        if not token_list or token_list.peek_indent() < curr_indent_count:
            # Input exhausted or enclosing block closed: the if statement
            # ends here.
            return branches, None, declared_globals

        token_list.pop_indent(curr_indent_count)
        follower = token_list.pop()
        if follower.is_elif:
            continue
        if not follower.is_else:
            # Some other statement follows: push back both the token and the
            # indent that were just consumed.
            token_list.revert()
            token_list.revert()
            return branches, None, declared_globals
        expecting_branch = False

    # Parse the else part.
    token_list.pop_sym(":")
    else_body, else_globals = parse_stmt_list(
        token_list, curr_indent_count, loop_deep)
    declared_globals |= else_globals
    return branches, else_body, declared_globals
def _parse_if(token_list, curr_indent_count, loop_deep):
    """Parse an ``if`` statement whose ``if`` keyword was already consumed.

    Returns ``(if_list, else_stmt_list, global_var_set)`` where ``if_list``
    holds one ``(condition, stmt_list)`` pair per ``if``/``elif`` branch,
    ``else_stmt_list`` is ``None`` when there is no ``else`` part, and
    ``global_var_set`` is the union of the sets returned by
    ``parse_stmt_list`` for every branch body.
    """
    branches = []
    declared_globals = set()
    expecting_branch = True
    while expecting_branch:
        condition = larc_expr.parse_expr(token_list, end_at_comma=True)
        token_list.pop_sym(":")
        body, body_globals = parse_stmt_list(
            token_list, curr_indent_count, loop_deep)
        branches.append((condition, body))
        declared_globals |= body_globals

        if not token_list or token_list.peek_indent() < curr_indent_count:
            # Input exhausted or enclosing block closed: the if statement
            # ends here.
            return branches, None, declared_globals

        token_list.pop_indent(curr_indent_count)
        follower = token_list.pop()
        if follower.is_elif:
            continue
        if not follower.is_else:
            # Some other statement follows: push back both the token and the
            # indent that were just consumed.
            token_list.revert()
            token_list.revert()
            return branches, None, declared_globals
        expecting_branch = False

    # Parse the else part.
    token_list.pop_sym(":")
    else_body, else_globals = parse_stmt_list(
        token_list, curr_indent_count, loop_deep)
    declared_globals |= else_globals
    return branches, else_body, declared_globals
def parse_var_define(token_list, module, cls, var_set_list,
                     non_local_var_used_map, ret_expr_token_list=False):
    """Parse the declarators of a variable definition, one at a time.

    This is a generator. For every declarator it yields
    ``(start_token, var_name, expr)``:

    * ``start_token`` is the token peeked before the name was parsed;
    * ``var_name`` is whatever ``parse_var_name`` returned (a ``str`` for a
      plain name; anything else is treated as requiring an initialiser);
    * ``expr`` is ``None`` when there is no initialiser, otherwise the
      parsed initialiser, or -- when ``ret_expr_token_list`` is true -- the
      first element returned by ``_parse_var_init_expr_token_list``.

    Declarators are separated by ``,`` and the definition is terminated by
    ``;``; any other symbol is reported through ``syntax_err``.
    """
    while True:
        start_t = token_list.peek()
        var_name = parse_var_name(token_list)
        t, sym = token_list.pop_sym()
        if not isinstance(var_name, str) or sym == "=":
            if sym != "=":
                # A non-plain name must be initialised. Without this check
                # whatever symbol follows the name was silently treated as
                # if it were '='.
                t.syntax_err("需要'='")
            if ret_expr_token_list:
                expr, sym = _parse_var_init_expr_token_list(token_list)
            else:
                expr = larc_expr.parse_expr(
                    token_list, module, cls, var_set_list,
                    non_local_var_used_map, end_at_comma=True)
                t, sym = token_list.pop_sym()
        else:
            expr = None
        yield start_t, var_name, expr
        if sym == ";":
            return
        if sym != ",":
            t.syntax_err("需要','或';'")
def _parse_return(token_list): if not token_list or token_list.peek().is_indent: #空return return None expr = larc_expr.parse_expr(token_list) if token_list: token_list.peek_indent() return expr
def _parse_for(token_list, curr_indent_count, loop_deep):
    """Parse a ``for`` statement whose ``for`` keyword was already consumed.

    Returns ``(lvalue, expr, stmt_list, global_var_set)``: the loop target,
    the expression after ``in``, and the two values returned by
    ``parse_stmt_list`` for the body (parsed with ``loop_deep + 1``).
    """
    # Design note: originally everything after `for` was parsed as a single
    # `* in *` expression, but that fails for unpacking such as
    #     for i, j in a, b:
    # because the comma has lower precedence than `in`. Requiring parentheses
    # around `i, j` and `a, b` would be a nuisance; `in` should act as a
    # keyword here, not as an operator. Hence the special case: parsing of
    # the target stops as soon as `in` is met.
    first_token = token_list.peek()
    target = larc_expr.parse_expr(token_list, end_at_in=True)
    if not target.is_lvalue:
        first_token.syntax_err("for语句中'in'左边非左值表达式")

    in_token = token_list.pop()
    if not in_token.is_in:
        in_token.syntax_err("需要'in'")

    iterable = larc_expr.parse_expr(token_list)
    token_list.pop_sym(":")
    body, declared_globals = parse_stmt_list(
        token_list, curr_indent_count, loop_deep + 1)
    return target, iterable, body, declared_globals
def _parse_for(token_list, curr_indent_count, loop_deep):
    """Parse a ``for`` statement whose ``for`` keyword was already consumed.

    Returns ``(lvalue, expr, stmt_list, global_var_set)``: the loop target,
    the expression after ``in``, and the two values returned by
    ``parse_stmt_list`` for the body (parsed with ``loop_deep + 1``).
    """
    # Design note: originally everything after `for` was parsed as a single
    # `* in *` expression, but that fails for unpacking such as
    #     for i, j in a, b:
    # because the comma has lower precedence than `in`. Requiring parentheses
    # around `i, j` and `a, b` would be a nuisance; `in` should act as a
    # keyword here, not as an operator. Hence the special case: parsing of
    # the target stops as soon as `in` is met.
    first_token = token_list.peek()
    target = larc_expr.parse_expr(token_list, end_at_in=True)
    if not target.is_lvalue:
        first_token.syntax_err("for语句中'in'左边非左值表达式")

    in_token = token_list.pop()
    if not in_token.is_in:
        in_token.syntax_err("需要'in'")

    iterable = larc_expr.parse_expr(token_list)
    token_list.pop_sym(":")
    body, declared_globals = parse_stmt_list(
        token_list, curr_indent_count, loop_deep + 1)
    return target, iterable, body, declared_globals
def parse_for_prefix(token_list, module, cls, var_set_list,
                     non_local_var_used_map):
    """Parse the parenthesised header of a ``for`` statement.

    The header has the shape ``( target : iterable )`` where ``target`` is
    either ``var <name>`` (declaring new loop variables) or an existing
    lvalue expression.

    Returns ``(for_var_set, lvalue, iter_obj)``:

    * ``for_var_set`` -- ``larc_common.OrderedSet`` of the names declared
      with ``var`` (empty when the target is a plain expression);
    * ``lvalue`` -- expression for the loop target;
    * ``iter_obj`` -- expression parsed after the ``:``.

    Name clashes with imported modules or with names in ``var_set_list``,
    and a non-lvalue target, are reported through ``syntax_err``.
    """
    token_list.pop_sym("(")
    for_var_set = larc_common.OrderedSet()
    if token_list.peek().is_reserved("var"):
        token_list.pop()
        t = token_list.peek()
        var_name = parse_var_name(token_list)
        # for_var_set is mutated in place below, so the scope chain only
        # needs to be assembled once.
        visible_var_sets = var_set_list + (for_var_set, )
        for vn in iter_var_name(var_name):
            if vn in module.dep_module_set:
                t.syntax_err("变量名'%s'与导入模块重名" % vn)
            for var_set in visible_var_sets:
                if vn in var_set:
                    t.syntax_err("变量名'%s'重定义" % vn)
            for_var_set.add(vn)
        lvalue = larc_expr.var_name_to_expr(var_name)
    else:
        t = token_list.peek()
        lvalue = larc_expr.parse_expr(token_list, module, cls, var_set_list,
                                      non_local_var_used_map)
        # The loop target is assigned on every iteration, so it has to be an
        # lvalue, just like the left side of an assignment statement.
        if not lvalue.is_lvalue:
            t.syntax_err("for语句中':'左边非左值表达式")
    token_list.pop_sym(":")
    iter_obj = larc_expr.parse_expr(token_list, module, cls, var_set_list,
                                    non_local_var_used_map)
    token_list.pop_sym(")")
    return for_var_set, lvalue, iter_obj
def _parse_print(token_list): #解析print语句,返回表达式列表和是否换行 if not token_list or token_list.peek().is_indent: #空print return [], True expr_list = [] while True: #状态:解析表达式 expr_list.append(larc_expr.parse_expr(token_list, True)) #状态:等待语句结束或"," if not token_list or token_list.peek().is_indent: #语句结束 return expr_list, True #语句未结束 token_list.pop_sym(",") if not token_list or token_list.peek().is_indent: #在逗号后结束,不换行 return expr_list, False
def compile(self):
    """Compile every item of the module, then parse the global initialisers.

    Resets ``self.literal_set``, calls ``compile()`` on each object yielded
    by ``self._items()``, and replaces every non-``None`` token list in
    ``self.global_var_init_map`` by the expression parsed from it. A global
    whose name is already recorded in the map filled by ``parse_expr`` is
    reported as used before its definition.
    """
    self.literal_set = set()
    for item in self._items():
        item.compile()

    # Filled by parse_expr; entries are looked up by variable name and
    # expose syntax_err().
    used_map = larc_common.OrderedDict()
    for var_name, init_tokens in self.global_var_init_map.iteritems():
        if init_tokens is not None:
            init_expr = larc_expr.parse_expr(
                init_tokens, self, None, (), used_map, end_at_comma=True)
            self.global_var_init_map[var_name] = init_expr
            _terminator, sym = init_tokens.pop_sym()
            # The initialiser token list must be fully consumed and end
            # with a separator.
            assert not init_tokens and sym in (",", ";")
        for vn in larc_stmt.iter_var_name(var_name):
            if vn in used_map:
                used_map[vn].syntax_err("全局变量'%s'在定义前使用" % vn)
def _parse_global_var(self, token_list, name_token, export):
    """Parse ``<name> = <expr>`` as a global variable definition.

    ``name_token`` carries the variable name. The name must not clash with
    an imported module, a class, a function or an already defined global;
    clashes are reported through ``name_token.syntax_err``. On success the
    parsed initialiser is stored in ``self.global_var_map`` and the entry in
    ``self.global_var_type_info`` is set to ``None``, or to ``"object"``
    for an exported variable (which is also added to
    ``self.export_global_var_set``).
    """
    name = name_token.value

    # Name clash checks.
    if name in self.dep_module_set:
        name_token.syntax_err("全局变量名和模块名重复")
    if name in self.class_map:
        name_token.syntax_err("全局变量名和类名重复")
    for known_func_name, _arg_count in self.func_map:
        if known_func_name == name:
            name_token.syntax_err("全局变量名和函数名重复")
    if name in self.global_var_map:
        name_token.syntax_err("全局变量重复定义")

    # Initialiser.
    assign_token = token_list.pop()
    if not assign_token.is_sym("="):
        assign_token.syntax_err("需要'='")
    init_expr = larc_expr.parse_expr(token_list)
    self.global_var_map[name] = init_expr

    self.global_var_type_info[name] = None
    if export:
        self.export_global_var_set.add(name)
        self.global_var_type_info[name] = "object"
def parse_stmt_list(token_list, upper_indent_count, loop_deep):
    """Parse a statement list; return the list and the set of global names.

    In larva code a global variable name may be declared anywhere, although
    preferably at the beginning to avoid ambiguity.

    ``upper_indent_count`` is the indentation of the enclosing block; the
    block parsed here must be indented deeper than that. ``loop_deep`` is 0
    outside any loop and is used to reject ``break``/``continue`` there.

    Returns ``(stmt_list, global_var_set)``.
    """
    # Determine the indentation of the current block.
    curr_indent_count = token_list.peek_indent()
    if curr_indent_count <= upper_indent_count:
        token_list.peek().indent_err()

    # Start parsing.
    stmt_list = []
    global_var_set = set()
    while token_list:
        indent_count = token_list.peek_indent()
        if indent_count < curr_indent_count:
            # Current block is finished; return.
            break

        # Parse one statement; dispatch on its first token.
        token_list.pop_indent(curr_indent_count)
        t = token_list.pop()
        if t.is_pass:
            continue
        if t.is_global:
            _parse_global_var_declare(token_list, global_var_set)
            continue
        if t.is_print:
            expr_list, print_new_line = _parse_print(token_list)
            stmt_list.append(
                _Stmt("print", expr_list=expr_list,
                      print_new_line=print_new_line))
            continue
        if t.is_for:
            lvalue, expr, for_stmt_list, for_global_var_set = (_parse_for(
                token_list, curr_indent_count, loop_deep))
            stmt_list.append(
                _Stmt("for", lvalue=lvalue, expr=expr,
                      stmt_list=for_stmt_list))
            global_var_set |= for_global_var_set
            continue
        if t.is_continue or t.is_break:
            if loop_deep == 0:
                t.syntax_err("循环外的'%s'" % t.value)
            stmt_list.append(_Stmt(t.value))
            continue
        if t.is_return:
            expr = _parse_return(token_list)
            stmt_list.append(_Stmt("return", expr=expr))
            continue
        if t.is_while:
            expr, while_stmt_list, while_global_var_set = (_parse_while(
                token_list, curr_indent_count, loop_deep))
            stmt_list.append(
                _Stmt("while", expr=expr, stmt_list=while_stmt_list))
            global_var_set |= while_global_var_set
            continue
        if t.is_if:
            if_list, else_stmt_list, if_global_var_set = (_parse_if(
                token_list, curr_indent_count, loop_deep))
            stmt_list.append(
                _Stmt("if", if_list=if_list, else_stmt_list=else_stmt_list))
            global_var_set |= if_global_var_set
            continue
        # NOTE(review): the error branches below have no `continue`; they
        # appear to rely on syntax_err() not returning -- confirm.
        if t.is_import:
            t.syntax_err("import必须出现在代码文件开头")
        if t.is_elif:
            t.syntax_err("未匹配的elif")
        if t.is_else:
            t.syntax_err("未匹配的else")
        if t.is_func:
            t.syntax_err("不允许函数嵌套定义")
        if t.is_try:
            (try_stmt_list, except_list, finally_stmt_list,
             try_global_var_set) = (_parse_try(token_list, curr_indent_count,
                                               loop_deep))
            stmt_list.append(
                _Stmt("try", try_stmt_list=try_stmt_list,
                      except_list=except_list,
                      finally_stmt_list=finally_stmt_list))
            global_var_set |= try_global_var_set
            continue
        if t.is_except:
            t.syntax_err("未匹配的except")
        if t.is_finally:
            t.syntax_err("未匹配的finally")
        if t.is_assert:
            expr, exc_obj_expr = _parse_assert(token_list)
            stmt_list.append(
                _Stmt("assert", expr=expr, exc_obj_expr=exc_obj_expr))
            continue
        if t.is_raise:
            expr = _parse_raise(token_list)
            stmt_list.append(_Stmt("raise", expr=expr))
            continue

        # What remains is an expression statement or an assignment; push the
        # first token back so it is parsed as part of the expression.
        token_list.revert()
        expr = larc_expr.parse_expr(token_list)
        if not token_list or token_list.peek().is_indent:
            # Expression statement.
            stmt_list.append(_Stmt("expr", expr=expr))
            continue
        may_be_assign_token = token_list.peek()
        if (may_be_assign_token.is_sym and may_be_assign_token.value in
                ("=", "%=", "^=", "&=", "*=", "-=", "+=", "|=", "/=", "<<=",
                 ">>=", ">>>=")):
            # Assignment.
            assign_sym = may_be_assign_token.value
            lvalue = expr
            if not lvalue.is_lvalue:
                t.syntax_err("赋值语句'%s'左边非左值表达式" % assign_sym)
            if assign_sym != "=":
                # Augmented assignment.
                if lvalue.op == "[:]":
                    t.syntax_err("分片无法增量赋值")
                if lvalue.op in ("tuple", "list"):
                    t.syntax_err("不支持unpack增量赋值")
            token_list.pop_sym(assign_sym)
            expr = larc_expr.parse_expr(token_list)
            stmt_list.append(_Stmt(assign_sym, lvalue=lvalue, expr=expr))
            continue
        # NOTE(review): when the token after the expression is not an
        # assignment symbol nothing is consumed here; the loop simply
        # restarts and peek_indent() is called with that token next.

    return stmt_list, global_var_set
def _parse_while(token_list, curr_indent_count, loop_deep):
    """Parse a ``while`` statement whose keyword was already consumed.

    Returns ``(expr, stmt_list, global_var_set)``: the loop condition and
    the two values returned by ``parse_stmt_list`` for the body, which is
    parsed with ``loop_deep + 1``.
    """
    condition = larc_expr.parse_expr(token_list, end_at_comma=True)
    token_list.pop_sym(":")
    body_loop_deep = loop_deep + 1
    body, declared_globals = parse_stmt_list(
        token_list, curr_indent_count, body_loop_deep)
    return condition, body, declared_globals
def _parse_raise(token_list):
    """Parse the expression following a ``raise`` keyword and return it."""
    raised_expr = larc_expr.parse_expr(token_list)
    return raised_expr
def parse_stmt_list(token_list, upper_indent_count, loop_deep):
    """Parse a statement list; return the list and the set of global names.

    In larva code a global variable name may be declared anywhere, although
    preferably at the beginning to avoid ambiguity.

    ``upper_indent_count`` is the indentation of the enclosing block; the
    block parsed here must be indented deeper than that. ``loop_deep`` is 0
    outside any loop and is used to reject ``break``/``continue`` there.

    Returns ``(stmt_list, global_var_set)``.
    """
    # Determine the indentation of the current block.
    curr_indent_count = token_list.peek_indent()
    if curr_indent_count <= upper_indent_count:
        token_list.peek().indent_err()

    # Start parsing.
    stmt_list = []
    global_var_set = set()
    while token_list:
        indent_count = token_list.peek_indent()
        if indent_count < curr_indent_count:
            # Current block is finished; return.
            break

        # Parse one statement; dispatch on its first token.
        token_list.pop_indent(curr_indent_count)
        t = token_list.pop()
        if t.is_pass:
            continue
        if t.is_global:
            _parse_global_var_declare(token_list, global_var_set)
            continue
        if t.is_print:
            expr_list, print_new_line = _parse_print(token_list)
            stmt_list.append(_Stmt("print", expr_list = expr_list,
                                   print_new_line = print_new_line))
            continue
        if t.is_for:
            lvalue, expr, for_stmt_list, for_global_var_set = (
                _parse_for(token_list, curr_indent_count, loop_deep))
            stmt_list.append(_Stmt("for", lvalue = lvalue, expr = expr,
                                   stmt_list = for_stmt_list))
            global_var_set |= for_global_var_set
            continue
        if t.is_continue or t.is_break:
            if loop_deep == 0:
                t.syntax_err("循环外的'%s'" % t.value)
            stmt_list.append(_Stmt(t.value))
            continue
        if t.is_return:
            expr = _parse_return(token_list)
            stmt_list.append(_Stmt("return", expr = expr))
            continue
        if t.is_while:
            expr, while_stmt_list, while_global_var_set = (
                _parse_while(token_list, curr_indent_count, loop_deep))
            stmt_list.append(_Stmt("while", expr = expr,
                                   stmt_list = while_stmt_list))
            global_var_set |= while_global_var_set
            continue
        if t.is_if:
            if_list, else_stmt_list, if_global_var_set = (
                _parse_if(token_list, curr_indent_count, loop_deep))
            stmt_list.append(_Stmt("if", if_list = if_list,
                                   else_stmt_list = else_stmt_list))
            global_var_set |= if_global_var_set
            continue
        # NOTE(review): the error branches below have no `continue`; they
        # appear to rely on syntax_err() not returning -- confirm.
        if t.is_import:
            t.syntax_err("import必须出现在代码文件开头")
        if t.is_elif:
            t.syntax_err("未匹配的elif")
        if t.is_else:
            t.syntax_err("未匹配的else")
        if t.is_func:
            t.syntax_err("不允许函数嵌套定义")
        if t.is_try:
            (try_stmt_list, except_list, finally_stmt_list,
             try_global_var_set) = (
                _parse_try(token_list,
                           curr_indent_count, loop_deep))
            stmt_list.append(_Stmt("try", try_stmt_list = try_stmt_list,
                                   except_list = except_list,
                                   finally_stmt_list = finally_stmt_list))
            global_var_set |= try_global_var_set
            continue
        if t.is_except:
            t.syntax_err("未匹配的except")
        if t.is_finally:
            t.syntax_err("未匹配的finally")
        if t.is_assert:
            expr, exc_obj_expr = _parse_assert(token_list)
            stmt_list.append(
                _Stmt("assert", expr = expr, exc_obj_expr = exc_obj_expr))
            continue
        if t.is_raise:
            expr = _parse_raise(token_list)
            stmt_list.append(_Stmt("raise", expr = expr))
            continue

        # What remains is an expression statement or an assignment; push the
        # first token back so it is parsed as part of the expression.
        token_list.revert()
        expr = larc_expr.parse_expr(token_list)
        if not token_list or token_list.peek().is_indent:
            # Expression statement.
            stmt_list.append(_Stmt("expr", expr = expr))
            continue
        may_be_assign_token = token_list.peek()
        if (may_be_assign_token.is_sym and may_be_assign_token.value in
                ("=", "%=", "^=", "&=", "*=", "-=", "+=", "|=", "/=", "<<=",
                 ">>=", ">>>=")):
            # Assignment.
            assign_sym = may_be_assign_token.value
            lvalue = expr
            if not lvalue.is_lvalue:
                t.syntax_err("赋值语句'%s'左边非左值表达式" % assign_sym)
            if assign_sym != "=":
                # Augmented assignment.
                if lvalue.op == "[:]":
                    t.syntax_err("分片无法增量赋值")
                if lvalue.op in ("tuple", "list"):
                    t.syntax_err("不支持unpack增量赋值")
            token_list.pop_sym(assign_sym)
            expr = larc_expr.parse_expr(token_list)
            stmt_list.append(_Stmt(assign_sym, lvalue = lvalue, expr = expr))
            continue
        # NOTE(review): when the token after the expression is not an
        # assignment symbol nothing is consumed here; the loop simply
        # restarts and peek_indent() is called with that token next.

    return stmt_list, global_var_set
def _parse_while(token_list, curr_indent_count, loop_deep):
    """Parse a ``while`` statement whose keyword was already consumed.

    Returns ``(expr, stmt_list, global_var_set)``: the loop condition and
    the two values returned by ``parse_stmt_list`` for the body, which is
    parsed with ``loop_deep + 1``.
    """
    condition = larc_expr.parse_expr(token_list, end_at_comma=True)
    token_list.pop_sym(":")
    body_loop_deep = loop_deep + 1
    body, declared_globals = parse_stmt_list(
        token_list, curr_indent_count, body_loop_deep)
    return condition, body, declared_globals
def parse_stmt_list(token_list, module, cls, var_set_list, loop_deep):
    """Parse the statements of a brace-delimited block.

    Parsing stops at the closing ``}`` of the block, which is left in the
    token stream for the caller to consume, or when the token stream is
    exhausted.

    ``var_set_list`` is a non-empty tuple of variable-name sets, innermost
    scope last; names declared with ``var`` in this block are added to its
    last element. Nested blocks and loop / if bodies are parsed recursively
    with a fresh ``larc_common.OrderedSet`` appended as their own scope.
    ``loop_deep`` is 0 outside any loop and is used to reject
    ``break``/``continue`` there.

    Returns a ``_StmtList`` built around ``var_set_list[-1]``.
    """
    assert var_set_list
    stmt_list = _StmtList(var_set_list[-1])
    non_local_var_used_map = larc_common.OrderedDict()

    def add_to_curr_var_set(tok, vn):
        # Register a newly declared local in the innermost scope after
        # checking it against modules, enclosing scopes and earlier uses.
        if vn in module.dep_module_set:
            tok.syntax_err("变量名'%s'与导入模块重名" % vn)
        for var_set in var_set_list:
            if vn in var_set:
                tok.syntax_err("变量名'%s'重定义" % vn)
        if vn in non_local_var_used_map:
            non_local_var_used_map[vn].syntax_err("局部变量在定义之前使用")
        var_set_list[-1].add(vn)

    def new_scope():
        # Scopes are used as sets (membership tests and .add), so every
        # nested body gets an OrderedSet, like block and for bodies do.
        return var_set_list + (larc_common.OrderedSet(), )

    while token_list:
        if token_list.peek().is_sym("}"):
            break

        # Parse one statement; dispatch on its first token.
        t = token_list.pop()
        if t.is_sym(";"):
            t.warning("空语句")
            continue
        if t.is_sym("{"):
            stmt = _Stmt("block", stmt_list=parse_stmt_list(
                token_list, module, cls, new_scope(), loop_deep))
            token_list.pop_sym("}")
            stmt_list.append(stmt)
            continue
        if t.is_reserved("var"):
            for t, var_name, expr in parse_var_define(
                    token_list, module, cls, var_set_list,
                    non_local_var_used_map):
                for vn in iter_var_name(var_name):
                    add_to_curr_var_set(t, vn)
                stmt_list.append(_Stmt("var", name=var_name, expr=expr))
            continue
        if t.is_reserved and t.value in ("break", "continue"):
            if loop_deep == 0:
                t.syntax_err("循环外的'%s'" % t.value)
            token_list.pop_sym(";")
            stmt_list.append(_Stmt(t.value))
            continue
        if t.is_reserved("return"):
            expr = larc_expr.parse_expr(token_list, module, cls, var_set_list,
                                        non_local_var_used_map)
            token_list.pop_sym(";")
            stmt_list.append(_Stmt("return", expr=expr))
            continue
        if t.is_reserved("for"):
            for_var_set, lvalue, iter_obj = parse_for_prefix(
                token_list, module, cls, var_set_list,
                non_local_var_used_map)
            token_list.pop_sym("{")
            # The body gets its own copy of the loop variable set so that
            # locals declared in the body do not end up in for_var_set.
            for_stmt_list = parse_stmt_list(
                token_list, module, cls,
                var_set_list + (for_var_set.copy(), ), loop_deep + 1)
            token_list.pop_sym("}")
            stmt_list.append(
                _Stmt("for", var_set=for_var_set, lvalue=lvalue,
                      iter_obj=iter_obj, stmt_list=for_stmt_list))
            continue
        if t.is_reserved("while"):
            token_list.pop_sym("(")
            expr = larc_expr.parse_expr(token_list, module, cls, var_set_list,
                                        non_local_var_used_map)
            token_list.pop_sym(")")
            token_list.pop_sym("{")
            while_stmt_list = parse_stmt_list(
                token_list, module, cls, new_scope(), loop_deep + 1)
            token_list.pop_sym("}")
            stmt_list.append(
                _Stmt("while", expr=expr, stmt_list=while_stmt_list))
            continue
        if t.is_reserved("do"):
            token_list.pop_sym("{")
            do_stmt_list = parse_stmt_list(
                token_list, module, cls, new_scope(), loop_deep + 1)
            token_list.pop_sym("}")
            t = token_list.pop()
            if not t.is_reserved("while"):
                t.syntax_err("需要'while'")
            token_list.pop_sym("(")
            expr = larc_expr.parse_expr(token_list, module, cls, var_set_list,
                                        non_local_var_used_map)
            token_list.pop_sym(")")
            token_list.pop_sym(";")
            stmt_list.append(_Stmt("do", expr=expr, stmt_list=do_stmt_list))
            continue
        if t.is_reserved("if"):
            if_expr_list = []
            if_stmt_list_list = []
            else_stmt_list = None
            while True:
                # One `( cond ) { body }` branch per iteration.
                token_list.pop_sym("(")
                expr = larc_expr.parse_expr(token_list, module, cls,
                                            var_set_list,
                                            non_local_var_used_map)
                token_list.pop_sym(")")
                token_list.pop_sym("{")
                if_stmt_list = parse_stmt_list(
                    token_list, module, cls, new_scope(), loop_deep)
                token_list.pop_sym("}")
                if_expr_list.append(expr)
                if_stmt_list_list.append(if_stmt_list)
                if not token_list.peek().is_reserved("else"):
                    break
                token_list.pop()
                t = token_list.pop()
                if t.is_reserved("if"):
                    # `else if`: parse another branch.
                    continue
                if not t.is_sym("{"):
                    t.syntax_err("需要'{'")
                else_stmt_list = parse_stmt_list(
                    token_list, module, cls, new_scope(), loop_deep)
                token_list.pop_sym("}")
                break
            stmt_list.append(
                _Stmt("if", if_expr_list=if_expr_list,
                      if_stmt_list_list=if_stmt_list_list,
                      else_stmt_list=else_stmt_list))
            continue
        if t.is_sym and t.value in larc_token.INC_DEC_SYM_SET:
            inc_dec_op = t.value
            t = token_list.peek()
            lvalue = larc_expr.parse_expr(token_list, module, cls,
                                          var_set_list,
                                          non_local_var_used_map)
            if not lvalue.is_lvalue:
                t.syntax_err("非左值表达式不能做'%s'操作" % inc_dec_op)
            if lvalue.op in ("[:]", "tuple", "list"):
                t.syntax_err("分片和解包左值表达式不能做'%s'操作" % inc_dec_op)
            stmt_list.append(_Stmt(inc_dec_op, lvalue=lvalue))
            token_list.pop_sym(";")
            continue

        # todo: try catch finally throw assert

        # What remains is an expression statement or an assignment; push the
        # first token back so it is parsed as part of the expression.
        token_list.revert()
        expr = larc_expr.parse_expr(token_list, module, cls, var_set_list,
                                    non_local_var_used_map)
        if token_list.peek().is_sym(";"):
            # Expression statement.
            token_list.pop_sym(";")
            stmt_list.append(_Stmt("expr", expr=expr))
            continue
        t = token_list.peek()
        if t.is_sym and t.value in larc_token.ASSIGN_SYM_SET:
            # Assignment.
            assign_sym = t.value
            lvalue = expr
            if not lvalue.is_lvalue:
                t.syntax_err("赋值操作'%s'左边非左值表达式" % assign_sym)
            if assign_sym != "=":
                # Augmented assignment.
                if lvalue.op in ("[:]", "tuple", "list"):
                    t.syntax_err("分片和解包左值表达式无法增量赋值")
            token_list.pop_sym(assign_sym)
            expr = larc_expr.parse_expr(token_list, module, cls, var_set_list,
                                        non_local_var_used_map)
            token_list.pop_sym(";")
            stmt_list.append(_Stmt(assign_sym, lvalue=lvalue, expr=expr))
            continue

        # Neither ';' nor an assignment symbol after the expression.
        t.syntax_err()

    return stmt_list