def main(self):
    """
    Run the analysis over every indexed module.

    For each module reported by ModuleIndexing, the shared line parser is
    reset with that module's reachable variables and parent module, then
    the module is handed to self.analyse_module.

    OT: (A, B)
        A: self.module_calls (updated)
        B: prj_modules
    """
    indexer = ModuleIndexing(
        self.module_helper, self.ast_tree, self.ast_indents
    )
    prj_modules = indexer.find_prj_modules()
    linos_by_module = indexer.indexing_module_linos()

    assigns = AssignAnalyser(
        self.module_helper, self.ast_tree, self.ast_indents
    )
    top_module = self.module_helper.get_top_module()
    self.line_parser = LineParser(top_module, assigns.top_assigns)

    # ------------------------------------------------

    for module_name, module_lines in linos_by_module.items():
        # The reachables are computed per module, so the parser state
        # has to be refreshed before each module is analysed.
        reachables, parent = assigns.indexing_assign_reachables(
            module_name, linos_by_module
        )
        self.line_parser.reset(reachables, parent)
        self.analyse_module(module_name, module_lines)

    # ------------------------------------------------

    return self.module_calls, prj_modules
def find_global_vars(self):
    """
    Collect the variables introduced by the top-level (indent 0) lines.

    What counts as a global variable:
        - Import / ImportFrom at the runtime (top) level
        - Assign at the runtime (top) level
        - inline `global xxx` statements -- NOT handled yet (see TODO)

    IN: self.top_module
        self.ast_tree: indexed by lino
        self.ast_indents: {lino: indent}
    OT: whatever LineParser.get_vars() returns; the original notes
        describe it as a dict {var: module}.
    """
    top_linos = tuple(
        lino
        for lino, indent in self.ast_indents.items()
        if indent == 0
    )
    lk.logt('[D3743]', self.top_module, top_linos)

    # ------------------------------------------------
    # Import, ImportFrom and Assign at the runtime level.
    # line_parser keeps track of the var -> module relations for every
    # ast_line it is fed, so each top-level line only has to be passed in.

    line_parser = LineParser(self.top_module)

    for lino in top_linos:
        ast_line = self.ast_tree[lino]
        lk.logt('[TEMPRINT]_20190811_214127', lino, ast_line)
        line_parser.main(ast_line)

    # ------------------------------------------------
    # TODO: inline `global xxx` on non-top-level lines is not collected.
    # The previous placeholder loop over self.ast_indents did no work
    # (its body was only `continue` / `pass`), so it has been removed.

    return line_parser.get_vars()
class ModuleAnalyser:
    # Replaced by a LineParser instance at the start of main().
    line_parser = None

    def __init__(self, module_helper: ModuleHelper, ast_tree, ast_indents):
        self.ast_indents = ast_indents
        self.ast_tree = ast_tree
        self.module_helper = module_helper
        # Filled by analyse_module: {module: (call, ...), ...}
        self.module_calls = {}

    def main(self):
        """
        Analyse every indexed module and record the modules it calls.

        OT: (A, B)
            A: self.module_calls (updated)
            B: prj_modules
        """
        indexer = ModuleIndexing(
            self.module_helper, self.ast_tree, self.ast_indents
        )
        prj_modules = indexer.find_prj_modules()
        linos_by_module = indexer.indexing_module_linos()

        assigns = AssignAnalyser(
            self.module_helper, self.ast_tree, self.ast_indents
        )
        self.line_parser = LineParser(
            self.module_helper.get_top_module(), assigns.top_assigns
        )

        # ------------------------------------------------

        for module_name, module_lines in linos_by_module.items():
            reachables, parent = assigns.indexing_assign_reachables(
                module_name, linos_by_module
            )
            # Parser state is per module: refresh it before analysing.
            self.line_parser.reset(reachables, parent)
            self.analyse_module(module_name, module_lines)

        # ------------------------------------------------

        return self.module_calls, prj_modules

    def analyse_module(self, module, linos):
        """
        Find the call relations between `module` and other modules.

        Every line in `linos` is run through analyse_line; the modules it
        reports are stored, de-duplicated and in first-seen order, as a
        tuple under self.module_calls[module].
        """
        lk.logd('analyse_module', module, style='■')

        related_calls = []
        for lino in linos:
            for called in self.analyse_line(self.ast_tree[lino]):
                if called in related_calls:
                    continue
                related_calls.append(called)

        lk.logt('[I3259]', related_calls)
        self.module_calls[module] = tuple(related_calls)

    def analyse_line(self, ast_line):
        """Delegate one ast line to the current line parser."""
        parser = self.line_parser
        return parser.main(ast_line)
class ModuleAnalyser:
    # Placeholder until main() installs a LineParser.
    line_parser = None

    def __init__(self, module_helper: ModuleHelper, ast_tree, ast_indents):
        self.module_helper, self.ast_tree, self.ast_indents = (
            module_helper, ast_tree, ast_indents
        )
        # Result store, format: {module: (call, ...), ...}
        self.module_calls = {}

    def main(self):
        """
        Walk all indexed modules and collect their outgoing calls.

        OT: (A, B)
            A: self.module_calls (updated)
            B: prj_modules
        """
        shared_args = (self.module_helper, self.ast_tree, self.ast_indents)
        indexer = ModuleIndexing(*shared_args)
        assigns = AssignAnalyser(*shared_args)

        prj_modules = indexer.find_prj_modules()
        linos_by_module = indexer.indexing_module_linos()

        self.line_parser = LineParser(assigns.top_assigns)

        # ------------------------------------------------

        for module_name, module_lines in linos_by_module.items():
            reachables = assigns.indexing_assign_reachables(
                module_name, linos_by_module
            )
            self.analyse_module(module_name, module_lines, reachables)

        # ------------------------------------------------

        return self.module_calls, prj_modules

    def analyse_module(self, module, linos, var_reachables):
        """
        Discover the call relations between `module` and other modules.

        The line parser is reset with `var_reachables` first; the modules
        reported for each line are then stored, without duplicates and in
        first-seen order, as a tuple in self.module_calls[module].
        """
        lk.logd('analyse_module', module, style='■')

        seen_calls = []
        self.line_parser.reset(var_reachables)

        for lino in linos:
            found = self.analyse_line(self.ast_tree[lino])
            for name in found:
                if name in seen_calls:
                    continue
                seen_calls.append(name)

        self.module_calls[module] = tuple(seen_calls)

    def analyse_line(self, ast_line):
        """Forward a single ast line to the line parser."""
        result = self.line_parser.main(ast_line)
        return result
def indexing_assign_reachables(
        self, target_module, module_linos
):
    """
    Find the function / class definitions reachable from `target_module`.

    Workflow:
        1. Starting from the first lino of target_module, climb the chain
           of parent modules until one whose first lino has indent 0.
        2. Starting right after that module's last lino, scan forward to
           the first lino whose indent is 0 (bounded by self.max_lino).
        3. Within that lino range, feed every FunctionDef / ClassDef line
           to a LineParser; the vars it collected are the reachables.

    IN: target_module: str. e.g. 'src.app.Init.main'
        module_linos: {module: linos}
    OT: (<dict var_reachables>, <str parent_module>)
        parent_module is '' for a runtime module and for a module whose
        first lino has indent 0; otherwise it is the ancestor found in
        step 1.
    Raises: Exception if target_module is not a key of module_linos.
    """
    if self.module_helper.is_runtime_module(target_module):
        # Per the original note this is equivalent to returning the
        # result of self.find_global_vars().
        return self.top_assigns.copy(), ''

    if target_module not in module_linos:
        lk.logt('[E2459]', target_module, module_linos)
        raise Exception(
            'target module is not indexed: {}'.format(target_module)
        )

    lk.logt('[I0114]', target_module)

    # ------------------------------------------------ lino reachables

    target_linos = module_linos[target_module]
    curr_module_lino = target_linos[0]
    start_offset, end_offset = target_linos[0], target_linos[-1] + 1

    # the start lino reachable
    parent_module = ''
    if self.ast_indents[start_offset] != 0:
        # Climb one ancestor per iteration. The lookup must start from
        # the module found in the previous round: asking for the parent
        # of target_module every time never progresses and spins forever
        # when the direct parent is itself nested.
        current_module = target_module
        while True:
            parent_module = self.module_helper.get_parent_module(
                current_module
            )
            parent_linos = module_linos[parent_module]
            start_offset, end_offset = parent_linos[0], parent_linos[-1] + 1
            if self.ast_indents[start_offset] == 0:
                break
            current_module = parent_module
        # -> e.g. parent_module = 'src.app.Init'

    # the end lino reachable
    while end_offset < self.max_lino:
        if self.ast_indents.get(end_offset) == 0:
            break
        end_offset += 1

    # get lino reachables
    # Why exclude curr_module_lino? target_module must not refer to
    # itself. E.g. for target_module = 'src.app.module' the source must
    # not automatically yield a `module: src.app.module` relation, so
    # that line is kept out of the assigns.
    lino_reachables = [
        lino
        for lino in range(start_offset, end_offset)
        if lino in self.ast_indents and lino != curr_module_lino
    ]

    # parse vars
    line_parser = LineParser(self.top_module)
    ast_defs = ("<class '_ast.FunctionDef'>", "<class '_ast.ClassDef'>")

    for lino in lino_reachables:
        ast_line = self.eval_ast_line(lino)
        if ast_line[0] in ast_defs:
            line_parser.main(ast_line)

    return line_parser.get_vars(), parent_module