# Count faults per file between two release tags, then walk the source tree
# collecting change/fault counts per Python file.
# NOTE(review): this chunk uses names defined elsewhere in the file
# (subject_dir, subject, tagtime, tag, nexttagtime, fault_info, change_info,
# sourcedir, walkDirectory, customast) and appears to be cut off at the end
# (the final `if` has no visible `else`) — verify against the full file.
faultnum = csv.reader(file(subject_dir+subject+'\\fault_log.csv','rb'))
for line in faultnum:
    # Skip the CSV header row.
    if faultnum.line_num == 1:
        continue
    # Column 2 is the fault timestamp, e.g. "2015-03-01 12:00:00".
    currenttime = datetime.datetime.strptime(line[2], '%Y-%m-%d %H:%M:%S')
    # Keep only faults that fall inside the (tagtime[tag], nexttagtime] window.
    if tagtime[tag] >= currenttime or nexttagtime < currenttime:
        continue
    elif currenttime > tagtime[tag] and currenttime<=nexttagtime and len(line)==6:
        # Column 5 presumably names the faulty file — TODO confirm schema.
        if line[5] in fault_info.keys():
            fault_info[line[5]] = fault_info[line[5]] + 1
        else:
            fault_info[line[5]] = 1
# Walk every source file, parse it, and gather per-file statistics.
for currentFileName in walkDirectory(sourcedir):
    try:
        astContent = customast.parse_file(currentFileName)
    except:
        # Unparseable file (e.g. syntax error): report it and move on.
        print subject,tag,currentFileName
        continue
    # Collect the set of distinct line numbers carrying AST nodes.
    lines = set()
    for n in ast.walk(astContent):
        if hasattr(n,'lineno'):
            lines.add(n.lineno)
    # Number of recorded changes for this file (0 if none).
    if currentFileName in change_info.keys():
        change_count = change_info[currentFileName]
    else:
        change_count = 0
    # Number of recorded faults for this file; the matching `else` branch
    # is outside this view — TODO confirm continuation.
    if currentFileName in fault_info.keys():
        fault_count = fault_info[currentFileName]
def code_change_extraction(left_file, right_file):
    """Extract the ChangeType distribution between two versions of a source file.

    Parses both files into intermediate ASTs, matches their nodes (leaves
    first, then inner nodes), derives the edit script that transforms the
    left tree into the right tree, and returns the relative frequency of
    every known ChangeType.

    Args:
        left_file: path of the "before" source file.
        right_file: path of the "after" source file.

    Returns:
        A list parallel to change_type_enum(): entry i is the fraction of
        detected changes whose type is change_type[i] (all zeros when the
        two files produce no changes).
    """

    def _build_tree(source_file, head_label):
        # Parse one file and build its intermediate tree plus the node
        # bookkeeping (ids, parent links, node lists) the matcher needs.
        ast_content = customast.parse_file(source_file)
        tree = Tree('头指针', head_label)  # legacy label, means "head pointer"
        root = Node('AstRoot', 'root')
        tree.linktohead(root)
        ast_process(ast_content, root)
        id_set(tree.head)
        child_to_parent = child_parent_information(tree.head)
        node_list, inner_node_list, leaf_node_list = node_get(tree.head)
        id_to_node = id_to_node_get(node_list)
        # Set each node's parent id.  A direct dict lookup replaces the
        # original O(pairs * nodes) linear scan over the dict's keys.
        for child_id, parent_id in child_to_parent:
            node = id_to_node.get(child_id)
            if node is not None:
                node.parent = parent_id
        return node_list, inner_node_list, leaf_node_list, id_to_node

    left_node_list, left_inner_node_list, left_leaf_node_list, left_id_to_node = \
        _build_tree(left_file, 'left_head')
    right_node_list, right_inner_node_list, right_leaf_node_list, right_id_to_node = \
        _build_tree(right_file, 'right_head')

    # Match leaf nodes: candidates above similarity 0.6, then keep the best.
    match_temp = leaf_match(left_leaf_node_list, right_leaf_node_list, 0.6)
    match_final = best_match(match_temp)
    leaf_matched_set(left_leaf_node_list, right_leaf_node_list, match_final)

    # Match inner nodes.  For every still-unmatched inner node x of T1, if an
    # unmatched node y of T2 matches it, (x, y) is added to match_final.
    # First-match is used: for inner nodes the first match is very likely
    # also the best one.
    for node1 in left_inner_node_list:
        for node2 in right_inner_node_list:
            if node1.matched == 0 and node2.matched == 0:
                inner_node_match(node1, node2, match_final, 0.4, 0.6)

    # Force the head pointers and the roots to match so the edit script
    # never tries to move or delete them.
    match_final.append((0, 0, 1.0))
    match_final.append(('head_parent', 'head_parent', 1.0))
    match_final.append((1, 1, 1.0))

    # Edit operations that transform T1 into T2, given the matched node set.
    edit_script, change_information, change_information2 = \
        editscript_calculate(left_node_list, match_final, left_id_to_node,
                             right_id_to_node, right_node_list)
    # Map each edit operation to its ChangeType.
    change_type_list, scc_list, parent_entity_list, changed_entity_list = \
        changetype_generation(change_information, change_information2)

    # Distribution of each ChangeType over all detected changes; an empty
    # change_type_list (identical files) leaves every percentage at 0.
    change_type = change_type_enum()
    change_type_percentage = [0] * len(change_type)
    change_type_dict = changetype_statistic(change_type_list)
    if len(change_type_list) != 0:
        for i, type_name in enumerate(change_type):
            if type_name in change_type_dict:
                change_type_percentage[i] = \
                    change_type_dict[type_name] / len(change_type_list)
    # change_type_percentage[i] is the frequency of change_type[i].
    return change_type_percentage
            # NOTE(review): this fragment begins mid-method (the enclosing
            # `def visit_...`/`for alias in node.names:` header is outside
            # this view); indentation below is a best-effort reconstruction
            # — confirm against the full file.
            continue
        if alias.asname is not None:
            # `import x as y`: record the alias name, unless the same
            # (name, file) pair is already recorded (for/else idiom).
            for (name,file,lineno) in self.imports:
                if name==alias.asname and self.fileName==file:
                    break
            else:
                self.imports.add((alias.asname,self.fileName,node.lineno))
        elif alias.name != '*':
            # Plain `import x` (wildcard imports are ignored): record the
            # module name if not already present for this file.
            for (name,file,lineno) in self.imports:
                if name==alias.name and self.fileName==file:
                    break
            else:
                self.imports.add((alias.name,self.fileName,node.lineno))
    # Continue the default traversal into child nodes.
    self.generic_visit(node)


if __name__ == '__main__':
    # Ad-hoc driver: parse one hard-coded test file and dump the imports found.
    myast = MyAst()
    astContent = customast.parse_file('C:\\Users\\JOJO\\Desktop\\pysmell\\detection\\test.py')
    myast.fileName = "C:\\Users\\JOJO\\Desktop\\pysmell\\detection\\test.py"
    myast.visit(astContent)
    print myast.imports
    # print myast.defmagic
    # print myast.usedmagic
    # for useitem in myast.usedmagic:
    #     for defitem in myast.defmagic:
    #         if useitem[0] == defitem[0]:
    #             break
    #     else:
    #         myast.result.append((12,useitem[1],useitem[2],useitem[0]))
    # print myast.result
def count_lines(node):
    """Return the largest line number reachable from *node*.

    Walks the AST and takes the maximum ``lineno`` over all descendant
    nodes; the 0 sentinel keeps max() safe for trees whose nodes carry no
    line numbers, so the result is 0 in that case.
    """
    lines = [0]
    # Iterate ast.walk lazily; the original materialized it into a list first.
    for child in ast.walk(node):
        if hasattr(child, 'lineno'):
            lines.append(child.lineno)
    return max(lines)


# Per-project statistics: count files and (max) source lines for every
# project under subject_dir, writing one CSV row per project plus a totals
# row.  Relies on subject_dir, walkDirectory, customast, project_info_file
# and the running totals (total_projects/total_lines/total_files) being
# defined earlier in the file.
projects = os.listdir(subject_dir)
for projectName in projects:
    total_projects += 1
    project_dir = subject_dir + projectName
    lines = 0
    files = 0
    for currentFileName in walkDirectory(project_dir):
        try:
            astContent = customast.parse_file(currentFileName)
        # Was a bare `except:` — that would also swallow KeyboardInterrupt
        # and SystemExit; Exception still covers parse/IO failures.
        except Exception:
            print(project_dir, currentFileName)
            continue
        lines = lines + count_lines(astContent)
        files = files + 1
    project_info_file.writerow([projectName, files, lines])
    total_lines = total_lines + lines
    total_files = total_files + files
# Final summary row: project, file and line grand totals.
project_info_file.writerow([total_projects, total_files, total_lines])