Ejemplo n.º 1
0
		faultnum = csv.reader(file(subject_dir+subject+'\\fault_log.csv','rb'))
		for line in faultnum:
			if faultnum.line_num == 1:
				continue
			currenttime = datetime.datetime.strptime(line[2], '%Y-%m-%d %H:%M:%S')
			if tagtime[tag] >= currenttime or nexttagtime < currenttime:
				continue
			elif currenttime > tagtime[tag] and currenttime<=nexttagtime and len(line)==6:
				if line[5] in fault_info.keys():
					fault_info[line[5]] = fault_info[line[5]] + 1
				else:
					fault_info[line[5]] = 1

		for currentFileName in walkDirectory(sourcedir):
			try:
				astContent = customast.parse_file(currentFileName)
			except:
				print subject,tag,currentFileName
				continue
			lines = set()
			for n in ast.walk(astContent):
				if hasattr(n,'lineno'):
					lines.add(n.lineno)

			if currentFileName in change_info.keys():
				change_count = change_info[currentFileName]
			else:
				change_count = 0

			if currentFileName in fault_info.keys():
				fault_count = fault_info[currentFileName]
Ejemplo n.º 2
0
def _build_matching_tree(source_file, head_id):
    """Parse *source_file* and build the intermediate tree used for matching.

    Runs the same preparation sequence for either side of the comparison:
    parse the file, hang the processed AST under a fresh head/root pair,
    assign node ids, collect the node lists and record each node's parent id.

    Args:
        source_file: Path handed to ``customast.parse_file``.
        head_id: Identifier given to the tree's head pointer
            (``'left_head'`` or ``'right_head'``).

    Returns:
        A tuple ``(node_list, inner_node_list, leaf_node_list, id_to_node)``
        as produced by ``node_get`` and ``id_to_node_get``.
    """
    ast_content = customast.parse_file(source_file)
    tree = Tree('头指针', head_id)
    root = Node('AstRoot', 'root')
    tree.linktohead(root)
    ast_process(ast_content, root)
    id_set(tree.head)
    child_to_parent = child_parent_information(tree.head)
    node_list, inner_node_list, leaf_node_list = node_get(tree.head)
    id_to_node = id_to_node_get(node_list)

    # Set each node's parent id. A direct lookup by the pair's first element
    # replaces the former scan over every key of id_to_node for every pair.
    # NOTE(review): assumes id_to_node is a plain id -> node mapping with
    # hashable ids -- confirm against id_to_node_get.
    for pair in child_to_parent:
        node = id_to_node.get(pair[0])
        if node is not None:
            node.parent = pair[1]

    return node_list, inner_node_list, leaf_node_list, id_to_node


def code_change_extraction(left_file, right_file):
    """Extract the changes between two source files and summarise their types.

    Both files are turned into intermediate trees, their leaf and inner nodes
    are matched, an edit script from the left tree (T1) to the right tree (T2)
    is computed, and the resulting change types are tallied.

    Args:
        left_file: Path of the original source file (T1).
        right_file: Path of the modified source file (T2).

    Returns:
        A list with one entry per element of ``change_type_enum()``; entry
        ``i`` is the value reported by ``changetype_statistic`` for
        ``change_type[i]`` divided by ``len(change_type_list)``. All entries
        are 0 when no change was extracted.
    """
    # Process both source programs: build each one's intermediate abstract
    # syntax tree and collect the node lists and related information.
    left_node_list, left_inner_node_list, left_leaf_node_list, left_id_to_node = \
        _build_matching_tree(left_file, 'left_head')
    right_node_list, right_inner_node_list, right_leaf_node_list, right_id_to_node = \
        _build_matching_tree(right_file, 'right_head')

    # Match leaf nodes.
    match_temp = leaf_match(left_leaf_node_list, right_leaf_node_list, 0.6)
    match_final = best_match(match_temp)
    leaf_matched_set(left_leaf_node_list, right_leaf_node_list, match_final)

    # Match inner nodes.
    # For every inner node x of T1 still marked unmatched, if T2 contains a
    # node y that matches it, (x, y) is added to match_final.
    # Inner nodes use first match: for inner nodes the first match is very
    # likely to be the best one.
    for node1 in left_inner_node_list:
        for node2 in right_inner_node_list:
            if node1.matched == 0 and node2.matched == 0:
                inner_node_match(node1, node2, match_final, 0.4, 0.6)

    # Add the head pointer and the root node to match_final so that they are
    # always treated as matched.
    match_final.append((0, 0, 1.0))
    match_final.append(('head_parent', 'head_parent', 1.0))
    match_final.append((1, 1, 1.0))

    # From the matched node set match_final, compute the edit operations that
    # transform T1 into T2.
    edit_script, change_information, change_information2 = \
        editscript_calculate(left_node_list, match_final, left_id_to_node, right_id_to_node, right_node_list)

    # Derive the corresponding ChangeType from the edit operations.
    # scc_list, parent_entity_list and changed_entity_list are index-aligned
    # with change_type_list and are handy when printing individual changes
    # while debugging; only change_type_list feeds the statistics below.
    change_type_list, scc_list, parent_entity_list, changed_entity_list = \
        changetype_generation(change_information, change_information2)

    # Tally the distribution of ChangeType over the extraction result.
    change_type = change_type_enum()
    change_type_percentage = [0] * len(change_type)  # share of each change type
    change_type_dict = changetype_statistic(change_type_list)

    # An empty change_type_list means the files do not differ, so every entry
    # stays 0; otherwise compute the frequency of each change type.
    total_changes = len(change_type_list)
    if total_changes != 0:
        for key, value in change_type_dict.items():
            for i, known_type in enumerate(change_type):
                if key == known_type:
                    change_type_percentage[i] = value / total_changes

    # change_type_percentage[i] holds the frequency of change_type[i].
    return change_type_percentage
Ejemplo n.º 3
0
            continue
        if alias.asname is not None:
          for (name,file,lineno) in self.imports:
            if name==alias.asname and self.fileName==file:
              break
          else:
            self.imports.add((alias.asname,self.fileName,node.lineno))
        elif alias.name != '*':
          for (name,file,lineno) in self.imports:
            if name==alias.name and self.fileName==file:
              break
          else:
            self.imports.add((alias.name,self.fileName,node.lineno))
      self.generic_visit(node)

if __name__ == '__main__':
    # Manual check: run the visitor over one local test file and show the
    # import records it collected.
    myast = MyAst()
    myast.fileName = "C:\\Users\\JOJO\\Desktop\\pysmell\\detection\\test.py"
    astContent = customast.parse_file('C:\\Users\\JOJO\\Desktop\\pysmell\\detection\\test.py')
    myast.visit(astContent)
    print(myast.imports)
    # print myast.defmagic
    # print myast.usedmagic
    # for useitem in myast.usedmagic:
    #   for defitem in myast.defmagic:
    #     if useitem[0] == defitem[0]:
    #       break
    #   else:
    #     myast.result.append((12,useitem[1],useitem[2],useitem[0]))

    # print myast.result
Ejemplo n.º 4
0

def count_lines(node):
    """Return the highest ``lineno`` found on *node* or any of its descendants.

    Nodes without a ``lineno`` attribute are ignored; when no node carries
    one, the result is 0.
    """
    line_numbers = [
        child.lineno for child in ast.walk(node) if hasattr(child, 'lineno')
    ]
    # The leading 0 keeps max() defined when no node has a line number.
    return max([0] + line_numbers)


# For every project directory under subject_dir, parse each file yielded by
# walkDirectory, add up the per-file line figure from count_lines, and write
# one row per project followed by a final totals row.
projects = os.listdir(subject_dir)

for projectName in projects:
    total_projects += 1
    # os.path.join gives the same path as plain concatenation when
    # subject_dir already ends with a separator, and a correct one when it
    # does not.
    project_dir = os.path.join(subject_dir, projectName)
    lines = 0
    files = 0
    for currentFileName in walkDirectory(project_dir):
        try:
            astContent = customast.parse_file(currentFileName)
        except Exception:
            # Best effort: report the file that could not be parsed and move
            # on. Catching Exception (not a bare except) lets Ctrl-C and
            # SystemExit still stop the run.
            print(project_dir, currentFileName)
            continue
        lines += count_lines(astContent)
        files += 1
    project_info_file.writerow([projectName, files, lines])
    total_lines += lines
    total_files += files

# Summary row: number of projects, files and lines over the whole run.
project_info_file.writerow([total_projects, total_files, total_lines])