def changetype_statistic(change_type_list):
    """Count how often each recognised ChangeType occurs in ``change_type_list``.

    The result is seeded with a zero entry for every value returned by
    ``change_type_enum()``, so types that never occur are still reported.

    :param change_type_list: iterable of ChangeType values to tally.
    :return: dict mapping each recognised ChangeType to its occurrence count.
    :raises KeyError: if an entry is not one of the recognised ChangeTypes.
    """
    # Every known ChangeType starts at zero.
    counts = dict.fromkeys(change_type_enum(), 0)
    for observed in change_type_list:
        counts[observed] += 1
    return counts
def changetype_statistic(change_type_list):
    """Tally the occurrences of every recognised ChangeType.

    All values returned by ``change_type_enum()`` appear as keys of the
    result, initialised to zero, and each entry of ``change_type_list``
    increments the matching counter.

    :param change_type_list: iterable of ChangeType values to count.
    :return: dict of ChangeType -> number of occurrences.
    :raises KeyError: if an entry is not a recognised ChangeType.
    """
    tally = {known_type: 0 for known_type in change_type_enum()}
    for seen_type in change_type_list:
        tally[seen_type] = tally[seen_type] + 1
    return tally
예제 #3
0
'''


import time
import filecmp
import csv
import os
from file_process import file_find
from change_type import change_type_enum
from change_extraction import code_change_extraction


# Write the header row of the results file. Mode 'w' writes a text file
# ('wb' would write binary, 'a' would append). newline='' removes the blank
# lines that would otherwise appear in the output.
# The `with` block guarantees the handle is closed even if building or
# writing the header raises.
with open('ChangeType_Extraction.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, dialect='excel')
    change_type = change_type_enum()
    # Prepend the two identifying columns so the header is written as one row.
    change_type.insert(0, 'filename')
    change_type.insert(1, 'filepath')
    csvwriter.writerow(change_type)  # writerow writes one row, writerows many


# Directory containing the two versions to compare. Per the original note,
# `left` and `right` hold the same-named files found under each version's path.
# time.clock() was removed in Python 3.8; perf_counter() is its replacement.
# NOTE(review): any later elapsed-time reading must use perf_counter() as well.
start = time.perf_counter()
# Raw string: identical value, but without the invalid escapes \P and \C.
filedir = r'E:\PyCharm Workspace\Change Extraction'
left_filename = 'scipy-0.16.0'
right_filename = 'scipy-0.16.1'
left, right = file_find(filedir, left_filename, right_filename)


# 对两个版本程序的对应源代码文件进行比较
예제 #4
0
step4:将变更结果写入ChangeType_Extraction.csv
'''

import time
import filecmp
import csv
import os
from file_process import file_find
from change_type import change_type_enum
from change_extraction import code_change_extraction

# Write the header row of the results file. Mode 'w' writes a text file
# ('wb' would write binary, 'a' would append). newline='' removes the blank
# lines that would otherwise appear in the output.
# Using a context manager closes the file even when an exception interrupts
# the header construction or the write.
with open('ChangeType_Extraction.csv', 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile, dialect='excel')
    change_type = change_type_enum()
    # Insert the identifying columns up front so the header is a single row.
    change_type.insert(0, 'filename')
    change_type.insert(1, 'filepath')
    csvwriter.writerow(change_type)  # writerow writes one row, writerows many

# Directory containing the two versions to compare. Per the original note,
# `left` and `right` hold the same-named files found under each version's path.
# time.clock() no longer exists on Python 3.8+; perf_counter() replaces it.
# NOTE(review): the matching end-of-run reading must use perf_counter() too.
start = time.perf_counter()
# Raw string keeps the exact same path while avoiding the invalid \P / \C escapes.
filedir = r'E:\PyCharm Workspace\Change Extraction'
left_filename = 'scipy-0.16.0'
right_filename = 'scipy-0.16.1'
left, right = file_find(filedir, left_filename, right_filename)

# Compare the corresponding source files of the two program versions.
# The results file is reopened in append mode ('a'); the statements that use
# this handle are not visible in this excerpt.
csvfile = open('ChangeType_Extraction.csv', 'a',
               newline='')  # newline='' removes blank lines from the output
예제 #5
0
def _build_indexed_tree(source_file, head_name):
    """Parse ``source_file`` into the intermediate tree and index its nodes.

    :param source_file: path handed to ``customast.parse_file``.
    :param head_name: second argument for the ``Tree`` head
        (``'left_head'`` or ``'right_head'``).
    :return: ``(node_list, inner_node_list, leaf_node_list, id_to_node)`` as
        produced by ``node_get`` and ``id_to_node_get``.
    """
    ast_content = customast.parse_file(source_file)
    tree = Tree('头指针', head_name)
    root = Node('AstRoot', 'root')
    tree.linktohead(root)
    ast_process(ast_content, root)
    id_set(tree.head)
    child_to_parent = child_parent_information(tree.head)
    node_list, inner_node_list, leaf_node_list = node_get(tree.head)
    id_to_node = id_to_node_get(node_list)
    # Store the parent on every node that has a (child, parent) pair.
    # A direct lookup replaces the former scan over all ids for every pair.
    for pair in child_to_parent:
        if pair[0] in id_to_node:
            id_to_node.get(pair[0]).parent = pair[1]
    return node_list, inner_node_list, leaf_node_list, id_to_node


def code_change_extraction(left_file, right_file):
    """Extract the changes between two source files and summarise their types.

    Both files are parsed into intermediate trees, the nodes of the two trees
    are matched, an edit script is derived from the matching, and the edit
    operations are classified into ChangeTypes.

    :param left_file: path of the first (old) version of the source file.
    :param right_file: path of the second (new) version of the source file.
    :return: list aligned with ``change_type_enum()``; entry ``i`` is the
        fraction of all detected changes whose type is ``change_type_enum()[i]``.
        Every entry is 0 when no change was detected.
    """
    # Build the intermediate tree and the node indexes for each version.
    left_node_list, left_inner_node_list, left_leaf_node_list, left_id_to_node = \
        _build_indexed_tree(left_file, 'left_head')
    right_node_list, right_inner_node_list, right_leaf_node_list, right_id_to_node = \
        _build_indexed_tree(right_file, 'right_head')

    # Match the leaf nodes.
    # NOTE(review): 0.6 is presumably a similarity threshold; confirm in leaf_match.
    match_temp = leaf_match(left_leaf_node_list, right_leaf_node_list, 0.6)
    match_final = best_match(match_temp)
    leaf_matched_set(left_leaf_node_list, right_leaf_node_list, match_final)

    # Match the inner nodes: for every still-unmatched inner node x of T1, if
    # T2 has a node y matching it, (x, y) is added to match_final. Inner nodes
    # use first match, since for them the first match is likely the best one.
    # The matched flags are re-read on every iteration because
    # inner_node_match may update them.
    for node1 in left_inner_node_list:
        for node2 in right_inner_node_list:
            if node1.matched == 0 and node2.matched == 0:
                inner_node_match(node1, node2, match_final, 0.4, 0.6)

    # Add the head pointer and the root node to match_final so that they are
    # always treated as matched.
    match_final.append((0, 0, 1.0))
    match_final.append(('head_parent', 'head_parent', 1.0))
    match_final.append((1, 1, 1.0))

    # Derive the edit operations that turn T1 into T2 from the matched pairs.
    _edit_script, change_information, change_information2 = \
        editscript_calculate(left_node_list, match_final, left_id_to_node,
                             right_id_to_node, right_node_list)

    # Classify the edit operations into ChangeTypes. The other three lists
    # (scc, parent entity, changed entity) are index-aligned with
    # change_type_list and are only useful for debugging output.
    change_type_list, _scc_list, _parent_entity_list, _changed_entity_list = \
        changetype_generation(change_information, change_information2)

    # Compute the distribution of ChangeTypes from the extraction result.
    change_type = change_type_enum()
    change_type_dict = changetype_statistic(change_type_list)
    total = len(change_type_list)
    if total == 0:
        # No differences were found: every percentage stays 0.
        return [0] * len(change_type)

    # Entry i holds the frequency of change_type[i] among all detected changes.
    return [change_type_dict.get(name, 0) / total for name in change_type]