def compute_edit_distance(src_file, para_file):
    src_data = load_conllu(src_file)
    para_data = load_conllu(para_file)
    assert len(src_data) == len(para_data)

    edit_distances = []
    for key in tqdm(src_data.keys(), total=len(src_data)):
        src_sent = src_data[key]
        para_sent = para_data[key]

        # Build dependency trees from head indices and UPOS tags.
        src_root, _ = head_to_tree(src_sent['head'], src_sent['upos'])
        para_root, _ = head_to_tree(para_sent['head'], para_sent['upos'])

        # Serialize each tree into APTED's bracket notation, e.g. "{A{B}{C}}".
        src_tree_to_string = []
        treeToString(src_root, src_tree_to_string)
        src_tree_to_string = ['{'] + src_tree_to_string + ['}']
        src_tree_to_string = ''.join(src_tree_to_string)

        para_tree_to_string = []
        treeToString(para_root, para_tree_to_string)
        para_tree_to_string = ['{'] + para_tree_to_string + ['}']
        para_tree_to_string = ''.join(para_tree_to_string)
        # print(src_tree_to_string)
        # print(para_tree_to_string)

        apted = APTED(aptedTree.from_text(src_tree_to_string),
                      aptedTree.from_text(para_tree_to_string))
        ted = apted.compute_edit_distance()
        edit_distances.append(ted)
        # mapping = apted.compute_edit_mapping()
        # print(mapping)

    return edit_distances
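# A minimal, self-contained sketch of the bracket-notation round trip used
# above: the '{label{child}...}' strings produced by treeToString are what
# apted.helpers.Tree.from_text expects (here `aptedTree` is assumed to be that
# Tree class). The tiny UPOS trees below are illustrative only.
from apted import APTED
from apted.helpers import Tree

src = Tree.from_text('{VERB{NOUN}{NOUN{DET}}}')
para = Tree.from_text('{VERB{NOUN{DET}}{NOUN}}')
print(APTED(src, para).compute_edit_distance())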
def apted_code_distance(code_a, code_b):
    tree_a = gen_apted_tree(code_a)
    tree_b = gen_apted_tree(code_b)
    from apted import APTED
    apted = APTED(tree_a, tree_b)
    ted = apted.compute_edit_distance()
    return ted
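# `gen_apted_tree` is not shown in this snippet. One plausible implementation
# (an assumption, limited to Python source) parses the code with the standard
# `ast` module and serializes node types into APTED bracket notation before
# handing the result to apted.helpers.Tree.from_text.
import ast
from apted.helpers import Tree

def gen_apted_tree(code):
    def to_brackets(node):
        children = ''.join(to_brackets(child) for child in ast.iter_child_nodes(node))
        return '{' + type(node).__name__ + children + '}'
    return Tree.from_text(to_brackets(ast.parse(code)))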
def count_distance(a_ast, b_ast):
    """Count the tree edit distance between two AST trees."""
    a_ast = build_tree(a_ast)
    b_ast = build_tree(b_ast)
    apted = APTED(a_ast, b_ast, CustomConfig())
    ted = apted.compute_edit_distance()
    return ted
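# `CustomConfig` is not defined in this snippet. A typical cost configuration
# subclasses apted.Config and overrides the per-operation costs; the sketch
# below is an assumption about its shape, comparing AST nodes by a
# hypothetical `label` attribute and exposing their children to the algorithm.
from apted import Config

class CustomConfig(Config):
    def rename(self, node1, node2):
        # Relabeling costs 1 only when the node labels differ.
        return 1 if node1.label != node2.label else 0

    def children(self, node):
        # Tell APTED how to traverse the AST.
        return node.children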
def apted(tree1, tree2):
    # Remove the outer brackets and strip all whitespace.
    str_t1 = apted_tree_format(tree1).strip()[1:-1].strip()
    str_t2 = apted_tree_format(tree2).strip()[1:-1].strip()
    # Parse the bracket-notation strings into APTED tree objects.
    t1 = helpers.Tree.from_text(str_t1)
    t2 = helpers.Tree.from_text(str_t2)
    apted = APTED(t1, t2)
    return apted.compute_edit_distance()
def get_tree_edit_distance(tree1, tree2):
    class TreeEditDistanceConfig(Config):
        def __init__(self):
            pass

        def rename(self, node1, node2):
            # Renaming costs 1 when the labels differ, 0 when they match.
            return 1 if node1.value != node2.value else 0

        def children(self, node):
            return list(node.children)

    apted = APTED(tree1, tree2, TreeEditDistanceConfig())
    ed = apted.compute_edit_distance()
    return ed
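# Usage sketch for get_tree_edit_distance above: any node object exposing a
# `value` label and a `children` list satisfies the config. The `SimpleNode`
# class is hypothetical, for illustration only.
from dataclasses import dataclass, field

@dataclass
class SimpleNode:
    value: str
    children: list = field(default_factory=list)

left = SimpleNode('add', [SimpleNode('x'), SimpleNode('y')])
right = SimpleNode('add', [SimpleNode('x'), SimpleNode('z')])
print(get_tree_edit_distance(left, right))  # one rename (y -> z), so the distance is 1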
def diff(tree_before: Node, tree_after: Node) -> (int, dict):
    """
    Returns the difference between two QEP trees.

    :param tree_before: The 'before' tree.
    :param tree_after: The 'after' tree.
    :return:
        distance: The structural edit distance between the two trees.
                  Only differences in algorithm are captured.
        delta: The difference between the two trees. Has 3 keys:
            - deleted: nodes that are deleted from tree_before
            - inserted: nodes that are inserted into tree_after
            - stayed: nodes that are present in both trees. Has two keys:
                - before: the nodes in tree_before
                - after: the nodes in tree_after
              Note that the before and after nodes may differ in attributes
              other than algorithm and operation.
    """
    apted = APTED(tree_before, tree_after, APTEDConfig())
    distance = apted.compute_edit_distance()
    mapping = apted.compute_edit_mapping()
    delta = {
        "deleted": [m[0] for m in mapping if m[1] is None],
        "inserted": [m[1] for m in mapping if m[0] is None],
        "stayed": {
            "before": [m[0] for m in mapping if m[0] is not None and m[1] is not None],
            "after": [m[1] for m in mapping if m[0] is not None and m[1] is not None],
        },
    }
    return distance, delta
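# The delta construction above relies on compute_edit_mapping returning
# (before_node, after_node) pairs where one side is None for deletions and
# insertions. A minimal sketch with the library's built-in bracket trees
# (the QEP-specific Node and APTEDConfig types are not needed to see the
# mapping shape; the plan-node labels below are made up):
from apted import APTED
from apted.helpers import Tree

before = Tree.from_text('{Join{SeqScan{Sort}}{IndexScan}}')
after = Tree.from_text('{Join{SeqScan}{IndexScan}}')
for before_node, after_node in APTED(before, after).compute_edit_mapping():
    # The deleted 'Sort' node is paired with None on the 'after' side.
    print(getattr(before_node, 'name', None), '->', getattr(after_node, 'name', None))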
def treeDistance(tree1, tree2):
    """Compute the tree edit distance between two trees."""
    tree1, tree2 = treeToTree(tree1), treeToTree(tree2)
    ap = APTED(tree1, tree2)
    return ap.compute_edit_distance()
def calculate_edit_distance(tree1, tree2):
    apted = APTED(tree1, tree2, TreeEditDistanceConfig())
    ed = apted.compute_edit_distance()
    return ed
c_loss = torch.Tensor([c_loss])
c_loss.requires_grad_()
c_loss = c_loss.cuda()

# ------------ loss_s ------------ #
s_len = len(samples_tree)
_loss = []
_losses = 0
for i in range(s_len):  # batch size, e.g. 32
    _uid = real_DT[i].split('_')[0]
    tt1_i = train_DT_id.index(_uid)
    tt1 = train_DT[tt1_i]
    tree1 = Tree.from_text(tt1)
    tt2 = samples_tree[i]
    tree2 = Tree.from_text(tt2)
    # Tree edit distance between the reference tree and the sampled tree.
    _apted = APTED(tree1, tree2, Config())
    ted = _apted.compute_edit_distance()
    _loss.append(ted)
    _losses += ted

t_loss = torch.mean(torch.Tensor([_loss]))
t_loss.requires_grad_()
t_loss = t_loss.cuda()
# ------------------------------------------- #

# Construct the input to the generator: prepend a column of zeros to the
# samples and drop the last column.
zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
if samples.is_cuda:
    zeros = zeros.cuda()
inputs = Variable(torch.cat([zeros, samples.data], dim=1)[:, :-1].contiguous())
targets = Variable(samples.data).contiguous().view((-1,))
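# A hypothetical refactoring sketch (not part of the original code) of the
# per-sample TED term above: it averages the tree edit distance between each
# sampled tree and its reference tree. Note that the distance is computed
# outside autograd, so the returned tensor carries no gradient back into the
# generator; it acts as a scalar score rather than a differentiable loss.
def mean_tree_edit_distance(samples_tree, real_DT, train_DT_id, train_DT):
    distances = []
    for uid_key, sampled in zip(real_DT, samples_tree):
        uid = uid_key.split('_')[0]
        reference = train_DT[train_DT_id.index(uid)]
        apted = APTED(Tree.from_text(reference), Tree.from_text(sampled), Config())
        distances.append(apted.compute_edit_distance())
    return torch.tensor(distances, dtype=torch.float).mean()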