def compute_edit_distance(src_file, para_file):
    src_data = load_conllu(src_file)
    para_data = load_conllu(para_file)
    assert len(src_data) == len(para_data)

    edit_distances = []
    for key in tqdm(src_data.keys(), total=len(src_data)):
        src_sent = src_data[key]
        para_sent = para_data[key]

        # Build dependency trees from the head/UPOS annotations.
        src_root, _ = head_to_tree(src_sent['head'], src_sent['upos'])
        para_root, _ = head_to_tree(para_sent['head'], para_sent['upos'])

        # Serialize each tree into APTED's bracketed format, e.g. "{A{B}{C}}".
        src_tree_to_string = []
        treeToString(src_root, src_tree_to_string)
        src_tree_to_string = ''.join(['{'] + src_tree_to_string + ['}'])

        para_tree_to_string = []
        treeToString(para_root, para_tree_to_string)
        para_tree_to_string = ''.join(['{'] + para_tree_to_string + ['}'])

        # Tree edit distance between the source and paraphrase parses.
        apted = APTED(aptedTree.from_text(src_tree_to_string),
                      aptedTree.from_text(para_tree_to_string))
        ted = apted.compute_edit_distance()
        edit_distances.append(ted)
        # mapping = apted.compute_edit_mapping()

    return edit_distances
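# A minimal, self-contained sketch of the APTED call pattern used above,
# assuming only the `apted` package. The bracketed strings ("{A{B}{C}}") are
# the serialization format that treeToString() is expected to produce.
# The helper name below is illustrative, not part of the original code.
from apted import APTED, Config
from apted.helpers import Tree as aptedTree

def _apted_distance_example():
    # Two toy trees: root A with children B, C vs. root A with children B, D.
    t1 = aptedTree.from_text("{A{B}{C}}")
    t2 = aptedTree.from_text("{A{B}{D}}")
    # One relabel (C -> D) is needed, so the edit distance is 1.
    return APTED(t1, t2, Config()).compute_edit_distance()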
def ast_anytree_to_apted(cur_node):
    """Converts an anytree tree rooted at cur_node into an equivalent APTED Tree."""
    # Recursively convert the children, then attach them to a new APTED node
    # labelled with the node's operator name and class.
    children_apted_nodes = [ast_anytree_to_apted(child) for child in cur_node.children]
    return Tree(cur_node.op_name + " " + cur_node.op_class, *children_apted_nodes)
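# Illustrative sketch of how ast_anytree_to_apted() might be driven, assuming
# `Tree` above is apted.helpers.Tree and that the anytree nodes carry the
# op_name/op_class attributes referenced in the function (anytree.Node accepts
# arbitrary keyword attributes). The node labels here are made up.
from anytree import Node

def _anytree_to_apted_example():
    root = Node("root", op_name="Module", op_class="mod")
    call = Node("call", parent=root, op_name="Call", op_class="expr")
    Node("name", parent=call, op_name="Name", op_class="expr")
    # Returns an apted.helpers.Tree mirroring the anytree structure.
    return ast_anytree_to_apted(root)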
def mid2tr(tactr, mdx):
    # Decode the mid-level expression for key mdx and parse its bracketed
    # serialization into an APTED Tree.
    return Tree.from_text(
        tactr.mid_decoder.decode_exp_by_key(mdx).apted_tree())
def kern2tr(tactr, kdx):
    # Same as mid2tr, but decodes the kernel-level expression for key kdx.
    return Tree.from_text(
        tactr.decoder.decode_exp_by_key(kdx).apted_tree())
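# Hedged sketch: given a tactic-tree object `tactr` and keys `mdx`/`kdx`
# supplied by the caller, the mid-level and kernel-level views of an
# expression can be compared directly with APTED (assumed imported as
# `from apted import APTED, Config`). The helper name is illustrative.
def mid_kern_distance(tactr, mdx, kdx):
    return APTED(mid2tr(tactr, mdx), kern2tr(tactr, kdx),
                 Config()).compute_edit_distance()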
def gen_apted_tree(code):
    from apted.helpers import Tree

    # Parse the source code into a Python AST, serialize it into APTED's
    # bracketed string format, and build an APTED Tree from that string.
    tree_str = apted_ast_visit(ast.parse(code))
    return Tree.from_text(tree_str)
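# Usage sketch for gen_apted_tree(), assuming apted_ast_visit() (defined
# elsewhere in the project) serializes a Python AST into APTED's bracketed
# format. The two snippets differ only in a constant, so the resulting edit
# distance should be small. The helper name is illustrative.
def _code_ast_distance_example():
    from apted import APTED, Config
    t1 = gen_apted_tree("x = 1 + 2")
    t2 = gen_apted_tree("x = 1 + 3")
    return APTED(t1, t2, Config()).compute_edit_distance()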
    x_embs1.append(embs1)
    x_info_ids1.append(info_ids1)

c_loss = get_cluster_score(x_embs1, x_info_ids1)
c_loss = torch.Tensor([c_loss])
c_loss.requires_grad_()
c_loss = c_loss.cuda()

# ------------ loss_s ------------ #
s_len = len(samples_tree)
_loss = []
_losses = 0
for i in range(s_len):  # 32
    # Match the sample to its reference tree via the id prefix.
    _uid = real_DT[i].split('_')[0]
    tt1_i = train_DT_id.index(_uid)
    tt1 = train_DT[tt1_i]
    tree1 = Tree.from_text(tt1)
    tt2 = samples_tree[i]
    tree2 = Tree.from_text(tt2)
    # Tree edit distance between the reference and the generated tree.
    _apted = APTED(tree1, tree2, Config())
    ted = _apted.compute_edit_distance()
    _loss.append(ted)
    _losses += ted
t_loss = torch.mean(torch.Tensor([_loss]))
t_loss.requires_grad_()
t_loss = t_loss.cuda()
# -------------------------------- #

# Construct the input to the generator: prepend a column of zeros to the
# samples and drop the last column.
zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
if samples.is_cuda: