Exemple #1
0
def compute_edit_distance(src_file, para_file):
    """Compute one APTED tree edit distance per pair of aligned sentences.

    Both files are read with ``load_conllu`` and must yield the same number
    of entries. For every key of the source data, the ``head`` and ``upos``
    fields of the source sentence and of the sentence stored under the same
    key in the second file are converted to trees with ``head_to_tree``,
    serialized with ``treeToString`` and compared with APTED.

    Args:
        src_file: Passed to ``load_conllu`` to obtain the source sentences.
        para_file: Passed to ``load_conllu`` to obtain the sentences the
            source is compared against (presumably paraphrases, judging by
            the name -- confirm against the caller).

    Returns:
        A list with the value of ``APTED.compute_edit_distance()`` for each
        key, in the iteration order of the source data.

    Raises:
        AssertionError: If the two loaded collections differ in length.
    """

    def _bracketed(root):
        # treeToString fills the list it is given; the pieces are wrapped in
        # one extra pair of braces before being parsed by aptedTree.
        pieces = []
        treeToString(root, pieces)
        return ''.join(['{', *pieces, '}'])

    sources = load_conllu(src_file)
    paraphrases = load_conllu(para_file)
    assert len(sources) == len(paraphrases)

    distances = []
    for sent_key in tqdm(sources.keys(), total=len(sources)):
        source = sources[sent_key]
        paraphrase = paraphrases[sent_key]

        # Only the first element returned by head_to_tree (the root) is used.
        source_root, _ = head_to_tree(source['head'], source['upos'])
        paraphrase_root, _ = head_to_tree(paraphrase['head'],
                                          paraphrase['upos'])

        source_text = _bracketed(source_root)
        paraphrase_text = _bracketed(paraphrase_root)

        matcher = APTED(aptedTree.from_text(source_text),
                        aptedTree.from_text(paraphrase_text))
        distances.append(matcher.compute_edit_distance())

    return distances
Exemple #2
0
def ast_anytree_to_apted(cur_node):
    """
    Recursively mirror an Anytree node and its descendants as an APTED Tree.

    Each produced node is labelled with the source node's ``op_name`` and
    ``op_class`` joined by a single space; children are converted first and
    keep their original order.
    """
    converted_children = [
        ast_anytree_to_apted(child) for child in cur_node.children
    ]
    label = cur_node.op_name + " " + cur_node.op_class
    return Tree(label, *converted_children)
Exemple #3
0
def mid2tr(tactr, mdx):
    """Build a Tree from the expression ``tactr.mid_decoder`` decodes for ``mdx``.

    The expression's ``apted_tree()`` output is handed to ``from_text``.
    """
    # The "FOO" node only serves as a receiver for from_text.
    # NOTE(review): if from_text is a classmethod the instance is redundant -- confirm.
    parse = Tree("FOO").from_text
    expression = tactr.mid_decoder.decode_exp_by_key(mdx)
    return parse(expression.apted_tree())
Exemple #4
0
def kern2tr(tactr, kdx):
    """Build a Tree from the expression ``tactr.decoder`` decodes for ``kdx``.

    The expression's ``apted_tree()`` output is handed to ``from_text``.
    """
    # The "FOO" node only serves as a receiver for from_text.
    # NOTE(review): if from_text is a classmethod the instance is redundant -- confirm.
    parse = Tree("FOO").from_text
    expression = tactr.decoder.decode_exp_by_key(kdx)
    return parse(expression.apted_tree())
Exemple #5
0
def gen_apted_tree(code):
    """Parse Python source ``code`` and return it as an APTED ``Tree``.

    The source is parsed with ``ast.parse``, rendered to text by
    ``apted_ast_visit`` and that text is loaded with ``Tree.from_text``.
    """
    # Imported at call time, as in the original, so apted is only required
    # when this function is actually used.
    from apted.helpers import Tree as AptedTree

    syntax_tree = ast.parse(code)
    bracket_text = apted_ast_visit(syntax_tree)
    return AptedTree.from_text(bracket_text)
Exemple #6
0
         x_embs1.append(embs1)
         x_info_ids1.append(info_ids1)
         
     c_loss = get_cluster_score(x_embs1, x_info_ids1)
     c_loss = torch.Tensor([c_loss])
     c_loss.requires_grad_()
     c_loss = c_loss.cuda()                        
     #------------loss_s--------------------#
     s_len = len(samples_tree)
     _loss = []
     _losses = 0
     for i in range(s_len): # 32
         _uid = real_DT[i].split('_')[0]
         tt1_i = train_DT_id.index(_uid)
         tt1 = train_DT[tt1_i]
         tree1 = Tree.from_text(tt1)
         tt2 = samples_tree[i]
         tree2 = Tree.from_text(tt2)
         _apted = APTED(tree1, tree2, Config())
         ted = _apted.compute_edit_distance()
         _loss.append(ted)
         _losses += ted
     
     t_loss = torch.mean(torch.Tensor([_loss]))
     t_loss.requires_grad_()
     t_loss = t_loss.cuda()
     #-------------------------------------------# 
 
 # construct the input to the generator, add zeros before samples and delete the last column
 zeros = torch.zeros((BATCH_SIZE, 1)).type(torch.LongTensor)
 if samples.is_cuda: