def convert_penn_to_kaf_with_numtokens(tree_str,term_ids,logging,lemma_for_termid,off_t=0,off_nt=0,off_edge=0):
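    """
    Convert a Penn-style bracketed tree whose leaves have the form
    "<token_index>#<token>" into a KAF constituency <tree> element (lxml),
    using term_ids to map each leaf to its KAF term identifier.
    Returns the <tree> element and the updated terminal, non-terminal
    and edge counters.
    """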
    global list_t, list_nt,list_edge,cnt_t, cnt_nt, cnt_edge
    list_t = []
    list_nt = []
    list_edge = []
    cnt_t = off_t
    cnt_nt = off_nt
    cnt_edge = off_edge

    this_tree = Tree(tree_str)
    logging.debug('\n'+str(this_tree))    ## The tree string is already UTF-8 encoded
    for num, num_token_and_token in enumerate(this_tree.leaves()):
        ## Each leaf has the form "<token_index>#<token>"; only the index is used
        p = num_token_and_token.find('#')
        num_token = int(num_token_and_token[:p])
        position = this_tree.leaf_treeposition(num)
        token_id = term_ids[num_token]
        this_tree[position] = token_id
        logging.debug('Matching '+num_token_and_token+' with term id='+token_id+' according to KAF lemma='+str(lemma_for_termid.get(token_id,'').encode('utf-8')))

    ## Optionally create an extra ROOT non-terminal (disabled by default)
    create_extra_root = False
    nt_id = None
    if create_extra_root:
        nt_id = 'nter'+str(cnt_nt)
        cnt_nt +=1
        list_nt.append((nt_id,'ROOT'))
    
    visit_node(this_tree, nt_id)
 
    root = etree.Element('tree')
    nonter_heads = set()
    #Nonter
    labels_for_nt = {}
    for nt_id, label in list_nt:
        ## Labels ending in '=H' mark the head constituent; strip the marker
        if label.endswith('=H'):
            nonter_heads.add(nt_id)
            label = label[:-2]
        ele = etree.Element('nt', attrib={'id':nt_id,'label':label})
        labels_for_nt[nt_id] = label
        root.append(ele)
    
    ## Terminals
    lemma_for_ter = {}
    for ter_id, span_ids in list_t:
        ele = etree.Element('t',attrib={'id':ter_id})
        span = etree.Element('span')
        ele.append(span)
        termids = span_ids.split(' ')
        for termid in termids:
            target = etree.Element('target',attrib={'id':termid})
            span.append(target)
        ## Use the lemma of the last term in the span for the edge comments below
        lemma_for_ter[ter_id] = lemma_for_termid.get(termids[-1],'unknown')
        root.append(ele)
        
    ##Edges
    for edge_id, node_from, node_to in list_edge:
        ele = etree.Element('edge',attrib={'id':edge_id,'from':node_from,'to':node_to})
        
        ## Resolve readable labels for the XML comment below
        ## The target node is always a non-terminal
        label_to = labels_for_nt.get(node_to)

        ## The source node can be a terminal or a non-terminal
        label_from = labels_for_nt.get(node_from)
        if label_from is None:
            label_from = lemma_for_ter.get(node_from,'unknown')
                                        
        comment = '  '+edge_id+'  '+label_to+' <- '+label_from+' '
        comment = comment.replace('--','-')  ## XML comments must not contain '--'
        if node_from in nonter_heads:
            ele.set('head','yes')
        root.append(etree.Comment(comment))
        root.append(ele)
    
    return root,cnt_t,cnt_nt,cnt_edge


def convert_penn_to_knaf_with_numtokens(tree_str,term_ids,lemma_for_termid,off_t=0,off_nt=0,off_edge=0):
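    """
    Same as convert_penn_to_kaf_with_numtokens, but builds a Ctree constituency
    object (with Cnonterminal, Cterminal and Cedge elements) instead of a raw
    lxml <tree> element.
    Returns the Ctree object and the updated terminal, non-terminal
    and edge counters.
    """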
    global list_t, list_nt,list_edge,cnt_t, cnt_nt, cnt_edge
    list_t = []
    list_nt = []
    list_edge = []
    cnt_t = off_t
    cnt_nt = off_nt
    cnt_edge = off_edge

    this_tree = Tree(tree_str)
    for num, num_token_and_token in enumerate(this_tree.leaves()):
        ## Each leaf has the form "<token_index>#<token>"; only the index is used
        p = num_token_and_token.find('#')
        num_token = int(num_token_and_token[:p])
        position = this_tree.leaf_treeposition(num)
        token_id = term_ids[num_token]
        this_tree[position] = token_id

    ## Optionally create an extra ROOT non-terminal (disabled by default)
    create_extra_root = False
    nt_id = None
    if create_extra_root:
        nt_id = 'nter'+str(cnt_nt)
        cnt_nt +=1
        list_nt.append((nt_id,'ROOT'))
    
    visit_node(this_tree, nt_id)
 
    knaf_tree = Ctree()   ## Output constituency object; the NLTK tree is no longer needed
    nonter_heads = set()
    #Nonter
    labels_for_nt = {}
    for nt_id, label in list_nt:
        ## Labels ending in '=H' mark the head constituent; strip the marker
        if label.endswith('=H'):
            nonter_heads.add(nt_id)
            label = label[:-2]
        nt_obj = Cnonterminal()
        nt_obj.set_id(nt_id)
        nt_obj.set_label(label)
        labels_for_nt[nt_id] = label
        knaf_tree.append_element(nt_obj)
    
    ## Terminals
    lemma_for_ter = {}
    for ter_id, span_ids in list_t:
        ter_obj = Cterminal()
        ter_obj.set_id(ter_id)
        this_span = Cspan()
        span_term_ids = span_ids.split(' ')  ## Avoid shadowing the term_ids parameter
        this_span.create_from_ids(span_term_ids)
        ter_obj.set_span(this_span)
        ## Use the lemma of the last term in the span for the edge comments below
        lemma_for_ter[ter_id] = lemma_for_termid.get(span_term_ids[-1],'unknown')
        knaf_tree.append_element(ter_obj)
        
    ##Edges
    for edge_id, node_from, node_to in list_edge:
        edge_obj = Cedge()
        edge_obj.set_id(edge_id)
        edge_obj.set_from(node_from)
        edge_obj.set_to(node_to)
        
        ## Resolve readable labels for the edge comment below
        ## The target node is always a non-terminal
        label_to = labels_for_nt.get(node_to)

        ## The source node can be a terminal or a non-terminal
        label_from = labels_for_nt.get(node_from)
        if label_from is None:
            label_from = lemma_for_ter.get(node_from,'unknown')
                                        
        comment = '  '+edge_id+'  '+label_to+' <- '+label_from+' '
        comment = comment.replace('--','-')  ## XML comments must not contain '--'
        if node_from in nonter_heads:
            edge_obj.set_as_head()
        edge_obj.set_comment(comment)
        knaf_tree.append_element(edge_obj)
    
    return knaf_tree,cnt_t,cnt_nt,cnt_edge


def convert_penn_to_kaf(tree_str, term_ids, logging, lemma_for_termid, off_t, off_nt, off_edge):
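    """
    Convert a Penn-style bracketed tree whose leaves are plain tokens, aligned
    by position with term_ids, into a KAF constituency <tree> element (lxml).
    Returns the <tree> element and the updated terminal, non-terminal
    and edge counters.
    """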
    global list_t, list_nt, list_edge, cnt_t, cnt_nt, cnt_edge
    list_t = []
    list_nt = []
    list_edge = []
    cnt_t = off_t
    cnt_nt = off_nt
    cnt_edge = off_edge

    this_tree = Tree(tree_str)
    logging.debug("\n" + str(this_tree))

    for num, token in enumerate(this_tree.leaves()):
        position = this_tree.leaf_treeposition(num)
        token_id = term_ids[num]
        this_tree[position] = token_id
        logging.debug(
            "Matching "
            + token
            + " with term id="
            + token_id
            + " which according to KAF lemma="
            + str(lemma_for_termid.get(token_id).encode("utf-8"))
        )

    ## Optionally create an extra ROOT non-terminal (disabled by default)
    create_extra_root = False
    nt_id = None
    if create_extra_root:
        nt_id = "nter" + str(cnt_nt)
        cnt_nt += 1
        list_nt.append((nt_id, "ROOT"))

    visit_node(this_tree, nt_id)

    root = etree.Element("tree")
    nonter_heads = set()
    # Nonter
    labels_for_nt = {}
    for nt_id, label in list_nt:
        ## Labels ending in "=H" mark the head constituent; strip the marker
        if label.endswith("=H"):
            nonter_heads.add(nt_id)
            label = label[:-2]
        ele = etree.Element("nt", attrib={"id": nt_id, "label": label})
        labels_for_nt[nt_id] = label
        root.append(ele)

    ## Terminals
    lemma_for_ter = {}
    for ter_id, span_ids in list_t:
        ele = etree.Element("t", attrib={"id": ter_id})
        span = etree.Element("span")
        ele.append(span)
        for termid in span_ids.split(" "):
            target = etree.Element("target", attrib={"id": termid})
            span.append(target)
        lemma_for_ter[ter_id] = lemma_for_termid.get(termid, "unknown")
        root.append(ele)

    ##Edges
    for edge_id, node_from, node_to in list_edge:
        ele = etree.Element("edge", attrib={"id": edge_id, "from": node_from, "to": node_to})

        ## Resolve readable labels for the XML comment below
        ## The target node is always a non-terminal
        label_to = labels_for_nt.get(node_to)

        ## The source node can be a terminal or a non-terminal
        label_from = labels_for_nt.get(node_from)
        if label_from is None:
            label_from = lemma_for_ter.get(node_from, "unknown")

        comment = "  " + (edge_id) + "  " + (label_to) + " <- " + (label_from) + " "

        if node_from in nonter_heads:
            ele.set("head", "yes")
        root.append(etree.Comment(comment))
        root.append(ele)

    return root, cnt_t, cnt_nt, cnt_edge
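

## Example usage (a minimal sketch; the tree string, term ids and lemma map below
## are illustrative only, and the call assumes the same Tree/etree imports and
## visit_node() helper that the functions above rely on):
##
##   import logging
##   penn = '(S (NP (DT the) (NN dog)) (VP (VBZ barks)))'
##   term_ids = ['t_1', 't_2', 't_3']
##   lemmas = {'t_1': 'the', 't_2': 'dog', 't_3': 'bark'}
##   tree_elem, cnt_t, cnt_nt, cnt_edge = convert_penn_to_kaf(penn, term_ids, logging, lemmas, 0, 0, 0)
##   print(etree.tostring(tree_elem, pretty_print=True))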