def get_new_nodel(nl, flg):
    """Build one candidate node list for every actionable flag in ``flg``.

    Each candidate is a copy of ``nl`` with exactly one edit applied at
    position ``i``, selected by ``flg[i]`` (the B/E/S names come from the
    original inline comments):

    * ``0`` (B): ``nl[i]`` and ``nl[i+1]`` become the two sons of a new node
      that takes ``head`` / ``head_pos`` from ``nl[i]``.
    * ``1`` (E): same merge, but ``head`` / ``head_pos`` come from
      ``nl[i+1]``.
    * ``2`` (S): ``nl[i]`` becomes the only son of a new node.
    * ``3``: no candidate is produced.
    * anything else: ``'?'`` is printed and no candidate is produced.

    After a binary merge (0 or 1) the flag at ``i+1`` is not looked at,
    because that node has just been consumed by the merge.

    Returns:
        A list of ``(new_nl, i, fts)`` tuples: ``new_nl`` is the edited node
        list, ``i`` is the index in ``nl`` where the edit starts, and ``fts``
        is ``get_tag_feature_final(nl, i, kind)`` with ``kind`` 1 for the
        unary wrap and 2 for a binary merge.

    Raises:
        IndexError: if a flag refers to a position past the end of ``nl``
            (e.g. a 0/1 flag on the last node).
    """
    # TODO (from original): find k-best, one change at a time, and record
    # the diff.
    candidates = []
    i = 0
    while i < len(flg):
        # Read the flag exactly once.  Re-reading flg[i] after advancing i
        # would inspect the *next* position's flag (or run off the end).
        flag = flg[i]

        if flag == 3:
            i += 1
            continue

        if flag == 0 or flag == 1:
            left, right = nl[i], nl[i + 1]
            head_src = left if flag == 0 else right
            merged = node()
            merged.son = [left, right]
            merged.head = head_src.head
            merged.head_pos = head_src.head_pos

            new_nl = nl[:i]
            new_nl.append(merged)
            new_nl.extend(nl[i + 2:])

            fts = get_tag_feature_final(nl, i, 2)
            candidates.append((new_nl, i, fts))
            # Skip the right-hand node as well: it is now part of `merged`.
            i += 2
        elif flag == 2:
            wrapped = node()
            wrapped.son = [nl[i]]
            wrapped.head = nl[i].head
            wrapped.head_pos = nl[i].head_pos

            new_nl = nl[:i]
            new_nl.append(wrapped)
            new_nl.extend(nl[i + 1:])

            fts = get_tag_feature_final(nl, i, 1)
            candidates.append((new_nl, i, fts))
            i += 1
        else:
            # Unknown flag: report it and move on instead of recording a
            # truncated node list under the wrong index.
            print('?')
            i += 1
    return candidates
def multi_2_binary_trick(t, tag):
    """Rewrite node ``t`` so that it has at most two sons.

    The original comment says all sons are expected to be leaves.  Cases are
    tried in this order:

    1. ``t`` already has two sons or fewer: returned untouched.
    2. The last son is tagged ``'ETC'`` or its word is in ``end_punc``: the
       last son is split off, the rest (a deep copy of ``t`` without it) is
       binarised recursively, and ``t.son`` becomes ``[rest, last]``.
    3. First and last sons are both ``'PU'``, both one character long, and
       their code points differ by exactly 1 (e.g. ``(`` and ``)``): the
       inside is binarised recursively, paired with the closing mark under a
       new ``tag + '*'`` node, and ``t.son`` becomes ``[opening, that_node]``.
    4. Otherwise ``check_item_complete`` regroups the sons and, if more than
       two remain, ``get_son_join`` folds them into a binary chain.

    ``t`` is modified in place in cases 2 and 3.  Returns the resulting root
    node.
    """
    kids = t.son
    if len(kids) <= 2:
        return t

    first, last = kids[0], kids[-1]

    # Trailing "etc." item or closing punctuation: peel it off the end.
    if last.tag == 'ETC' or last.word in end_punc:
        rest = deepcopy(t)
        rest.son = rest.son[:-1]
        t.son = [multi_2_binary_trick(rest, tag), last]
        return t

    # Matching pair of single-character punctuation marks wrapping the sons.
    wraps_in_pair = (
        first.tag == last.tag
        and last.tag == 'PU'
        and len(first.word) == len(last.word)
        and len(last.word) == 1
        and abs(ord(first.word) - ord(last.word)) == 1
    )
    if wraps_in_pair:
        inner = deepcopy(t)
        inner.son = inner.son[1:-1]
        inner_root = multi_2_binary_trick(inner, tag)
        closing = node(atag=tag + '*')
        closing.son = [inner_root, last]
        # The opening mark stays as the first son of t.
        t.son = [first, closing]
        return t

    # After this call every son is a complete item (per the original note).
    t = check_item_complete(t, tag)
    if len(t.son) > 2:
        t = get_son_join(t.son, tag)
        # get_son_join tags its root with a trailing '*'; strip it for the
        # top node.
        # NOTE(review): assumes the rstrip belongs to this branch - confirm.
        t.tag = t.tag.rstrip('*')
    # TODO (from original): check "n1 cc n2".
    return t
def get_son_join(sonl, tag):
    """Fold ``sonl`` into a right-branching chain of binary nodes.

    The last two sons are joined first; every earlier son is then attached
    on the left of the chain built so far, so ``[a, b, c, d]`` becomes
    ``(a (b (c d)))``.  Every node created is tagged ``tag + '*'``.

    Returns the root of the chain.  ``sonl`` must hold at least two items,
    otherwise indexing ``sonl[-2]`` raises ``IndexError``.
    """
    chain = node(atag=tag + '*')
    chain.son = [sonl[-2], sonl[-1]]
    # Walk the remaining sons from right to left, wrapping the chain each time.
    for idx in range(len(sonl) - 3, -1, -1):
        parent = node(atag=tag + '*')
        parent.son = [sonl[idx], chain]
        chain = parent
    return chain
def binary_right(rightson, sons, tag):
    """Join ``sons[0]`` and ``sons[1]``, then attach ``rightson`` on the right.

    The first two entries of ``sons`` are placed under a new ``tag + '*'``
    node.  Each element of ``rightson`` is then taken in order and paired
    with the node built so far (built node on the left, new element on the
    right), producing a left-branching chain.  Only ``sons[0]`` and
    ``sons[1]`` are used.

    Returns the outermost node.
    """
    pair = [sons[0], sons[1]]
    current = node(atag=tag + '*')
    current.son = pair
    for nxt in rightson:
        pair = [current, nxt]
        current = node(atag=tag + '*')
        current.son = pair
    return current
def binary_left(leftson, sons, tag):
    """Join ``sons[0]`` and ``sons[1]``, then attach ``leftson`` on the left.

    The first two entries of ``sons`` are placed under a new ``tag + '*'``
    node.  The elements of ``leftson`` are then consumed from last to first,
    each one paired with the node built so far (new element on the left,
    built node on the right), producing a right-branching chain.  Only
    ``sons[0]`` and ``sons[1]`` are used.

    Returns the outermost node.
    """
    pair = [sons[0], sons[1]]
    current = node(atag=tag + '*')
    current.son = pair
    for prev in reversed(leftson):
        pair = [prev, current]
        current = node(atag=tag + '*')
        current.son = pair
    return current
def get_pieces(res):
    """Group the items of ``res`` into pieces according to their last field.

    For every item ``w`` a leaf node is created from ``w[0]`` (word) and
    ``w[1]`` (used for both tag and pos).  The label ``w[-1]`` controls the
    grouping:

    * ``'B'`` or ``'S'``: any piece still open is closed before this leaf.
    * ``'E'`` or ``'S'``: the piece is closed right after this leaf.
    * any other label: the leaf is just added to the open piece.

    A piece still open at the end of ``res`` is emitted too.

    Returns a list of pieces, each a non-empty list of leaf nodes.
    """
    pieces = []
    current = []
    for w in res:
        leaf = node(aleaf=True, aword=w[0], atag=w[1], apos=w[1])
        label = w[-1]
        # A new start flushes whatever was left unfinished.
        if label in ('B', 'S') and current:
            pieces.append(current)
            current = []
        current.append(leaf)
        # An end (or a single) completes the piece immediately.
        if label in ('E', 'S'):
            pieces.append(current)
            current = []
    if current:
        pieces.append(current)
    return pieces