def get_piece_parsed_pack(nodel_l):
    """Process several node lists in one batch and return their first items.

    Each pass collects the features of every entry of ``nodel_l`` that has
    more than one element, writes them (one empty line after each entry's
    features) to the file ``'tmp'``, runs ``piece_parse_model_test_pack`` on
    that file and replaces the entries with the result of ``change_nodel``.
    Passes repeat until no entry has more than one element.

    Args:
        nodel_l: list of node lists; an entry may also be the empty string.
            NOTE: the list is updated in place.

    Returns:
        A list aligned with ``nodel_l``: ``''`` where the entry is ``''``,
        otherwise the entry's first element.
    """
    feature_path = 'tmp'  # scratch file the feature lines are written to

    while True:
        pending = []        # positions in nodel_l that still need a pass
        feature_lines = []
        for pos, nodes in enumerate(nodel_l):
            if len(nodes) <= 1:
                continue
            pending.append(pos)
            feature_lines.extend(get_nodel_feature(nodes))
            feature_lines.append('')  # empty line closes this entry's features

        if not pending:
            break

        write_file(feature_path, feature_lines)
        tags_all = piece_parse_model_test_pack(feature_path)

        # The n-th tag sequence belongs to the n-th pending entry.
        for rank, tags in enumerate(tags_all):
            target = pending[rank]
            nodel_l[target] = change_nodel(nodel_l[target], tags)

    return ['' if entry == '' else entry[0] for entry in nodel_l]
def get_piece_parsed(nodel, tag_model, pcfg_model):
    """Parse one node list while keeping several scored candidates.

    Starting from ``[(nodel, 0)]``, each round asks ``get_undone`` (with the
    module-level ``beam_size``) for the candidates that are kept and the
    ones still unfinished. The unfinished ones are written as features to
    ``piece_parse_tmp``, tagged with ``piece_parse_model_test``, expanded by
    ``change_nodel``, given new tags from ``get_new_tag_pack_main`` and
    scored with ``count_score``; the results are added back to the kept
    candidates. The loop ends when nothing is unfinished.

    Args:
        nodel: the initial node list.
        tag_model: not used inside this function.
        pcfg_model: passed through to ``count_score``.

    Returns:
        A list of ``(node, score)`` pairs, where ``node`` is the first
        element of each candidate's node list (the tree node structure).
    """
    # Each candidate is (node list, accumulated score).
    kbest = [(nodel, 0)]

    while True:
        kbest, unfinished = get_undone(kbest, beam_size)
        if len(unfinished) == 0:  # all finished
            break

        # Features of every unfinished candidate, each followed by an empty line.
        feature_lines = []
        for candidate in unfinished:
            feature_lines.extend(get_nodel_feature(candidate[0]))
            feature_lines.append('')
        write_file(piece_parse_tmp, feature_lines)
        crftag_l = piece_parse_model_test(piece_parse_tmp)
        if len(crftag_l) != len(unfinished):
            print('number not match?piece_parse.py#get_piece_parsed')

        # Expand every candidate first so that all new-tag requests can be
        # sent in a single packed call (faster than one call per candidate).
        # change_nodel returns [(nodel, diff_node_index, new_tag_features)].
        expansions = []
        tag_features = []
        for pos, crf_tags in enumerate(crftag_l):
            expanded = change_nodel(unfinished[pos][0], crf_tags)
            expansions.append(expanded)
            tag_features.extend([item[2] for item in expanded])
        new_tags = get_new_tag_pack_main(tag_features)

        # Hand the tags back in the same order the features were collected.
        next_tag = 0
        grown = []
        for pos, expanded in enumerate(expansions):
            for item in expanded:
                new_tag = new_tags[next_tag]
                new_nodes = item[0]
                changed_at = item[1]
                new_nodes[changed_at].tag = new_tag
                next_tag += 1
                # Score so far plus the score of the node that just changed.
                grown.append((new_nodes,
                              unfinished[pos][1]
                              + count_score(new_nodes[changed_at], pcfg_model)))
        kbest.extend(grown)

    # Only the first node of each candidate is left.
    # Original note (marked done): every clause should be the running best
    # and then joined together, not just the best of a single clause.
    return [(entry[0][0], entry[1]) for entry in kbest]