コード例 #1
0
def get_piece_parsed(nodel, tag_model, pcfg_model):
    """Expand one piece with a beam of candidates and return scored nodes.

    The search starts from the single candidate ``(nodel, 0)``. On every
    round ``get_undone`` (called with the module-level ``beam_size``) hands
    back the candidates to keep and the ones still to be expanded; the loop
    ends once nothing is left to expand.

    Args:
        nodel: node list of the piece; used as the initial candidate.
        tag_model: not used inside this function.
        pcfg_model: forwarded to ``count_score`` to score each changed node.

    Returns:
        A list of ``(node, score)`` tuples, where ``node`` is the first
        element of each surviving candidate's node list.
    """
    # Each beam entry is (node list, accumulated score).
    beam = [(nodel, 0)]
    while True:
        beam, pending = get_undone(beam, beam_size)
        if len(pending) == 0:
            # Nothing left to expand: every candidate is finished.
            break

        # Collect the features of every pending candidate; an empty entry
        # is appended after each candidate's feature lines.
        feature_lines = []
        for candidate in pending:
            feature_lines.extend(get_nodel_feature(candidate[0]))
            feature_lines.append('')

        # Round-trip through the feature file to obtain the model output.
        write_file(piece_parse_tmp, feature_lines)
        actions = piece_parse_model_test(piece_parse_tmp)
        if len(actions) != len(pending):
            print('number not match?piece_parse.py#get_piece_parsed')

        # Apply each model result to its candidate. change_nodel yields
        # items shaped (nodel, diff_node_index, new_tag_features); the tag
        # features are gathered so tagging runs as one packed call.
        expansions = []
        tag_features = []
        for position, action in enumerate(actions):
            expanded = change_nodel(pending[position][0], action)
            expansions.append(expanded)
            tag_features.extend([item[2] for item in expanded])
        new_tags = get_new_tag_pack_main(tag_features)

        # Hand the tags back in the same order the features were packed,
        # then add the changed node's score to its parent candidate's score.
        next_tag = 0
        scored = []
        for position, expanded in enumerate(expansions):
            for item in expanded:
                item[0][item[1]].tag = new_tags[next_tag]
                next_tag += 1
                total = pending[position][1] + count_score(
                    item[0][item[1]], pcfg_model)
                scored.append((item[0], total))
        beam.extend(scored)

    # Keep only the first node of every candidate's node list.
    # Original note: every clause keeps its k-best results so they can be
    # joined later, instead of only the single best per clause (done).
    return [(entry[0][0], entry[1]) for entry in beam]
def get_piece_joint(kbest, tag_model, pcfg_model):
    """Join pieces with a beam of candidates and return scored nodes.

    Unlike ``get_piece_parsed`` the caller supplies the initial beam.
    On every round ``get_undone`` (called with the module-level
    ``beam_size``) hands back the candidates to keep and the ones still to
    be expanded; the loop ends once nothing is left to expand.

    Args:
        kbest: initial candidates, each ``(piece list, score)`` where the
            piece list looks like ``[p1, p2, ...]``.
        tag_model: not used inside this function.
        pcfg_model: forwarded to ``count_score`` to score each changed node.

    Returns:
        A list of ``(node, score)`` tuples, where ``node`` is the first
        element of each surviving candidate's node list (per the original
        note, only one node is expected to remain at the end).
    """
    while True:
        kbest, pending = get_undone(kbest, beam_size)
        if len(pending) == 0:
            # Nothing left to expand: every candidate is finished.
            break

        # Collect the joint features of every pending candidate; an empty
        # entry is appended after each candidate's feature lines.
        feature_lines = []
        for candidate in pending:
            feature_lines.extend(get_piece_joint_feature(candidate[0]))
            feature_lines.append('')

        # Round-trip through the feature file to obtain the model output.
        write_file(piece_joint_tmp, feature_lines)
        actions = piece_joint_model_test(piece_joint_tmp)
        if len(actions) != len(pending):
            # Fixed: the message used to name get_piece_parsed (copy-paste),
            # pointing whoever reads the log at the wrong function.
            print('number not match?piece_parse.py#get_piece_joint')

        # Apply each model result to its candidate. change_nodel yields
        # items shaped (nodel, diff_node_index, new_tag_features); the tag
        # features are gathered so tagging runs as one packed call.
        expansions = []
        tag_features = []
        for position, action in enumerate(actions):
            expanded = change_nodel(pending[position][0], action)
            expansions.append(expanded)
            tag_features.extend([item[2] for item in expanded])
        new_tags = get_new_tag_pack_main(tag_features)

        # Hand the tags back in the same order the features were packed,
        # then add the changed node's score to its parent candidate's score.
        next_tag = 0
        scored = []
        for position, expanded in enumerate(expansions):
            for item in expanded:
                item[0][item[1]].tag = new_tags[next_tag]
                next_tag += 1
                total = pending[position][1] + count_score(
                    item[0][item[1]], pcfg_model)
                scored.append((item[0], total))
        kbest.extend(scored)

    # Keep only the first node of every candidate's node list.
    return [(entry[0][0], entry[1]) for entry in kbest]