def get_undone(kb,beam_size): #finished and todo candidates may be the same tree
    """Prune a beam of candidates and split it into finished / unfinished.

    Each candidate in ``kb`` is an indexable item whose first element is a
    sequence of nodes (each exposing ``show()``) and whose last element is a
    numeric score (the original comment calls it the PCFG score).

    Steps:
      1. rank the candidates by score, best first (stable sort, so ties keep
         their input order);
      2. drop candidates whose concatenated ``show()`` strings were already
         seen -- because ranking happens first, the best-scored copy of a
         duplicated tree is the one that survives;
      3. keep the best ``beam_size`` candidates;
      4. split them into ``done`` (exactly one node left) and ``undone``.

    Returns a ``(done, undone)`` pair of lists.
    """
    # Rank before de-duplicating: otherwise a lower-scored duplicate that
    # merely appears earlier in ``kb`` would shadow a better-scored copy.
    ranked=sorted(kb,key=lambda x:-x[-1])           #sorted by pcfg
    seen=set()
    unique=[]
    for nl in ranked:
        # NOTE(review): the key is a plain concatenation of the node strings;
        # this assumes show() output cannot be ambiguous across node boundaries.
        string=''.join([node.show() for node in nl[0]])
        if string not in seen:
            seen.add(string)
            unique.append(nl)
    beam=unique[:beam_size]                         #best beam_size
    #todo: a candidate with a single (unary) branch may still need to continue
    done=[item for item in beam if len(item[0])==1]
    undone=[item for item in beam if len(item[0])!=1]
    return done,undone
def split_sen(fn,resf):
    """Split every tree in file ``fn`` and write the pieces to ``resf``.

    Each non-blank line of ``fn`` is decoded from UTF-8 (Python 2 byte
    strings), parsed with ``read_tree`` and split with ``split_main``.  The
    ``show()`` string of every resulting node is appended to the output,
    followed by one empty string per input tree as a separator.  Blank
    lines produce no output but still count towards the progress counter,
    which is printed every 1000 input lines.  The collected strings are
    handed to ``write_file(resf, ...)`` at the end.
    """
    res=[]
    # ``with`` guarantees the input handle is closed, even if parsing fails.
    with open(fn) as fh:
        for i,raw in enumerate(fh,1):
            line=raw.strip().decode('utf8')
            if len(line.strip())>0:
                t=read_tree(line)
                tl=split_main(t)
                for node in tl:
                    # (disabled) collapsing of unary chains:
##                    while not node.isleaf and len(node.son)==1:
##                        node=node.son[0]
                    res.append(node.show())
                res.append('')
            if i%1000==0:
                print(i)
    write_file(resf,res)