def raduhead(t,dbgmsg='',headword=True):
    """Strip the head annotation from ``t.label`` and record head info on ``t``.

    The node is modified in place; nothing is returned.

    Always sets ``t.label_orig`` to the label as it was on entry.

    If ``headindex_re`` does not match the label, the label is left alone;
    when ``headword`` is true and ``t`` is a preterminal, ``t.headword`` is
    set to the pair ``(t.label, t.children[0].label)``.

    If it matches, groups 1, 2 and 3 are taken as the bare label, the
    annotated child count ``n`` and the 1-based head index. The node then
    gets:

    * ``t.head`` -- the head index as an int;
    * ``t.head_children`` -- the children whose label is not in the skip set
      (``raduhead_npb_skips`` for label ``'NPB'``, else ``raduhead_skips``);
    * ``t.good_head`` -- true iff ``n`` equals the number of head children
      and ``1 <= head <= n``;
    * ``t.headword`` (only when ``headword`` is true) -- the ``headword`` of
      the indexed head child, or, for a bad head index, of the last head
      child (``None`` if no head children remain);
    * ``t.label`` -- the bare label.

    A bad head index is reported through ``warn``; ``dbgmsg`` is appended to
    that warning.

    NOTE(review): reading ``child.headword`` assumes the children have
    already been processed with ``headword`` enabled (presumably a
    bottom-up traversal) -- confirm against the caller.
    """
    m = headindex_re.match(t.label)
    t.label_orig = t.label
    if m is None:
        if headword and t.is_preterminal():
            t.headword = (t.label, t.children[0].label)
        return

    label, n, head = m.group(1, 2, 3)
    skips = raduhead_npb_skips if label == 'NPB' else raduhead_skips
    t.head = head = int(head)
    n = int(n)

    cn = [c for c in t.children if c.label not in skips]
    t.head_children = cn
    t.good_head = (n == len(cn) and 0 < head <= n)

    if not t.good_head:
        # t.label still holds the annotated label here; it is only replaced
        # by the bare label at the very end.  Children that have not been
        # through raduhead have no label_orig, so fall back to their label
        # rather than crashing while building the warning.
        child_labels = [getattr(c, 'label_orig', c.label) for c in t.children]
        warn('wrong head index for %s'%label,
             ('%s!=%s %s => %s %s')%(n, len(cn), t.label, child_labels, dbgmsg),
             max=None)
        if headword:
            # Best-effort fallback: use the last head child.  If every child
            # was skipped there is nothing to take a headword from.
            t.headword = cn[-1].headword if cn else None
    elif headword:
        # good_head guarantees 1 <= head <= len(cn), so the index is valid.
        t.headword = cn[head - 1].headword
    t.label = label
def str_to_tree_warn(s,paren_after_root=False,max=None):
    """Parse ``s`` into a tree, warning (instead of raising) on failure.

    ``s`` is tokenized with ``tree.tokenizer`` and handed to
    ``tree.scan_tree`` starting at token 0, with ``paren_after_root`` passed
    through.  If the token stream is wrapped in an extra pair of parentheses
    -- the Berkeley parser's ``( (tree) )`` form -- the outer pair is dropped
    first.

    Returns whatever tree ``scan_tree`` produced.  When that is ``None`` a
    "scan_tree failed" warning is issued showing the position reached, the
    token count, and the tokens before and after that position; ``max`` is
    forwarded to ``warn``.
    """
    tokens = tree.tokenizer.findall(s)

    # berkeley parse ( (tree) )
    extra_parens = (
        len(tokens) > 2
        and tokens[:2] == ['(', '(']
        and tokens[-2:] == [')', ')']
    )
    if extra_parens:
        tokens = tokens[1:-1]

    parsed, pos = tree.scan_tree(tokens, 0, paren_after_root)
    if parsed is not None:
        return parsed

    before = ' '.join(tokens[:pos])
    after = ' '.join(tokens[pos:])
    detail = ": %s of %s: %s ***HERE*** %s" % (pos, len(tokens), before, after)
    warn("scan_tree failed", detail, max=max)
    return parsed