def __init__(self, model=None):
    """Initialise the feature extractor, the model and the excluded types.

    model -- stored unchanged on ``self.model``; defaults to ``None``.
    """
    # Dependency type names collected into the ``resttypes`` set.
    excluded_types = [u"cc", u"conj", u"punct", u"ellipsis"]
    self.features = JumpFeatures()
    self.model = model
    self.resttypes = set(excluded_types)
class ConjPropagation(object):
    """Generate and classify dependencies propagated over ``tree.conjs``.

    For a dependency whose governor or dependent is the governor of an
    entry in ``tree.conjs``, candidate (gov, dep) pairs are produced by
    swapping that endpoint for the entry's dependent.  ``learn`` writes one
    training example per candidate and ``predict`` asks the model which
    candidates should become new dependencies.
    """

    def __init__(self, model=None):
        """Create the feature extractor and remember the (optional) model.

        model -- classifier used by ``predict``; ``learn`` does not need it.
        """
        self.features = JumpFeatures()
        self.model = model
        # Dependency types that are never considered for jumping.
        self.resttypes = set([u"cc", u"conj", u"punct", u"ellipsis"])

    def can_jump(self, dep, tree):
        """Check whether dep can jump.

        Returns False when ``dep.dtype`` is one of ``self.resttypes``.
        Otherwise returns True iff either endpoint of ``dep`` is the
        governor of some entry in ``tree.conjs``.
        """
        if dep.dtype in self.resttypes:
            return False
        return any(
            dep.dep == conj.gov or dep.gov == conj.gov
            for conj in tree.conjs
        )

    def possible_jumps(self, g, d, tree):
        """Return the one-step propagations of the candidate pair (g, d).

        g,d is one candidate for propagation; the result is a set of
        'propagated' (gov, dep) tuples, possibly empty.
        """
        candidates = set()
        for conj in tree.conjs:
            if conj.gov == d:
                # dependent can move
                candidates.add((g, conj.dep))
            elif conj.gov == g:
                # governor can move
                candidates.add((conj.dep, d))
        return candidates

    def gather_all_jumps(self, g, d, tree):
        """Return every pair reachable from (g, d) by repeated jumps.

        This is the transitive closure of ``possible_jumps``.  The starting
        pair itself is never part of the result.  The search is iterative
        and tracks visited pairs, so it terminates even if ``tree.conjs``
        contains a cycle and never expands the same pair twice.
        """
        start = (g, d)
        found = set()
        pending = [start]
        while pending:
            gov, dep = pending.pop()
            for pair in self.possible_jumps(gov, dep, tree):
                if pair != start and pair not in found:
                    found.add(pair)
                    pending.append(pair)
        return found

    @staticmethod
    def _training_label(types):
        """Pick the class label for a candidate from its existing dep types.

        types -- indexable collection of dependency type names (as returned
                 by ``is_dep``); empty/falsy means the candidate is not a
                 dependency in the tree.
        """
        if not types:
            return u"no"
        if len(types) == 2 and u"rel" in types:
            # Prefer the type that accompanies the rel.
            for dtype in types:
                if dtype != u"rel":
                    return dtype
            # Both entries are rel: there is no secondary type to pick, so
            # fall back to rel instead of reusing a stale/unbound label.
            return u"rel"
        assert len(types) < 2, "unexpected multiple dependency types"
        return types[0]

    def learn(self, tree, outfile):
        """Write one training example per propagation candidate in ``tree``.

        Dependencies flagged ``CC``, of type ``rel``, or rejected by
        ``can_jump`` are skipped.  For every candidate pair the label from
        ``is_dep`` and the features from ``self.features.create`` are passed
        to ``writeData`` together with ``outfile``.
        """
        for dep in tree.deps:
            if dep.flag == u"CC" or dep.dtype == u"rel":
                continue
            if not self.can_jump(dep, tree):
                continue
            for g, d in self.gather_all_jumps(dep.gov, dep.dep, tree):
                klass = self._training_label(is_dep(g, d, tree))
                features = self.features.create(dep, g, d, tree)
                writeData(outfile, klass, features)

    def predict(self, tree):
        """Jump one tree.

        Classifies every propagation candidate with ``self.model`` and adds
        the accepted ones to ``tree`` (flagged ``CC``) via ``tree.add_dep``.
        If no model is set, a message is written to stderr and the process
        exits with status 1.
        """
        if self.model is None:
            # sys.stderr.write works on both Python 2 and 3, unlike the
            # print-chevron statement.
            sys.stderr.write(u"no model found\n")
            sys.exit(1)
        # Collect first and add afterwards so tree.deps is not modified
        # while it is being iterated.
        new = []
        for dep in tree.deps:
            if not self.can_jump(dep, tree):
                continue
            if dep.dtype == u"rel":
                # this is just the rel, we want the secondary function
                continue
            for g, d in self.gather_all_jumps(dep.gov, dep.dep, tree):
                # ...should return (name,value) tuples
                features = self.features.create(dep, g, d, tree)
                klass = self.model.predict_one(features)
                klass_str = self.model.number2klass[klass]
                if klass_str == u"no":
                    continue
                if u"&" in klass_str:
                    # this is merged rel: add also rel
                    new.append(Dep(g, d, u"rel", flag=u"CC"))
                new.append(Dep(g, d, klass_str, flag=u"CC"))
        for dep in new:
            tree.add_dep(dep)