def transition_seq(sent):
    """Return the transitions extracted for a single CoNLL-U sentence.

    A Tree is built from `sent` (with syn=True and the "conllu" format).
    If the tree reports any non-projective items, define_projective_order
    is applied to it before the transitions are extracted.
    """
    gold_tree = Tree.new_from_conll(
        conll=sent, syn=True, conll_format=u"conllu"
    )
    nonprojective = gold_tree.is_nonprojective()
    if len(nonprojective):
        # Give the tree a projective order before extracting transitions.
        gold_tree.define_projective_order(nonprojective)
    return extract_transitions(gold_tree, sent)
def __init__(self, sent=None, syn=False):
    """Set up a fresh state, optionally seeded from a CoNLL sentence.

    sent -- sentence handed to Tree.new_from_conll, or None to create a
            state that has no tree attached.
    syn  -- forwarded unchanged to Tree.new_from_conll.

    The queue always starts with an artificial ROOT token (index -1) and
    is followed by the tree's tokens when a sentence was given. With
    sent=None the queue holds only ROOT (previously this path raised
    AttributeError because the tokens of a None tree were accessed).
    """
    if sent is not None:
        self.tree = Tree.new_from_conll(sent, syn)
    else:
        self.tree = None
    self.stack = []
    # The artificial ROOT token always heads the queue.
    self.queue = [
        Token(-1, u"ROOT", lemma=u"ROOT", pos=u"ROOT", feat=u"ROOT")
    ]
    if self.tree is not None:
        # Copy the token references so queue edits never touch tree.tokens.
        self.queue += self.tree.tokens[:]
    self.score = 0.0
    self.transitions = []
    self.features = defaultdict(lambda: 0.0)
    self.prev_state = None  # The state from which this one was created, if any
    self.wrong_transitions = 0  # number of wrong transitions, if 0 then same as gold
def __init__(self, sent=None, syn=False):
    """Set up a fresh state, optionally seeded from a CoNLL sentence.

    sent -- sentence handed to Tree.new_from_conll, or None to create a
            state that has no tree attached.
    syn  -- forwarded unchanged to Tree.new_from_conll.

    The queue always starts with an artificial ROOT token (index -1) and
    is followed by the tree's tokens when a sentence was given. With
    sent=None the queue holds only ROOT (previously this path raised
    AttributeError because the tokens of a None tree were accessed).
    """
    if sent is not None:
        self.tree = Tree.new_from_conll(sent, syn)
    else:
        self.tree = None
    self.stack = []
    # The artificial ROOT token always heads the queue.
    self.queue = [
        Token(-1, u"ROOT", lemma=u"ROOT", pos=u"ROOT", feat=u"ROOT")
    ]
    if self.tree is not None:
        # Copy the token references so queue edits never touch tree.tokens.
        self.queue += self.tree.tokens[:]
    self.score = 0.0
    self.transitions = []
    self.features = defaultdict(lambda: 0.0)
    self.prev_state = None  # The state from which this one was created, if any
    self.wrong_transitions = 0  # number of wrong transitions, if 0 then same as gold
def train(self,inp,progress=0.0,quiet=False): """If inp is string, it will be interpreted as a file, otherwise as open file reading unicode""" total=0 failed=0 non=0 for sent in read_conll(inp): total+=1 gs_tree=Tree.new_from_conll(conll=sent,syn=True) non_projs=gs_tree.is_nonprojective() if len(non_projs)>0: gs_tree.define_projective_order(non_projs) non+=1 try: gs_transitions=self.extract_transitions(gs_tree,sent) self.train_one_sent(gs_transitions,sent,progress) # sent is a conll sentence except ValueError: traceback.print_exc() failed+=1 if not quiet: print u"Failed to parse:",failed print u"Total number of trees:",total print u"Non-projectives:",non print u"Progress:",progress
def train(self, inp, progress=0.0, quiet=False): """If inp is string, it will be interpreted as a file, otherwise as open file reading unicode""" total = 0 failed = 0 non = 0 for sent in read_conll(inp): total += 1 gs_tree = Tree.new_from_conll(conll=sent, syn=True) non_projs = gs_tree.is_nonprojective() if len(non_projs) > 0: gs_tree.define_projective_order(non_projs) non += 1 try: gs_transitions = self.extract_transitions(gs_tree, sent) self.train_one_sent(gs_transitions, sent, progress) # sent is a conll sentence except ValueError: traceback.print_exc() failed += 1 if not quiet: print u"Failed to parse:", failed print u"Total number of trees:", total print u"Non-projectives:", non print u"Progress:", progress