Beispiel #1
0
 def train_sent(self, sent):
     """Update the model weights from a single training sentence.

     Runs a beam search over ``[ROOT] + sent`` and applies at most ONE
     update per sentence:

     * early update -- as soon as the best gold-compatible state returned
       by ``extend_beam_for_train`` is no longer in the beam, update and
       stop processing the sentence;
     * full update -- if every step completed without an early update and
       the arcs of the top beam state differ from the gold tree.

     An update calls ``update_paramaters`` with ``-1`` for the beam's top
     state and with ``1`` for the top gold-compatible state.

     Args:
         sent: the training sentence, without the ROOT token (it is
             prepended here).
     """
     # one perceptron step per training sentence
     self.model.update_perceptron_counter()
     sent = [ROOT] + sent
     init_state = get_state(sent)
     # gold tree the search is compared against
     gold_tree = get_tree(sent)
     beam = Beam(self.model.beam_size)
     beam.add(init_state)
     top_valid_state = None
     # n tokens (including ROOT) need n - 1 steps
     for _ in range(len(sent) - 1):
         beam, top_valid_state = self.extend_beam_for_train(beam, gold_tree)
         if not beam.has_element(top_valid_state.top()):
             # early update: the valid state fell out of the beam
             self.update_paramaters(beam.top(), -1)
             self.update_paramaters(top_valid_state.top(), 1)
             break
     else:
         # Reached only when the loop was NOT left through ``break``, so an
         # early update on the very last step is not followed by a second
         # (full) update for the same sentence.
         top_beam = beam.top()
         predict_arcs = top_beam['arcs']
         # top_valid_state is still None when no step was run (empty
         # sentence); there is then no valid state to update towards.
         if (top_valid_state is not None
                 and not compare_arcs(predict_arcs, gold_tree)):
             self.update_paramaters(top_beam, -1)
             self.update_paramaters(top_valid_state.top(), 1)
Beispiel #2
0
 def train(self, sent):
     """Update the model parameters from one training sentence.

     The sentence is searched with a beam, ROOT token first.  After each
     step, if the valid state returned by ``_extend_beam`` is not in the
     beam, the model is updated against the beam's top state and training
     on this sentence stops.  If all steps complete, the model is updated
     only when the dependencies of the top state differ from the gold ones.
     """
     tokens = [ROOT] + sent
     # the oracle decides which actions are valid
     oracle = Oracle(tokens)
     # gold dependencies, needed for the final (full) update
     gold_deps = self._build_gold(tokens)
     start_state = self._get_state(tokens)
     beam = Beam(self.model.beam_size)
     beam.add(start_state)
     # highest scoring correct state of the latest step
     best_valid = None
     for _ in range(len(tokens) - 1):
         beam, best_valid = self._extend_beam(beam, oracle)
         if not beam.has_element(best_valid):
             # the valid state dropped out of the beam: update and stop
             self.model.update(beam.top(), best_valid)
             return
     # every step kept the valid state in the beam
     final_top = beam.top()
     if not self._check_equal(gold_deps, final_top['deps']):
         # final deps differ from gold deps: full update
         self.model.update(final_top, best_valid)
Beispiel #3
0
 def _extend_beam(self, beam, oracle):
     """Expand every state of ``beam`` by one action.

     For each state, every adjacent pair of pending tokens is scored with
     the model and each scored class is turned into an action; applying
     it gives a new state that is added to the new beam.  New states
     flagged as valid are also collected in a size-1 beam.

     Args:
         beam: the beam of states produced by the previous step.
         oracle: passed to ``_check_valid`` to judge each action.

     Returns:
         A tuple ``(new_beam, top_valid)`` where ``top_valid`` is
         ``valid_action.top()``, the top of the size-1 beam of valid
         states.
     """
     new_beam = Beam(self.model.beam_size)
     valid_action = Beam(beam_size=1)
     for state in beam:
         pending, prev_score, prev_feats, deps, stt = self._extract_state(
             state)
         for i, (tok1, tok2) in enumerate(zip(pending, pending[1:])):
             lc_feats = self.model.featex(pending, deps, i)
             scores = self.model.get_score(lc_feats)
             # features accumulated along the whole action history
             go_feats = prev_feats + lc_feats
             for clas, score in scores.iteritems():
                 arc = self._get_action(clas, tok1, tok2)
                 # stt tells whether every earlier action of this state
                 # was valid; a state with an invalid history can never be
                 # valid again.  Assign is_valid on both paths so it is
                 # never unbound or carried over from a previous action.
                 if stt:
                     is_valid = self._check_valid(arc, deps, oracle)
                 else:
                     is_valid = False
                 n_pending, n_deps = self._apply_action(arc, state)
                 # a previous score of -inf is replaced, not accumulated
                 if prev_score == float('-inf'):
                     n_score = score
                 else:
                     n_score = prev_score + score
                 new_state = self._get_state(n_pending, go_feats, n_score,
                                             clas, n_deps, is_valid)
                 new_beam.add(new_state)
                 if is_valid:
                     valid_action.add(new_state)
     return new_beam, valid_action.top()
Beispiel #4
0
 def parse(self, sent):
     """Parse ``sent`` with beam search and return the predicted arcs.

     The ROOT token is prepended, the beam is extended once per remaining
     token, and the ``'arcs'`` entry of the top state is returned.
     """
     tokens = [ROOT] + sent
     start_state = get_state(tokens)
     beam = Beam(self.model.beam_size)
     beam.add(start_state)
     # one extension step for every token except the first
     steps_left = len(tokens) - 1
     while steps_left > 0:
         beam = self.extend_beam_for_parse(beam)
         steps_left -= 1
     return beam.top()['arcs']
Beispiel #5
0
 def parse(self, sent):
     """Parse one sentence with the current model parameters.

     Returns the ``'deps'`` entry of the top state left in the beam after
     ``len(sent)`` extension steps (ROOT is prepended before searching).
     """
     # ROOT goes first in the pending list
     tokens = [ROOT] + sent
     start_state = self._get_state(tokens)
     beam = Beam(self.model.beam_size)
     beam.add(start_state)
     # each step replaces the beam with the beam of the next step
     n_steps = len(tokens) - 1
     for _ in range(n_steps):
         beam = self._extend_beam_for_test(beam)
     best_state = beam.top()
     return best_state['deps']