def train_sent(self, sent):
    """Update the model weights from one training sentence.

    The perceptron counter is advanced first, then a beam search is run
    over ``[ROOT] + sent`` for ``len(sent) - 1`` steps (length counted
    with ROOT included).

    * Early update: as soon as the top valid state returned by
      ``extend_beam_for_train`` is no longer in the beam, the beam's top
      state is updated with ``-1``, the valid state with ``+1``, and the
      search stops.
    * Full update: if every step completes without an early update and
      the arcs of the beam's top state differ from the gold tree, the
      same pair of updates is applied to the final states.

    NOTE(review): the original indentation of this method was lost.  The
    final check is assumed to run only when no early update happened;
    confirm against the intended training regime.

    Args:
        sent: list of tokens without the ROOT token.  The caller's list
            is not modified; a new list with ROOT in front is built.

    Returns:
        None.  The model is updated through ``self.update_paramaters``.
    """
    self.model.update_perceptron_counter()

    # ROOT token goes at the front of the token list.
    sent = [ROOT] + sent
    init_state = get_state(sent)
    gold_tree = get_tree(sent)

    beam = Beam(self.model.beam_size)
    beam.add(init_state)
    top_valid_state = None

    # One step per token after ROOT ("loop n-1 step" in the original).
    for _ in range(len(sent) - 1):
        beam, top_valid_state = self.extend_beam_for_train(beam, gold_tree)
        if not beam.has_element(top_valid_state.top()):
            # The valid state fell off the beam: penalise the current
            # best state, reward the valid one, and stop searching.
            self.update_paramaters(beam.top(), -1)
            self.update_paramaters(top_valid_state.top(), 1)
            break
    else:
        # Reached only when the loop finished without an early update.
        if top_valid_state is None:
            # Zero steps were run (empty sentence): there is no valid
            # state to reward, so there is nothing to update.
            return
        top_beam = beam.top()
        if not compare_arcs(top_beam['arcs'], gold_tree):
            self.update_paramaters(top_beam, -1)
            self.update_paramaters(top_valid_state.top(), 1)
def train(self, sent):
    """Update the model parameters from one training sentence.

    A beam search is run over ``[ROOT] + sent`` for ``len(sent) - 1``
    steps (length counted with ROOT included).  ``self._extend_beam``
    returns the next beam together with the valid action kept for that
    step.

    * If that valid action is not in the new beam, ``self.model.update``
      is called with the beam's top state and the valid action, and the
      search stops.
    * If all steps complete and the ``'deps'`` of the beam's top state
      differ from the gold dependencies, the same update is applied to
      the final states (full update).

    NOTE(review): the original indentation of this method was lost.  The
    ``else`` is read as belonging to the ``for`` loop, since the original
    comment speaks of the *final* deps; confirm.

    Args:
        sent: list of tokens without the ROOT token.  The caller's list
            is not modified; a new list with ROOT in front is built.

    Returns:
        None.  The model is updated through ``self.model.update``.
    """
    # ROOT token at the beginning of the pending list.
    sent = [ROOT] + sent
    # Oracle object used to check whether an action is valid.
    oracle = Oracle(sent)
    # Gold dependencies, needed for the full update at the end.
    gold_deps = self._build_gold(sent)

    beam = Beam(self.model.beam_size)
    beam.add(self._get_state(sent))

    # Valid action returned by the most recent beam extension.
    valid_action = None
    for _ in range(len(sent) - 1):
        beam, valid_action = self._extend_beam(beam, oracle)
        if not beam.has_element(valid_action):
            # The valid action is not in the beam: update and stop.
            self.model.update(beam.top(), valid_action)
            break
    else:
        # Reached only when the loop finished without an early update.
        if valid_action is None:
            # Zero steps were run (empty sentence): there is no valid
            # action to update towards.
            return
        beam_top = beam.top()
        # If the final deps differ from the gold deps, do a full update.
        if not self._check_equal(gold_deps, beam_top['deps']):
            self.model.update(beam_top, valid_action)
def _extend_beam(self, beam, oracle):
    """Expand every state in ``beam`` by one action.

    For each state, every adjacent pair of pending tokens is scored with
    the model, and every class in the score mapping yields one successor
    state.  All successors are offered to a new beam of size
    ``self.model.beam_size``; successors flagged valid are also offered
    to a size-1 beam.

    A successor is flagged valid only when its parent's ``stt`` flag is
    truthy (all earlier actions were valid) and ``self._check_valid``
    accepts the new arc.

    NOTE(review): the original indentation was lost.  ``if stt:`` is
    assumed to guard only the ``is_valid`` assignment, so states with an
    invalid history are still expanded; confirm.

    Args:
        beam: iterable of states to expand.
        oracle: passed through to ``self._check_valid``.

    Returns:
        A pair ``(new_beam, top_valid)``: the new beam, and whatever
        ``Beam.top()`` returns for the size-1 beam of valid successors.
    """
    new_beam = Beam(self.model.beam_size)
    valid_action = Beam(beam_size=1)
    neg_inf = float('-inf')

    for state in beam:
        pending, prev_score, prev_feats, deps, stt = self._extract_state(
            state)
        # A score of -inf means there is no previous score to add to
        # (presumably the initial state -- TODO confirm).  This does not
        # change per action, so it is tested once per state.
        no_prev_score = prev_score == neg_inf

        for i, (tok1, tok2) in enumerate(zip(pending, pending[1:])):
            lc_feats = self.model.featex(pending, deps, i)
            scores = self.model.get_score(lc_feats)
            go_feats = prev_feats + lc_feats

            # items() exists on both Python 2 and 3 mappings; assumes
            # get_score returns a dict-like object -- TODO confirm.
            for clas, score in scores.items():
                arc = self._get_action(clas, tok1, tok2)

                # Previously is_valid was only assigned when stt was
                # truthy, leaving it unbound or stale from an earlier
                # action.  A state whose history is already invalid can
                # never yield a valid successor.
                if stt:
                    is_valid = self._check_valid(arc, deps, oracle)
                else:
                    is_valid = False

                n_pending, n_deps = self._apply_action(arc, state)
                n_score = score if no_prev_score else prev_score + score

                new_state = self._get_state(n_pending, go_feats, n_score,
                                            clas, n_deps, is_valid)
                new_beam.add(new_state)
                if is_valid:
                    valid_action.add(new_state)

    return new_beam, valid_action.top()
def parse(self, sent):
    """Parse one sentence with the current model and return its arcs.

    A beam of size ``self.model.beam_size`` is seeded with the initial
    state of ``[ROOT] + sent`` and extended ``len(sent) - 1`` times
    (length counted with ROOT included) via
    ``self.extend_beam_for_parse``.

    Args:
        sent: list of tokens without the ROOT token.  The caller's list
            is not modified; a new list with ROOT in front is built.

    Returns:
        The ``'arcs'`` entry of the beam's top state after the last step.
    """
    # ROOT token goes at the front of the token list.
    sent = [ROOT] + sent

    beam = Beam(self.model.beam_size)
    beam.add(get_state(sent))

    # One extension step per token after ROOT.
    for _ in range(len(sent) - 1):
        beam = self.extend_beam_for_parse(beam)

    return beam.top()['arcs']
def parse(self, sent):
    """Parse one sentence with the current model parameters.

    A beam of size ``self.model.beam_size`` is seeded with the start
    state of ``[ROOT] + sent`` and extended ``len(sent) - 1`` times
    (length counted with ROOT included) via
    ``self._extend_beam_for_test``.

    Args:
        sent: list of tokens without the ROOT token.  The caller's list
            is not modified; a new list with ROOT in front is built.

    Returns:
        The ``'deps'`` entry of the beam's top state after the last step.
    """
    # ROOT token at the beginning of the pending list.
    sent = [ROOT] + sent

    beam = Beam(self.model.beam_size)
    beam.add(self._get_state(sent))

    # "Loop until only one tree left" in the original: one step per
    # token after ROOT.
    for _ in range(len(sent) - 1):
        beam = self._extend_beam_for_test(beam)

    return beam.top()['deps']