def train_iteration(self, filepath):
    """Run one training pass over the sentences in *filepath*.

    For every sentence yielded by ``gen_sentence_train`` the current
    weights are used to decode a tag sequence; the feature counts of the
    predicted sequence are then subtracted from ``self.v`` and the feature
    counts of the gold sequence are added (a perceptron-style update).

    Args:
        filepath: Path of the training file, opened in text mode with the
            platform default encoding.

    Returns:
        None. ``self.v`` is updated in place.

    Raises:
        AssertionError: If the token, gold-tag and predicted-tag sequences
            of a sentence do not all have the same length.
    """
    viterbi = Viterbi("EMPTY")
    # Alias, not a copy: the decoder reads the very object updated below,
    # so each sentence is decoded with the weights learned so far.
    viterbi.v = self.v

    with open(filepath) as train_file:
        # The loop stays inside the ``with`` block because the corpus is
        # consumed while the file is open (presumably read lazily -- confirm
        # against gen_sentence_train).
        corpus = gen_sentence_train(train_file)
        for count, doc in enumerate(corpus, start=1):
            if count % 1000 == 0:
                # Lazy %-args: the message is only formatted when DEBUG
                # logging is actually enabled.
                _logger.debug("%d sentence processed", count)

            # Each element of ``doc`` is indexed as (token, tag).
            sent = [pair[0] for pair in doc]
            tags = [pair[1] for pair in doc]

            # A copy of the tokens is handed to the decoder so that ``sent``
            # is guaranteed to be untouched for feature extraction below.
            tags_pred = viterbi.decode_one(list(sent))
            assert len(sent) == len(tags) == len(tags_pred)

            feat_gold = feat_vect(sent, tags)
            feat_pred = feat_vect(sent, tags_pred)

            # Penalise the features of the predicted sequence first, then
            # reward those of the gold sequence (same order as before).
            for feat in feat_pred:
                self.v[feat] -= feat_pred[feat]
            for feat in feat_gold:
                self.v[feat] += feat_gold[feat]