def test_dump_load(model):
    """Check that dumping a model and loading it elsewhere restores its scores.

    The model is finalised with ``end_training()``, dumped to disk, and the
    dump is checked to be non-empty. A freshly constructed
    ``AveragedPerceptron`` must score three probe feature sets differently
    from ``model`` before ``load()`` and identically afterwards.

    ``model`` is presumably supplied by a pytest fixture defined elsewhere.
    """
    # Local imports keep this block self-contained.
    import os
    import shutil
    import tempfile

    nr_class = 5
    probes = (
        {1: 1, 3: 1, 4: 1},
        {2: 1, 5: 1},
        {2: 1, 3: 1, 4: 1},
    )

    # A private directory avoids clashing with other runs (or other users)
    # on a fixed, shared path, and works where '/tmp' does not exist.
    tmp_dir = tempfile.mkdtemp()
    try:
        loc = os.path.join(tmp_dir, 'test_model')
        model.end_training()
        model.dump(loc)
        with open(loc, 'rb') as file_:
            string = file_.read()
        assert string

        new_model = AveragedPerceptron([(1,), (2,), (3,), (4,)])
        # Each call gets its own dict, as with the original literals, in case
        # get_scores modifies its argument.
        for feats in probes:
            assert get_scores(nr_class, model, dict(feats)) != \
                get_scores(nr_class, new_model, dict(feats))

        new_model.load(loc)
        for feats in probes:
            assert get_scores(nr_class, model, dict(feats)) == \
                get_scores(nr_class, new_model, dict(feats))
    finally:
        # Remove the dump even when an assertion fails.
        shutil.rmtree(tmp_dir, ignore_errors=True)
def test_dump_load(model):
    """Round-trip a model through ``dump``/``load`` and compare its scores.

    After ``end_training()`` the model is dumped to a temporary file, which
    must be non-empty. A newly built ``AveragedPerceptron`` has to disagree
    with ``model`` on three probe feature sets before it loads the dump, and
    agree on all of them afterwards.

    ``model`` is presumably supplied by a pytest fixture defined elsewhere.
    """
    # Local import keeps this block self-contained.
    import os

    # mkstemp returns an already-open OS-level descriptor together with the
    # path; close it straight away so it is not leaked. Only the path is used.
    fd, loc = tempfile.mkstemp()
    os.close(fd)
    try:
        model.end_training()
        model.dump(loc)
        with open(loc, 'rb') as file_:
            string = file_.read()
        assert string

        new_model = AveragedPerceptron([(1, ), (2, ), (3, ), (4, )])
        nr_class = 5

        def probes():
            # Fresh dicts on every call, as with the original literals, in
            # case get_scores modifies its argument.
            return [
                {1: 1, 3: 1, 4: 1},
                {2: 1, 5: 1},
                {2: 1, 3: 1, 4: 1},
            ]

        for ours, theirs in zip(probes(), probes()):
            assert get_scores(nr_class, model, ours) != \
                get_scores(nr_class, new_model, theirs)

        new_model.load(loc)
        for ours, theirs in zip(probes(), probes()):
            assert get_scores(nr_class, model, ours) == \
                get_scores(nr_class, new_model, theirs)
    finally:
        # Delete the temporary dump whether or not the assertions passed.
        os.remove(loc)
class ThincModel(object):
    """Text classifier wrapping an ``AveragedPerceptron``.

    Texts are passed through ``nlp`` and converted to features over
    lower-cased tokens; see :meth:`Eg` for the exact feature set.
    """

    def __init__(self, nlp, nr_class):
        """Store the pipeline and create an empty model.

        Args:
            nlp: Callable applied to a text; its result is sliced, indexed
                and iterated for tokens exposing ``lower`` and ``lower_``
                (presumably a spaCy pipeline -- confirm against the caller).
            nr_class: Number of classes; class ids are ``range(nr_class)``.
        """
        self.nlp = nlp
        self.nr_class = nr_class
        # A single Example is reused for every text (see Eg).
        self._eg = Example(nr_class=nr_class)
        self._model = AveragedPerceptron([])

    def Eg(self, text, opt=None, label=None):
        """Fill the shared ``Example`` with the features of ``text``.

        Every token except the last contributes two features in slot 0 with
        value 1: one keyed by ``token.lower`` and one keyed by the hash of
        the ``"<token>_<next token>"`` lower-cased pair.

        Args:
            text: The text to featurise.
            opt: Optional container of allowed class ids. When given,
                ``is_valid`` is set per class to ``clas in opt``.
            label: Optional gold class id. When given, ``costs`` is set per
                class to ``clas != label``.

        Returns:
            The shared ``Example``. It is reset and overwritten by the next
            call, so do not hold on to it across calls.
        """
        eg = self._eg
        eg.reset()
        doc = self.nlp(text)
        feat_slot = 0
        feat_value = 1
        features = []
        # NOTE(review): the last token never gets its own unigram feature
        # (and a one-token text yields no features at all). Kept as-is so the
        # feature set is unchanged -- confirm whether this is intended.
        for i, token in enumerate(doc[:-1]):
            next_token = doc[i + 1]
            key = hash_string('%s_%s' % (token.lower_, next_token.lower_))
            features.append((feat_slot, token.lower, feat_value))
            features.append((feat_slot, key, feat_value))
        eg.features = features
        if opt is not None:
            eg.is_valid = [(clas in opt) for clas in range(self.nr_class)]
        if label is not None:
            eg.costs = [clas != label for clas in range(self.nr_class)]
        return eg

    def predict(self, text, opt):
        """Return the model's ``predict_example`` result for ``text``.

        ``opt`` is the container of allowed class ids (see :meth:`Eg`).
        """
        return self._model.predict_example(self.Eg(text, opt))

    def train(self, examples, n_iter=5):
        """Train for ``n_iter`` passes over ``examples``.

        Args:
            examples: Iterable of ``(text, opt, label)`` triples. It is
                copied first, so the caller's sequence is not reordered and
                any iterable is accepted.
            n_iter: Number of passes. The examples are reshuffled before
                each pass.

        After each pass the number of examples whose ``guess`` differed from
        the label is printed. ``end_training()`` is called on the model once
        all passes are done.
        """
        # Shuffle a private copy so the caller's list is left untouched.
        examples = list(examples)
        for _ in range(n_iter):
            loss = 0
            random.shuffle(examples)
            for text, opt, label in examples:
                eg = self.Eg(text, opt, label)
                self._model.train_example(eg)
                loss += eg.guess != label
            print(loss)
        self._model.end_training()

    def evaluate(self, examples):
        """Return the fraction of ``examples`` predicted correctly.

        Args:
            examples: Iterable of ``(text, opt, label)`` triples.

        Returns:
            Accuracy as a float in ``[0.0, 1.0]``; ``0.0`` when ``examples``
            is empty, so an empty set does not raise ZeroDivisionError.
        """
        total = 0
        correct = 0
        for text, opt, label in examples:
            eg = self.predict(text, opt)
            correct += eg.guess == label
            total += 1
        if not total:
            return 0.0
        # float() forces true division even without
        # `from __future__ import division` on Python 2.
        return float(correct) / total

    def dump(self, loc):
        """Write the underlying model to the path ``loc``."""
        self._model.dump(loc)

    def load(self, loc):
        """Load the underlying model from the path ``loc``."""
        self._model.load(loc)