Example #1
0
def test_dump_load(model):
    """A dumped model, loaded into a fresh AveragedPerceptron, must score
    identically to the trained original.

    Before the load, the fresh (untrained) model is expected to disagree
    with the trained one on every probe; after the load they must agree.
    """
    import os
    import tempfile

    # Use a unique temp file instead of the fixed '/tmp/test_model' path,
    # which races with concurrent test runs and is not portable.
    fd, loc = tempfile.mkstemp()
    os.close(fd)  # only the path is needed; model.dump reopens it
    try:
        model.end_training()
        model.dump(loc)
        # Close the handle deterministically instead of leaking it.
        with open(loc, 'rb') as file_:
            string = file_.read()
        assert string
        new_model = AveragedPerceptron([(1,), (2,), (3,), (4,)])
        nr_class = 5
        # Untrained model should score differently from the trained one.
        assert get_scores(nr_class, model, {1: 1, 3: 1, 4: 1}) != \
               get_scores(nr_class, new_model, {1: 1, 3: 1, 4: 1})
        assert get_scores(nr_class, model, {2: 1, 5: 1}) != \
               get_scores(nr_class, new_model, {2: 1, 5: 1})
        assert get_scores(nr_class, model, {2: 1, 3: 1, 4: 1}) != \
               get_scores(nr_class, new_model, {2: 1, 3: 1, 4: 1})
        new_model.load(loc)
        # After loading the dump, the scores must match exactly.
        assert get_scores(nr_class, model, {1: 1, 3: 1, 4: 1}) == \
               get_scores(nr_class, new_model, {1: 1, 3: 1, 4: 1})
        assert get_scores(nr_class, model, {2: 1, 5: 1}) == \
               get_scores(nr_class, new_model, {2: 1, 5: 1})
        assert get_scores(nr_class, model, {2: 1, 3: 1, 4: 1}) == \
               get_scores(nr_class, new_model, {2: 1, 3: 1, 4: 1})
    finally:
        # Don't leave the temp file behind.
        os.unlink(loc)
Example #2
0
def test_dump_load(model):
    """A dumped model, loaded into a fresh AveragedPerceptron, must score
    identically to the trained original.

    Before the load, the fresh (untrained) model is expected to disagree
    with the trained one on every probe; after the load they must agree.
    """
    import os
    import tempfile

    # mkstemp() returns an *open* OS-level file descriptor; the original
    # discarded it via mkstemp()[1], leaking one fd per test run.
    fd, loc = tempfile.mkstemp()
    os.close(fd)
    try:
        model.end_training()
        model.dump(loc)
        # Close the handle deterministically instead of leaking it.
        with open(loc, 'rb') as file_:
            string = file_.read()
        assert string
        new_model = AveragedPerceptron([(1,), (2,), (3,), (4,)])
        nr_class = 5
        # Untrained model should score differently from the trained one.
        assert get_scores(nr_class, model, {1: 1, 3: 1, 4: 1}) != \
               get_scores(nr_class, new_model, {1: 1, 3: 1, 4: 1})
        assert get_scores(nr_class, model, {2: 1, 5: 1}) != \
               get_scores(nr_class, new_model, {2: 1, 5: 1})
        assert get_scores(nr_class, model, {2: 1, 3: 1, 4: 1}) != \
               get_scores(nr_class, new_model, {2: 1, 3: 1, 4: 1})
        new_model.load(loc)
        # After loading the dump, the scores must match exactly.
        assert get_scores(nr_class, model, {1: 1, 3: 1, 4: 1}) == \
               get_scores(nr_class, new_model, {1: 1, 3: 1, 4: 1})
        assert get_scores(nr_class, model, {2: 1, 5: 1}) == \
               get_scores(nr_class, new_model, {2: 1, 5: 1})
        assert get_scores(nr_class, model, {2: 1, 3: 1, 4: 1}) == \
               get_scores(nr_class, new_model, {2: 1, 3: 1, 4: 1})
    finally:
        # Don't leave the temp file behind.
        os.unlink(loc)
Example #3
0
class ThincModel(object):
    """Averaged-perceptron text classifier over bigram features.

    Wraps an NLP pipeline (``nlp``: callable mapping text to a sequence of
    tokens with ``lower``/``lower_`` attributes — presumably spaCy; confirm
    against caller) and an ``AveragedPerceptron`` with ``nr_class`` classes.
    """

    def __init__(self, nlp, nr_class):
        self.nlp = nlp
        self.nr_class = nr_class
        # One scratch Example is reused across calls to Eg() to avoid
        # re-allocating per prediction; Eg() resets it each time.
        self._eg = Example(nr_class=nr_class)
        self._model = AveragedPerceptron([])

    def Eg(self, text, opt=None, label=None):
        """Featurize *text* into the shared scratch Example and return it.

        For each adjacent token pair, emits a unigram feature on the first
        token's lowercase ID and a hashed feature on the lowercased bigram.
        ``opt`` (optional container of class IDs) restricts the valid
        classes; ``label`` (optional gold class) sets the cost vector.

        NOTE: the returned Example is shared state — a later call to Eg()
        overwrites it.
        """
        eg = self._eg
        eg.reset()

        doc = self.nlp(text)

        features = []
        # enumerate() replaces the hand-maintained counter the original
        # incremented alongside the loop variable.
        for idx, token in enumerate(doc[:-1]):
            next_token = doc[idx + 1]
            key = hash_string('%s_%s' % (token.lower_, next_token.lower_))
            features.append((0, token.lower, 1))  # unigram feature, slot 0
            features.append((0, key, 1))          # bigram feature, slot 0

        eg.features = features
        if opt is not None:
            eg.is_valid = [(clas in opt) for clas in range(self.nr_class)]
        if label is not None:
            # Cost 0 for the gold class, 1 for every other class.
            eg.costs = [clas != label for clas in range(self.nr_class)]
        return eg

    def predict(self, text, opt):
        """Score *text* (restricted to classes in *opt*) and return the Example."""
        return self._model.predict_example(self.Eg(text, opt))

    def train(self, examples, n_iter=5):
        """Run *n_iter* shuffled passes of perceptron updates over
        (text, opt, label) triples, printing the error count per pass,
        then average the weights via end_training()."""
        for _ in range(n_iter):
            loss = 0
            random.shuffle(examples)  # in-place shuffle each epoch
            for text, opt, label in examples:
                eg = self.Eg(text, opt, label)
                self._model.train_example(eg)
                loss += eg.guess != label
            print(loss)
        self._model.end_training()

    def evaluate(self, examples):
        """Return accuracy over (text, opt, label) triples (0.0 if empty)."""
        total = 0
        correct = 0
        for text, opt, label in examples:
            eg = self.predict(text, opt)
            correct += eg.guess == label
            total += 1
        # Guard against ZeroDivisionError on an empty evaluation set.
        return correct / total if total else 0.0

    def dump(self, loc):
        """Serialize the underlying perceptron to *loc*."""
        self._model.dump(loc)

    def load(self, loc):
        """Load perceptron weights previously dumped to *loc*."""
        self._model.load(loc)