Ejemplo n.º 1
0
Archivo: snli.py Proyecto: ikajic/pysem
    def rnn_accuracy(self, data):
        """Return the fraction of samples in ``data`` classified correctly.

        Samples are consumed in batches of 500. For every batch the first
        sentences are encoded with ``self.encoder`` and the second sentences
        with a deep copy of it (presumably so the two forward passes do not
        overwrite each other's state -- confirm against the encoder). The two
        root embeddings are concatenated and passed to the classifier, and
        the predictions are compared with the argmax of the binarized labels.

        Args:
            data: iterable of samples exposing ``sentence1``, ``sentence2``
                and ``label`` attributes.

        Returns:
            ``n_correct / n_total``, or ``0.0`` when ``data`` yields no
            samples (this case used to raise ``ZeroDivisionError``).
        """
        samples = iter(data)
        n_correct = 0
        n_total = 0

        while True:
            batch = list(islice(samples, 500))
            if not batch:
                break
            n_total += len(batch)

            s1s = [sample.sentence1 for sample in batch]
            s2s = [sample.sentence2 for sample in batch]

            s1_encoder = self.encoder
            s2_encoder = deepcopy(self.encoder)

            s1_encoder.forward_pass(s1s)
            s2_encoder.forward_pass(s2s)

            s1_embs = s1_encoder.get_root_embedding()
            s2_embs = s2_encoder.get_root_embedding()

            # Stack the two sentence embeddings along the first axis to form
            # the classifier input.
            xs = np.concatenate((s1_embs, s2_embs))
            ys = SNLI.binarize([sample.label for sample in batch])

            predictions = self.classifier.predict(xs)
            n_correct += sum(np.equal(predictions, np.argmax(ys, axis=0)))

        # Guard against an empty dataset instead of dividing by zero.
        if n_total == 0:
            return 0.0
        return n_correct / n_total
Ejemplo n.º 2
0
def test_snli_binarizer():
    """Check the array SNLI.binarize returns for the three SNLI labels.

    The entries must sum to the number of labels, and a handful of
    individual positions are checked for the expected 0/1 value.
    """
    labels = ['entailment', 'neutral', 'contradiction']
    encoded = SNLI.binarize(labels)

    assert np.sum(encoded) == len(labels)

    # (index, expected value) pairs, checked in this order.
    expected_entries = (
        ((0, 0), 1),
        ((0, 1), 0),
        ((1, 1), 1),
        ((1, 0), 0),
        ((2, 2), 1),
    )
    for position, value in expected_entries:
        assert encoded[position] == value
Ejemplo n.º 3
0
Archivo: snli.py Proyecto: ikajic/pysem
    def _train_bow_model(self):
        for n in range(self.iters):
            self.encoder_copy = deepcopy(self.encoder)
            batch = random.sample(self.train_data, self.bsize)
            s1s = [sample.sentence1 for sample in batch]
            s2s = [sample.sentence2 for sample in batch]
            ys = SNLI.binarize([sample.label for sample in batch])

            self.training_iteration(s1s, s2s, ys)
            self.bow_encoder_update()
            self._log_status(n)

        self.acc.append(self.rnn_accuracy(self.dev_data))
Ejemplo n.º 4
0
Archivo: snli.py Proyecto: ikajic/pysem
    def _train_recursive_model(self):
        '''Adapt training regime to accomodate recursive encoder structure.'''
        for n in range(self.iters):
            self.encoder.tree = None
            self.encoder_copy = deepcopy(self.encoder)
            batch = random.sample(self.train_data, self.bsize)
            w1 = []
            w2 = []
            for sample in batch:
                s1 = sample.sentence1
                s2 = sample.sentence2
                ys = SNLI.binarize([sample.label])
                self.training_iteration(s1, s2, ys)

                w1 += [n.lower_ for n in self.encoder.tree]
                w2 += [n.lower_ for n in self.encoder_copy.tree]

            self.word_set = set(w1 + w2)
            self.dnn_encoder_update()
            self._log_status(n)

        self.acc.append(self.dnn_accuracy(self.dev_data))
Ejemplo n.º 5
0
import pickle
import time

from pysem.corpora import SNLI
from pysem.utils.ml import MultiLayerPerceptron
from pysem.utils.snli import CompositeModel, BagOfWords

# Load the SNLI corpus object and its vocabulary file.
snli = SNLI('/Users/peterblouw/corpora/snli_1.0/')
snli.load_vocab('snli_words.pickle')

# `dim` is passed to the encoder; the classifier input is twice that size.
dim = 300
pretrained = 'pretrained_snli_embeddings.pickle'

encoder = BagOfWords(dim=dim, vocab=snli.vocab, pretrained=pretrained)
classifier = MultiLayerPerceptron(di=2 * dim, dh=dim, do=3)

# The timer covers model construction, training and evaluation.
start_time = time.time()
model = CompositeModel(snli, encoder, classifier)
model.train(iters=50, bsize=100, rate=0.1, log_interval=10, schedule=12000)

# Report accuracy on each split, one at a time, in the original order.
for title, split_name in (('Test: ', 'test_data'),
                          ('Train: ', 'train_data'),
                          ('Dev: ', 'dev_data')):
    print(title, model.rnn_accuracy(getattr(model, split_name)))

# time.time() is in seconds; divide by 3600 to report hours.
elapsed_hours = (time.time() - start_time) / 3600.0
print('Total runtime: ', elapsed_hours)

# Persist the trained model with pickle.
with open('bow_model', 'wb') as model_file:
    pickle.dump(model, model_file)
Ejemplo n.º 6
0
def snli():
    """Return an SNLI object for ``snli_path`` after building its vocabulary and loading its sentences."""
    corpus = SNLI(snli_path)
    corpus.build_vocab()
    corpus.load_sentences()
    return corpus