Esempio n. 1
0
    def test_multilabel(self):
        """Fit a tiny embedding classifier on synthetic multilabel data.

        The same (X, Y) pair serves as both training and validation data.
        The test passes when the last recorded validation loss is below 0.05.
        """
        features, labels = synthetic_multilabel()
        data = (features, labels)
        self.assertTrue(U.is_multilabel(data))

        seq_len = 7
        vocab_size = 4
        n_labels = 4
        model = Sequential([
            Embedding(vocab_size + 1, 50, input_length=seq_len),
            GlobalAveragePooling1D(),
            Dense(n_labels, activation='sigmoid'),
        ])
        model.compile(
            loss='binary_crossentropy',
            optimizer='adam',
            metrics=['accuracy'],
        )

        learner = ktrain.get_learner(
            model, train_data=data, val_data=data, batch_size=1
        )
        learner.lr_find()
        history = learner.fit(0.001, 200)
        learner.view_top_losses(n=5)
        learner.validate()

        # The check is on loss rather than accuracy because of
        # https://github.com/tensorflow/tensorflow/issues/41114
        final_loss = history.history['val_loss'][-1]
        print('final_loss:%s' % final_loss)
        self.assertLess(final_loss, 0.05)
Esempio n. 2
0
 def __test_ner(self, trn, val, preproc):
     """Check NER training data and its preprocessor against fixed values.

     Verifies how the ``U`` helpers classify and measure ``trn``, the class
     list reported by ``preproc``, and the ids produced for "hello world".
     ``val`` is accepted but not inspected by this helper.
     """
     n_sentences = 14041
     expected_tags = [
         "<pad>", "O",
         "B-LOC", "B-PER", "B-ORG",
         "I-PER", "I-ORG",
         "B-MISC", "I-LOC", "I-MISC",
     ]

     # How the utility helpers classify the training data.
     self.assertTrue(U.is_iter(trn))
     self.assertTrue(U.is_ner(data=trn))
     self.assertFalse(U.is_multilabel(trn))

     # Size-related queries.
     self.assertEqual(U.shape_from_data(trn), (n_sentences, 47))
     self.assertFalse(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), n_sentences)
     self.assertEqual(U.nclasses_from_data(trn), len(expected_tags))
     self.assertEqual(len(U.y_from_data(trn)), n_sentences)
     self.assertFalse(U.bert_data_tuple(trn))

     self.assertEqual(preproc.get_classes(), expected_tags)

     # Preprocessing one sentence yields one item with the expected ids.
     encoded = preproc.preprocess(["hello world"])
     self.assertEqual(len(encoded), 1)
     first_ids = encoded[0][0][0][0]
     self.assertEqual(first_ids.tolist(), [21010, 100])
 def __test_texts_bert(self, trn, val, preproc):
     """Check text data that ``U.bert_data_tuple`` accepts, plus its preprocessor.

     Both splits must hold (4, 10) inputs in their first element's first
     entry and (4, 2) labels. The preprocessor must produce 101 as the
     first id for 'hello book' and decode the first validation row to the
     expected '[CLS] ... [SEP]' string.
     """
     self.assertFalse(U.is_iter(trn))

     # Same shape expectations for the training and validation splits.
     for split in (trn, val):
         self.assertEqual(split[0][0].shape, (4, 10))
         self.assertEqual(split[1].shape, (4, 2))

     self.assertFalse(U.is_multilabel(trn))
     self.assertEqual(U.shape_from_data(trn), (4, 10))
     self.assertFalse(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), 4)
     self.assertEqual(U.nclasses_from_data(trn), 2)
     self.assertEqual(U.y_from_data(trn).shape, (4, 2))
     self.assertTrue(U.bert_data_tuple(trn))

     # NOTE(review): both sides are the same call, so this only shows that
     # get_classes() can be called twice with equal results. Consider
     # asserting the expected class list instead.
     classes = preproc.get_classes()
     self.assertEqual(classes, preproc.get_classes())

     text = 'hello book'
     encoded = preproc.preprocess([text])
     self.assertEqual(encoded[0][0][0], 101)
     encoded = preproc.preprocess([text])
     self.assertEqual(encoded[0].shape, (1, 10))

     first_val_ids = val[0][0][0]
     self.assertEqual(preproc.undo(first_val_ids), '[CLS] the book is bad . [SEP]')
 def __test_texts_standard(self, trn, val, preproc):
     """Check (inputs, labels) text data and its preprocessor against fixed values.

     Both splits must hold (4, 10) inputs and (4, 2) labels. The
     preprocessor must report an n-gram count of 3, give 1 as the last id
     for 'hello book', and decode the first validation row to
     'the book is bad'.
     """
     self.assertFalse(U.is_iter(trn))

     # Same shape expectations for the training and validation splits.
     for split in (trn, val):
         self.assertEqual(split[0].shape, (4, 10))
         self.assertEqual(split[1].shape, (4, 2))

     self.assertFalse(U.is_multilabel(trn))
     self.assertEqual(U.shape_from_data(trn), (4, 10))
     self.assertFalse(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), 4)
     self.assertEqual(U.nclasses_from_data(trn), 2)
     self.assertEqual(U.y_from_data(trn).shape, (4, 2))
     self.assertFalse(U.bert_data_tuple(trn))

     # NOTE(review): both sides are the same call, so this only shows that
     # get_classes() can be called twice with equal results. Consider
     # asserting the expected class list instead.
     classes = preproc.get_classes()
     self.assertEqual(classes, preproc.get_classes())
     self.assertEqual(preproc.ngram_count(), 3)

     text = 'hello book'
     encoded = preproc.preprocess([text])
     self.assertEqual(encoded[0][-1], 1)
     encoded = preproc.preprocess([text])
     self.assertEqual(encoded.shape, (1, 10))

     first_val_row = val[0][0]
     self.assertEqual(preproc.undo(first_val_row), 'the book is bad')
Esempio n. 5
0
 def __test_texts_tfidf(self, trn, val, preproc):
     """Check 100-column text data and its preprocessor against fixed values.

     Both splits must hold (4, 100) inputs and (4, 2) labels. The
     preprocessor must report classes ['neg', 'pos'], an n-gram count of 1,
     a value formatting to '0.5878' at index [0][1] for 'hello book', and
     'book is the bad' when undoing the first validation row.
     """
     n_features = 100
     self.assertFalse(U.is_iter(trn))

     # Same shape expectations for the training and validation splits.
     for split in (trn, val):
         self.assertEqual(split[0].shape, (4, n_features))
         self.assertEqual(split[1].shape, (4, 2))

     self.assertFalse(U.is_multilabel(trn))
     self.assertEqual(U.shape_from_data(trn), (4, n_features))
     self.assertFalse(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), 4)
     self.assertEqual(U.nclasses_from_data(trn), 2)
     self.assertEqual(U.y_from_data(trn).shape, (4, 2))
     self.assertFalse(U.bert_data_tuple(trn))

     self.assertEqual(preproc.get_classes(), ['neg', 'pos'])
     self.assertEqual(preproc.ngram_count(), 1)

     text = 'hello book'
     # Compared as a 4-decimal string, so the check is on the rounded value.
     weight = preproc.preprocess([text])[0][1]
     self.assertEqual('%.4f' % weight, '0.5878')
     vectorized = preproc.preprocess([text])
     self.assertEqual(vectorized.shape, (1, n_features))

     first_val_row = val[0][0]
     self.assertEqual(preproc.undo(first_val_row), 'book is the bad')