def test_multilabel(self):
    """Fit a tiny sigmoid-output model on synthetic multilabel data.

    The data from ``synthetic_multilabel()`` must be recognised as
    multilabel by ``U.is_multilabel``.  A small Embedding ->
    GlobalAveragePooling1D -> Dense(sigmoid) network is then trained with
    the same (X, Y) pair serving as both training and validation data, and
    the final validation loss must fall below 0.05.
    """
    X, Y = synthetic_multilabel()
    self.assertTrue(U.is_multilabel((X, Y)))

    seq_len = 7
    vocab_size = 4
    n_labels = 4

    # Assemble the network layer by layer.
    model = Sequential()
    for layer in (
        Embedding(vocab_size + 1, 50, input_length=seq_len),
        GlobalAveragePooling1D(),
        Dense(n_labels, activation='sigmoid'),
    ):
        model.add(layer)
    model.compile(
        loss='binary_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

    learner = ktrain.get_learner(
        model,
        train_data=(X, Y),
        val_data=(X, Y),
        batch_size=1,
    )
    learner.lr_find()

    # fit(0.001, 200): presumably learning rate and number of epochs --
    # confirm against the ktrain Learner API.
    history = learner.fit(0.001, 200)
    learner.view_top_losses(n=5)
    learner.validate()

    # The check is made on validation loss rather than accuracy (the former
    # accuracy check required the final VAL_ACC_NAME value to exceed 0.97)
    # due to: https://github.com/tensorflow/tensorflow/issues/41114
    final_loss = history.history['val_loss'][-1]
    print('final_loss:%s' % final_loss)
    self.assertLess(final_loss, 0.05)
def __test_ner(self, trn, val, preproc):
    """Check the data-utility view of an NER training set and its preprocessor.

    Args:
        trn: training data; must be iterable, flagged as NER data, and
            report 14041 samples, shape (14041, 47) and 10 classes.
        val: validation data (accepted but not inspected here).
        preproc: preprocessor whose class list and ``preprocess`` output
            are verified.
    """
    expected_samples = 14041
    expected_classes = [
        "<pad>",
        "O",
        "B-LOC",
        "B-PER",
        "B-ORG",
        "I-PER",
        "I-ORG",
        "B-MISC",
        "I-LOC",
        "I-MISC",
    ]

    # What the U helpers report about the training data.
    self.assertTrue(U.is_iter(trn))
    self.assertTrue(U.is_ner(data=trn))
    self.assertFalse(U.is_multilabel(trn))
    self.assertEqual(U.shape_from_data(trn), (14041, 47))
    self.assertFalse(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), expected_samples)
    self.assertEqual(U.nclasses_from_data(trn), 10)
    self.assertEqual(len(U.y_from_data(trn)), expected_samples)
    self.assertFalse(U.bert_data_tuple(trn))

    # Label vocabulary exposed by the preprocessor.
    self.assertEqual(preproc.get_classes(), expected_classes)

    # A single input sentence yields a sequence of length one; the innermost
    # values are presumably token ids for "hello" and "world" -- confirm.
    nerseq = preproc.preprocess(["hello world"])
    self.assertEqual(len(nerseq), 1)
    first_entry = nerseq[0][0][0][0].tolist()
    self.assertEqual(first_entry, [21010, 100])
def __test_texts_bert(self, trn, val, preproc):
    """Check BERT-style text data tuples and their preprocessor.

    Args:
        trn: training tuple; ``trn[0][0]`` must have shape (4, 10) and
            ``trn[1]`` shape (4, 2).
        val: validation tuple with the same shapes as ``trn``.
        preproc: preprocessor whose ``preprocess`` and ``undo`` results
            are verified.
    """
    x_shape = (4, 10)
    y_shape = (4, 2)

    self.assertFalse(U.is_iter(trn))

    # Same feature/label shapes for the training and validation splits.
    for split in (trn, val):
        self.assertEqual(split[0][0].shape, x_shape)
        self.assertEqual(split[1].shape, y_shape)

    # What the U helpers report about the training data.
    self.assertFalse(U.is_multilabel(trn))
    self.assertEqual(U.shape_from_data(trn), x_shape)
    self.assertFalse(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), 4)
    self.assertEqual(U.nclasses_from_data(trn), 2)
    self.assertEqual(U.y_from_data(trn).shape, y_shape)
    self.assertTrue(U.bert_data_tuple(trn))

    # NOTE(review): this compares get_classes() with itself, so it only
    # verifies that the call succeeds and is repeatable -- consider pinning
    # the expected class list instead.
    classes = preproc.get_classes()
    self.assertEqual(classes, preproc.get_classes())

    # First value of the encoded text (presumably the [CLS] token id --
    # confirm), then the shape of the encoded array.
    leading_value = preproc.preprocess(['hello book'])[0][0][0]
    self.assertEqual(leading_value, 101)
    encoded_shape = preproc.preprocess(['hello book'])[0].shape
    self.assertEqual(encoded_shape, (1, 10))

    # undo() on the first validation example returns the marked-up text.
    restored = preproc.undo(val[0][0][0])
    self.assertEqual(restored, '[CLS] the book is bad . [SEP]')
def __test_texts_standard(self, trn, val, preproc):
    """Check standard-mode text data tuples and their preprocessor.

    Args:
        trn: training tuple; ``trn[0]`` must have shape (4, 10) and
            ``trn[1]`` shape (4, 2).
        val: validation tuple with the same shapes as ``trn``.
        preproc: preprocessor whose n-gram count, ``preprocess`` and
            ``undo`` results are verified.
    """
    x_shape = (4, 10)
    y_shape = (4, 2)

    self.assertFalse(U.is_iter(trn))

    # Same feature/label shapes for the training and validation splits.
    for split in (trn, val):
        self.assertEqual(split[0].shape, x_shape)
        self.assertEqual(split[1].shape, y_shape)

    # What the U helpers report about the training data.
    self.assertFalse(U.is_multilabel(trn))
    self.assertEqual(U.shape_from_data(trn), x_shape)
    self.assertFalse(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), 4)
    self.assertEqual(U.nclasses_from_data(trn), 2)
    self.assertEqual(U.y_from_data(trn).shape, y_shape)
    self.assertFalse(U.bert_data_tuple(trn))

    # NOTE(review): this compares get_classes() with itself, so it only
    # verifies that the call succeeds and is repeatable -- consider pinning
    # the expected class list instead.
    classes = preproc.get_classes()
    self.assertEqual(classes, preproc.get_classes())
    self.assertEqual(preproc.ngram_count(), 3)

    # Last value of the encoded row, then the shape of the encoded array.
    trailing_value = preproc.preprocess(['hello book'])[0][-1]
    self.assertEqual(trailing_value, 1)
    encoded_shape = preproc.preprocess(['hello book']).shape
    self.assertEqual(encoded_shape, (1, 10))

    # undo() on the first validation example returns the original text.
    restored = preproc.undo(val[0][0])
    self.assertEqual(restored, 'the book is bad')
def __test_texts_tfidf(self, trn, val, preproc):
    """Check TF-IDF-style text data tuples and their preprocessor.

    Args:
        trn: training tuple; ``trn[0]`` must have shape (4, 100) and
            ``trn[1]`` shape (4, 2).
        val: validation tuple with the same shapes as ``trn``.
        preproc: preprocessor whose classes, n-gram count, ``preprocess``
            and ``undo`` results are verified.
    """
    x_shape = (4, 100)
    y_shape = (4, 2)

    self.assertFalse(U.is_iter(trn))

    # Same feature/label shapes for the training and validation splits.
    for split in (trn, val):
        self.assertEqual(split[0].shape, x_shape)
        self.assertEqual(split[1].shape, y_shape)

    # What the U helpers report about the training data.
    self.assertFalse(U.is_multilabel(trn))
    self.assertEqual(U.shape_from_data(trn), x_shape)
    self.assertFalse(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), 4)
    self.assertEqual(U.nclasses_from_data(trn), 2)
    self.assertEqual(U.y_from_data(trn).shape, y_shape)
    self.assertFalse(U.bert_data_tuple(trn))

    # Preprocessor metadata.
    self.assertEqual(preproc.get_classes(), ['neg', 'pos'])
    self.assertEqual(preproc.ngram_count(), 1)

    # The value at index 1 of the encoded row is compared at four decimal
    # places; the encoded array has one row of 100 features.
    weight = preproc.preprocess(['hello book'])[0][1]
    self.assertEqual('%.4f' % weight, '0.5878')
    encoded_shape = preproc.preprocess(['hello book']).shape
    self.assertEqual(encoded_shape, (1, 100))

    # undo() on the first validation example returns its words (note the
    # expected word order differs from the standard-mode text).
    restored = preproc.undo(val[0][0])
    self.assertEqual(restored, 'book is the bad')