Example #1
0
 def __test_ner(self, trn, val, preproc):
     """Check the ``U`` data helpers and the preprocessor on a tagging dataset.

     Args:
         trn: training data passed to every ``U`` helper below.
         val: validation data; not used by this method.
         preproc: preprocessor exposing ``get_classes()`` and ``preprocess()``.
     """
     n_samples = 14041
     expected_shape = (n_samples, 47)
     expected_tags = [
         "<pad>",
         "O",
         "B-LOC",
         "B-PER",
         "B-ORG",
         "I-PER",
         "I-ORG",
         "B-MISC",
         "I-LOC",
         "I-MISC",
     ]

     # Dataset-level properties reported by the utility module.
     iterable = U.is_iter(trn)
     self.assertTrue(iterable)
     tagged = U.is_ner(data=trn)
     self.assertTrue(tagged)
     multilabel = U.is_multilabel(trn)
     self.assertFalse(multilabel)
     shape = U.shape_from_data(trn)
     self.assertEqual(shape, expected_shape)
     on_disk = U.ondisk(trn)
     self.assertFalse(on_disk)
     sample_count = U.nsamples_from_data(trn)
     self.assertEqual(sample_count, n_samples)
     class_count = U.nclasses_from_data(trn)
     self.assertEqual(class_count, 10)
     targets = U.y_from_data(trn)
     self.assertEqual(len(targets), n_samples)
     is_bert = U.bert_data_tuple(trn)
     self.assertFalse(is_bert)

     # The preprocessor must report the tag vocabulary in this exact order.
     tags = preproc.get_classes()
     self.assertEqual(tags, expected_tags)

     # A single raw text should produce a single-element result whose
     # innermost entry converts to the expected list of ids.
     encoded = preproc.preprocess(["hello world"])
     self.assertEqual(len(encoded), 1)
     innermost = encoded[0][0][0][0]
     self.assertEqual(innermost.tolist(), [21010, 100])
 def __test_images(self, trn, val, preproc, nsamples=16):
     """Check the ``U`` data helpers and the preprocessor on a two-class image set.

     Args:
         trn: training data passed to every ``U`` helper below.
         val: validation data; not used by this method.
         preproc: preprocessor exposing ``get_classes()`` and ``preprocess()``.
         nsamples: number of training samples the dataset is expected to hold.
     """
     image_shape = (224, 224, 3)
     label_shape = (nsamples, 2)

     # Dataset-level properties reported by the utility module.
     iterable = U.is_iter(trn)
     self.assertTrue(iterable)
     shape = U.shape_from_data(trn)
     self.assertEqual(shape, image_shape)
     on_disk = U.ondisk(trn)
     self.assertTrue(on_disk)
     sample_count = U.nsamples_from_data(trn)
     self.assertEqual(sample_count, nsamples)
     class_count = U.nclasses_from_data(trn)
     self.assertEqual(class_count, 2)
     targets = U.y_from_data(trn)
     self.assertEqual(targets.shape, label_shape)
     is_bert = U.bert_data_tuple(trn)
     self.assertFalse(is_bert)

     class_names = preproc.get_classes()
     self.assertEqual(class_names, ['cat', 'dog'])

     # Preprocessing a folder yields a (generator, step count) pair; the
     # generator is identified by its class name only.
     gen, steps = preproc.preprocess('./image_data/image_folder/all')
     gen_type_name = type(gen).__name__
     self.assertEqual(gen_type_name, 'DirectoryIterator')
     self.assertEqual(steps, 1)
 def test_images_from_fname_regression(self):
     """Load data via ``images_from_fname_regression()`` and check its properties.

     The targets are expected to be one-dimensional and the preprocessor is
     expected to report an empty class list.
     """
     trn, _val, preproc = images_from_fname_regression()
     nsamples = 18
     image_shape = (224, 224, 3)
     target_shape = (nsamples,)

     # Dataset-level properties reported by the utility module.
     iterable = U.is_iter(trn)
     self.assertTrue(iterable)
     shape = U.shape_from_data(trn)
     self.assertEqual(shape, image_shape)
     on_disk = U.ondisk(trn)
     self.assertTrue(on_disk)
     sample_count = U.nsamples_from_data(trn)
     self.assertEqual(sample_count, nsamples)
     # NOTE(review): the original carried a disabled check that
     # U.nclasses_from_data(trn) raises; it remains disabled here.
     targets = U.y_from_data(trn)
     self.assertEqual(targets.shape, target_shape)
     is_bert = U.bert_data_tuple(trn)
     self.assertFalse(is_bert)

     class_names = preproc.get_classes()
     self.assertEqual(class_names, [])

     # Preprocessing a folder yields a (generator, step count) pair; the
     # generator is identified by its class name only.
     gen, steps = preproc.preprocess('./image_data/image_folder/all')
     gen_type_name = type(gen).__name__
     self.assertEqual(gen_type_name, 'DirectoryIterator')
     self.assertEqual(steps, 1)
 def __test_texts_bert(self, trn, val, preproc):
     """Check the ``U`` data helpers and the preprocessor on BERT-style text data.

     Args:
         trn: training data; indexed here as ``trn[0][0]`` and ``trn[1]``.
         val: validation data; indexed the same way as ``trn``.
         preproc: preprocessor exposing ``get_classes()``, ``preprocess()``
             and ``undo()``.
     """
     feature_shape = (4, 10)
     label_shape = (4, 2)

     not_iterable = U.is_iter(trn)
     self.assertFalse(not_iterable)

     # Both splits must have identically shaped features and labels.
     for split in (trn, val):
         self.assertEqual(split[0][0].shape, feature_shape)
         self.assertEqual(split[1].shape, label_shape)

     # Dataset-level properties reported by the utility module.
     multilabel = U.is_multilabel(trn)
     self.assertFalse(multilabel)
     shape = U.shape_from_data(trn)
     self.assertEqual(shape, feature_shape)
     on_disk = U.ondisk(trn)
     self.assertFalse(on_disk)
     sample_count = U.nsamples_from_data(trn)
     self.assertEqual(sample_count, 4)
     class_count = U.nclasses_from_data(trn)
     self.assertEqual(class_count, 2)
     targets = U.y_from_data(trn)
     self.assertEqual(targets.shape, label_shape)
     is_bert = U.bert_data_tuple(trn)
     self.assertTrue(is_bert)

     # NOTE(review): both sides are the same call, so this only confirms that
     # two consecutive get_classes() calls return equal values; no specific
     # class list is pinned. Confirm the intended expected value.
     self.assertEqual(preproc.get_classes(), preproc.get_classes())

     # Each check below triggers its own preprocess() call, as before.
     first_pass = preproc.preprocess(['hello book'])
     self.assertEqual(first_pass[0][0][0], 101)
     second_pass = preproc.preprocess(['hello book'])
     self.assertEqual(second_pass[0].shape, (1, 10))

     # undo() applied to the first validation row must give back this text.
     restored = preproc.undo(val[0][0][0])
     self.assertEqual(restored, '[CLS] the book is bad . [SEP]')
 def __test_texts_standard(self, trn, val, preproc):
     """Check the ``U`` data helpers and the preprocessor on standard text data.

     Args:
         trn: training data; indexed here as ``trn[0]`` and ``trn[1]``.
         val: validation data; indexed the same way as ``trn``.
         preproc: preprocessor exposing ``get_classes()``, ``ngram_count()``,
             ``preprocess()`` and ``undo()``.
     """
     feature_shape = (4, 10)
     label_shape = (4, 2)

     not_iterable = U.is_iter(trn)
     self.assertFalse(not_iterable)

     # Both splits must have identically shaped features and labels.
     for split in (trn, val):
         self.assertEqual(split[0].shape, feature_shape)
         self.assertEqual(split[1].shape, label_shape)

     # Dataset-level properties reported by the utility module.
     multilabel = U.is_multilabel(trn)
     self.assertFalse(multilabel)
     shape = U.shape_from_data(trn)
     self.assertEqual(shape, feature_shape)
     on_disk = U.ondisk(trn)
     self.assertFalse(on_disk)
     sample_count = U.nsamples_from_data(trn)
     self.assertEqual(sample_count, 4)
     class_count = U.nclasses_from_data(trn)
     self.assertEqual(class_count, 2)
     targets = U.y_from_data(trn)
     self.assertEqual(targets.shape, label_shape)
     is_bert = U.bert_data_tuple(trn)
     self.assertFalse(is_bert)

     # NOTE(review): both sides are the same call, so this only confirms that
     # two consecutive get_classes() calls return equal values; no specific
     # class list is pinned. Confirm the intended expected value.
     self.assertEqual(preproc.get_classes(), preproc.get_classes())

     ngrams = preproc.ngram_count()
     self.assertEqual(ngrams, 3)

     # Each check below triggers its own preprocess() call, as before.
     first_pass = preproc.preprocess(['hello book'])
     self.assertEqual(first_pass[0][-1], 1)
     second_pass = preproc.preprocess(['hello book'])
     self.assertEqual(second_pass.shape, (1, 10))

     # undo() applied to the first validation row must give back this text.
     restored = preproc.undo(val[0][0])
     self.assertEqual(restored, 'the book is bad')
Example #6
0
 def __test_texts_tfidf(self, trn, val, preproc):
     """Check the ``U`` data helpers and the preprocessor on 100-column text data.

     Args:
         trn: training data; indexed here as ``trn[0]`` and ``trn[1]``.
         val: validation data; indexed the same way as ``trn``.
         preproc: preprocessor exposing ``get_classes()``, ``ngram_count()``,
             ``preprocess()`` and ``undo()``.
     """
     feature_shape = (4, 100)
     label_shape = (4, 2)

     not_iterable = U.is_iter(trn)
     self.assertFalse(not_iterable)

     # Both splits must have identically shaped features and labels.
     for split in (trn, val):
         self.assertEqual(split[0].shape, feature_shape)
         self.assertEqual(split[1].shape, label_shape)

     # Dataset-level properties reported by the utility module.
     multilabel = U.is_multilabel(trn)
     self.assertFalse(multilabel)
     shape = U.shape_from_data(trn)
     self.assertEqual(shape, feature_shape)
     on_disk = U.ondisk(trn)
     self.assertFalse(on_disk)
     sample_count = U.nsamples_from_data(trn)
     self.assertEqual(sample_count, 4)
     class_count = U.nclasses_from_data(trn)
     self.assertEqual(class_count, 2)
     targets = U.y_from_data(trn)
     self.assertEqual(targets.shape, label_shape)
     is_bert = U.bert_data_tuple(trn)
     self.assertFalse(is_bert)

     class_names = preproc.get_classes()
     self.assertEqual(class_names, ['neg', 'pos'])
     ngrams = preproc.ngram_count()
     self.assertEqual(ngrams, 1)

     # The weight is compared as a string rounded to four decimals, so
     # float noise beyond that precision cannot fail the check.
     weight = preproc.preprocess(['hello book'])[0][1]
     self.assertEqual('%.4f' % weight, '0.5878')

     # A second, separate preprocess() call is made for the shape check.
     encoded = preproc.preprocess(['hello book'])
     self.assertEqual(encoded.shape, (1, 100))

     # undo() applied to the first validation row must give back this text.
     restored = preproc.undo(val[0][0])
     self.assertEqual(restored, 'book is the bad')