def __test_ner(self, trn, val, preproc):
    """Check the NER training data and its preprocessor.

    Asserts that the ``U`` helpers report ``trn`` as an iterable,
    single-label NER dataset that is not on disk, with 14041 samples,
    shape ``(14041, 47)`` and 10 classes, and that ``preproc`` exposes
    the expected tag list and encodes a sample sentence as expected.

    Args:
        trn: Training data object under test.
        val: Validation data object; not inspected by this helper.
        preproc: Preprocessor returned together with the data.
    """
    n_train = 14041
    expected_tags = [
        "<pad>",
        "O",
        "B-LOC",
        "B-PER",
        "B-ORG",
        "I-PER",
        "I-ORG",
        "B-MISC",
        "I-LOC",
        "I-MISC",
    ]

    # Structural properties reported by the utility module.
    self.assertTrue(U.is_iter(trn))
    self.assertTrue(U.is_ner(data=trn))
    self.assertFalse(U.is_multilabel(trn))
    self.assertEqual(U.shape_from_data(trn), (n_train, 47))
    self.assertFalse(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), n_train)
    self.assertEqual(U.nclasses_from_data(trn), len(expected_tags))
    targets = U.y_from_data(trn)
    self.assertEqual(len(targets), n_train)
    self.assertFalse(U.bert_data_tuple(trn))

    # Preprocessor: tag vocabulary and encoding of one sentence.
    self.assertEqual(preproc.get_classes(), expected_tags)
    sentence = "hello world"
    nerseq = preproc.preprocess([sentence])
    self.assertEqual(len(nerseq), 1)
    first_ids = nerseq[0][0][0][0]
    self.assertEqual(first_ids.tolist(), [21010, 100])
def __test_images(self, trn, val, preproc, nsamples=16):
    """Check image classification training data and its preprocessor.

    Asserts that the ``U`` helpers report ``trn`` as an on-disk iterable
    of ``(224, 224, 3)`` inputs with ``nsamples`` samples and 2 classes,
    and that ``preproc`` lists the classes ``cat`` and ``dog`` and turns
    an image folder into a ``DirectoryIterator`` with a single step.

    Args:
        trn: Training data object under test.
        val: Validation data object; not inspected by this helper.
        preproc: Preprocessor returned together with the data.
        nsamples: Expected number of training samples (default 16).
    """
    n_classes = 2

    # Structural properties reported by the utility module.
    self.assertTrue(U.is_iter(trn))
    self.assertEqual(U.shape_from_data(trn), (224, 224, 3))
    self.assertTrue(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), nsamples)
    self.assertEqual(U.nclasses_from_data(trn), n_classes)
    targets = U.y_from_data(trn)
    self.assertEqual(targets.shape, (nsamples, n_classes))
    self.assertFalse(U.bert_data_tuple(trn))

    # Preprocessor: class names and folder preprocessing.
    self.assertEqual(preproc.get_classes(), ['cat', 'dog'])
    folder = './image_data/image_folder/all'
    gen, steps = preproc.preprocess(folder)
    self.assertEqual(type(gen).__name__, 'DirectoryIterator')
    self.assertEqual(steps, 1)
def test_images_from_fname_regression(self):
    # Loads the data via images_from_fname_regression() and checks that
    # the training set is an on-disk iterable of (224, 224, 3) inputs
    # with 18 samples, one target value per sample and no class names.
    # (Kept as comments rather than a docstring so unittest's verbose
    # test description is unchanged.)
    trn, val, preproc = images_from_fname_regression()
    nsamples = 18

    # Structural properties reported by the utility module.
    self.assertTrue(U.is_iter(trn))
    self.assertEqual(U.shape_from_data(trn), (224, 224, 3))
    self.assertTrue(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), nsamples)
    # NOTE(review): a disabled check used to sit here:
    #     self.assertRaises(Exception, U.nclasses_from_data(trn))
    # As written it would call nclasses_from_data immediately instead of
    # handing assertRaises a callable; it stays disabled.
    targets = U.y_from_data(trn)
    self.assertEqual(targets.shape, (nsamples,))
    self.assertFalse(U.bert_data_tuple(trn))

    # Preprocessor: empty class list and folder preprocessing.
    self.assertEqual(preproc.get_classes(), [])
    folder = './image_data/image_folder/all'
    gen, steps = preproc.preprocess(folder)
    self.assertEqual(type(gen).__name__, 'DirectoryIterator')
    self.assertEqual(steps, 1)
def __test_texts_bert(self, trn, val, preproc):
    """Check text data and preprocessor produced in BERT mode.

    Asserts that ``trn`` and ``val`` are indexable pairs whose first
    element holds ``(4, 10)`` arrays and whose second element is a
    ``(4, 2)`` label array, that the ``U`` helpers report ``trn`` as a
    non-iterator, single-label BERT data tuple with 4 samples and
    2 classes, and that ``preproc`` encodes and decodes text as expected.

    Args:
        trn: Training data under test.
        val: Validation data under test.
        preproc: Preprocessor returned together with the data.
    """
    input_shape = (4, 10)
    label_shape = (4, 2)

    self.assertFalse(U.is_iter(trn))
    # Same shape expectations for the training and validation splits.
    for split in (trn, val):
        self.assertEqual(split[0][0].shape, input_shape)
        self.assertEqual(split[1].shape, label_shape)

    self.assertFalse(U.is_multilabel(trn))
    self.assertEqual(U.shape_from_data(trn), input_shape)
    self.assertFalse(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), 4)
    self.assertEqual(U.nclasses_from_data(trn), 2)
    targets = U.y_from_data(trn)
    self.assertEqual(targets.shape, label_shape)
    self.assertTrue(U.bert_data_tuple(trn))

    # NOTE(review): this compares get_classes() with a second call to
    # itself, so it only verifies that two calls agree, not the values.
    classes = preproc.get_classes()
    self.assertEqual(classes, preproc.get_classes())

    # preprocess() is deliberately invoked once per assertion.
    text = 'hello book'
    self.assertEqual(preproc.preprocess([text])[0][0][0], 101)
    self.assertEqual(preproc.preprocess([text])[0].shape, (1, 10))

    first_val_doc = val[0][0][0]
    self.assertEqual(preproc.undo(first_val_doc), '[CLS] the book is bad . [SEP]')
def __test_texts_standard(self, trn, val, preproc):
    """Check text data and preprocessor produced in standard mode.

    Asserts that ``trn`` and ``val`` are ``(inputs, labels)`` pairs with
    shapes ``(4, 10)`` and ``(4, 2)``, that the ``U`` helpers report
    ``trn`` as a non-iterator, single-label, non-BERT dataset with
    4 samples and 2 classes, and that ``preproc`` reports an n-gram
    count of 3 and encodes and decodes text as expected.

    Args:
        trn: Training data under test.
        val: Validation data under test.
        preproc: Preprocessor returned together with the data.
    """
    input_shape = (4, 10)
    label_shape = (4, 2)

    self.assertFalse(U.is_iter(trn))
    # Same shape expectations for the training and validation splits.
    for split in (trn, val):
        self.assertEqual(split[0].shape, input_shape)
        self.assertEqual(split[1].shape, label_shape)

    self.assertFalse(U.is_multilabel(trn))
    self.assertEqual(U.shape_from_data(trn), input_shape)
    self.assertFalse(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), 4)
    self.assertEqual(U.nclasses_from_data(trn), 2)
    targets = U.y_from_data(trn)
    self.assertEqual(targets.shape, label_shape)
    self.assertFalse(U.bert_data_tuple(trn))

    # NOTE(review): this compares get_classes() with a second call to
    # itself, so it only verifies that two calls agree, not the values.
    classes = preproc.get_classes()
    self.assertEqual(classes, preproc.get_classes())
    self.assertEqual(preproc.ngram_count(), 3)

    # preprocess() is deliberately invoked once per assertion.
    text = 'hello book'
    self.assertEqual(preproc.preprocess([text])[0][-1], 1)
    self.assertEqual(preproc.preprocess([text]).shape, (1, 10))

    first_val_doc = val[0][0]
    self.assertEqual(preproc.undo(first_val_doc), 'the book is bad')
def __test_texts_tfidf(self, trn, val, preproc):
    """Check text data and preprocessor produced in TF-IDF mode.

    Asserts that ``trn`` and ``val`` are ``(inputs, labels)`` pairs with
    shapes ``(4, 100)`` and ``(4, 2)``, that the ``U`` helpers report
    ``trn`` as a non-iterator, single-label, non-BERT dataset with
    4 samples and 2 classes, and that ``preproc`` lists the classes
    ``neg`` and ``pos``, reports an n-gram count of 1 and encodes and
    decodes text as expected.

    Args:
        trn: Training data under test.
        val: Validation data under test.
        preproc: Preprocessor returned together with the data.
    """
    input_shape = (4, 100)
    label_shape = (4, 2)

    self.assertFalse(U.is_iter(trn))
    # Same shape expectations for the training and validation splits.
    for split in (trn, val):
        self.assertEqual(split[0].shape, input_shape)
        self.assertEqual(split[1].shape, label_shape)

    self.assertFalse(U.is_multilabel(trn))
    self.assertEqual(U.shape_from_data(trn), input_shape)
    self.assertFalse(U.ondisk(trn))
    self.assertEqual(U.nsamples_from_data(trn), 4)
    self.assertEqual(U.nclasses_from_data(trn), 2)
    targets = U.y_from_data(trn)
    self.assertEqual(targets.shape, label_shape)
    self.assertFalse(U.bert_data_tuple(trn))

    self.assertEqual(preproc.get_classes(), ['neg', 'pos'])
    self.assertEqual(preproc.ngram_count(), 1)

    # preprocess() is deliberately invoked once per assertion.
    text = 'hello book'
    weight = preproc.preprocess([text])[0][1]
    # The weight is compared as a 4-decimal string, not as a float.
    self.assertEqual('%.4f' % (weight), '0.5878')
    self.assertEqual(preproc.preprocess([text]).shape, (1, 100))

    first_val_doc = val[0][0]
    self.assertEqual(preproc.undo(first_val_doc), 'book is the bad')