예제 #1
0
    def test_folder(self):
        """Run the image-folder workflow end to end with a 'pretrained_resnet50' model.

        Loads the cat/dog folder dataset, builds and freezes the classifier,
        then checks the weight-decay accessors, ``autofit`` training,
        ``top_losses``, model save/load, ``validate`` and the predictor API
        (including a predictor save/load round trip).
        """
        train_set, valid_set, preprocessor = vis.images_from_folder(
            datadir="image_data/image_folder",
            data_aug=vis.get_data_aug(horizontal_flip=True),
            classes=["cat", "dog"],
            train_test_names=["train", "valid"],
        )
        classifier = vis.image_classifier("pretrained_resnet50", train_set, valid_set)
        learner = ktrain.get_learner(
            model=classifier, train_data=train_set, val_data=valid_set, batch_size=1
        )
        learner.freeze()

        # Weight decay: reported as None first, then as the value that was set.
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # Training.
        history = learner.autofit(1e-3, monitor=VAL_ACC_NAME).history

        # The largest recorded learning rate must match the requested one.
        self.assertAlmostEqual(max(history["lr"]), 1e-3)
        best_train_acc = max(history[ACC_NAME])
        if best_train_acc == 0.5:
            raise Exception("unlucky initialization: please run test again")
        self.assertGreater(best_train_acc, 0.8)

        # top_losses: an empty result is only accepted when the best
        # validation accuracy equals 1.
        worst = learner.top_losses(n=1, val_data=valid_set)
        print(worst)
        if not worst:
            self.assertEqual(max(history[VAL_ACC_NAME]), 1)
        else:
            self.assertIn(worst[0][0], list(range(U.nsamples_from_data(valid_set))))

        # Model save/load round trip.
        model_path = "/tmp/test_model"
        learner.save_model(model_path)
        learner.load_model(model_path)

        # validate: the largest entry of every row must sit at the row's own index.
        matrix = learner.validate(val_data=valid_set)
        print(matrix)
        for row_idx, row_values in enumerate(matrix):
            self.assertEqual(np.argmax(row_values), row_idx)

        # Predictor API.
        train_dir = "image_data/image_folder/train/"
        cat_image = "image_data/image_folder/train/cat/cat.11737.jpg"
        predictor = ktrain.get_predictor(learner.model, preprocessor)
        folder_labels = predictor.predict_folder(train_dir)
        print(folder_labels)
        self.assertEqual(folder_labels[0][1], "cat")
        folder_probs = predictor.predict_proba_folder(train_dir)
        self.assertEqual(np.argmax(folder_probs[0][1]), 0)
        file_label = predictor.predict_filename(cat_image)
        self.assertEqual(file_label, ["cat"])
        file_probs = predictor.predict_proba_filename(cat_image)
        self.assertEqual(np.argmax(file_probs), 0)

        # A predictor restored from disk must give the same label.
        predictor_path = "/tmp/test_predictor"
        predictor.save(predictor_path)
        restored = ktrain.load_predictor(predictor_path)
        restored_label = restored.predict_filename(cat_image)
        self.assertEqual(restored_label, ["cat"])
예제 #2
0
 def __test_ner(self, trn, val, preproc):
     """Check dataset utilities and preprocessor output for the NER training data.

     ``val`` is accepted for signature parity with the other helpers but is
     not inspected here.
     """
     sample_count = 14041
     expected_tags = [
         "<pad>", "O", "B-LOC", "B-PER", "B-ORG",
         "I-PER", "I-ORG", "B-MISC", "I-LOC", "I-MISC",
     ]

     # Structural properties reported by the utility module.
     self.assertTrue(U.is_iter(trn))
     self.assertTrue(U.is_ner(data=trn))
     self.assertFalse(U.is_multilabel(trn))
     self.assertEqual(U.shape_from_data(trn), (sample_count, 47))
     self.assertFalse(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), sample_count)
     self.assertEqual(U.nclasses_from_data(trn), 10)
     self.assertEqual(len(U.y_from_data(trn)), sample_count)
     self.assertFalse(U.bert_data_tuple(trn))

     # Tag vocabulary exposed by the preprocessor.
     self.assertEqual(preproc.get_classes(), expected_tags)

     # A single raw sentence yields one entry whose first id sequence is fixed.
     encoded = preproc.preprocess(["hello world"])
     self.assertEqual(len(encoded), 1)
     first_ids = encoded[0][0][0][0].tolist()
     self.assertEqual(first_ids, [21010, 100])
 def __test_images(self, trn, val, preproc, nsamples=16):
     """Check dataset utilities and preprocessor output for two-class image data.

     ``nsamples`` is the number of training samples expected in ``trn``.
     ``val`` is not inspected here.
     """
     image_shape = (224, 224, 3)
     label_shape = (nsamples, 2)

     self.assertTrue(U.is_iter(trn))
     self.assertEqual(U.shape_from_data(trn), image_shape)
     self.assertTrue(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), nsamples)
     self.assertEqual(U.nclasses_from_data(trn), 2)
     self.assertEqual(U.y_from_data(trn).shape, label_shape)
     self.assertFalse(U.bert_data_tuple(trn))
     self.assertEqual(preproc.get_classes(), ['cat', 'dog'])

     # Preprocessing a folder path returns a generator plus a step count.
     generator, step_count = preproc.preprocess('./image_data/image_folder/all')
     self.assertEqual(type(generator).__name__, 'DirectoryIterator')
     self.assertEqual(step_count, 1)
 def test_images_from_fname_regression(self):
     """Check dataset utilities for image data loaded for regression from file names."""
     train_data, _valid_data, preprocessor = images_from_fname_regression()
     expected_count = 18

     self.assertTrue(U.is_iter(train_data))
     self.assertEqual(U.shape_from_data(train_data), (224, 224, 3))
     self.assertTrue(U.ondisk(train_data))
     self.assertEqual(U.nsamples_from_data(train_data), expected_count)
     # The class-count check (U.nclasses_from_data) is not exercised for this dataset.
     self.assertEqual(U.y_from_data(train_data).shape, (expected_count,))
     self.assertFalse(U.bert_data_tuple(train_data))
     self.assertEqual(preprocessor.get_classes(), [])

     # Preprocessing a folder path returns a generator plus a step count.
     generator, step_count = preprocessor.preprocess('./image_data/image_folder/all')
     self.assertEqual(type(generator).__name__, 'DirectoryIterator')
     self.assertEqual(step_count, 1)
 def __test_texts_bert(self, trn, val, preproc):
     """Check array shapes, dataset utilities and preprocessor output for BERT text data."""
     feature_shape = (4, 10)
     label_shape = (4, 2)

     # Training and validation data are indexable; the first feature array
     # and the label array have fixed shapes.
     self.assertFalse(U.is_iter(trn))
     self.assertEqual(trn[0][0].shape, feature_shape)
     self.assertEqual(trn[1].shape, label_shape)
     self.assertEqual(val[0][0].shape, feature_shape)
     self.assertEqual(val[1].shape, label_shape)

     # Properties reported by the utility module.
     self.assertFalse(U.is_multilabel(trn))
     self.assertEqual(U.shape_from_data(trn), feature_shape)
     self.assertFalse(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), 4)
     self.assertEqual(U.nclasses_from_data(trn), 2)
     self.assertEqual(U.y_from_data(trn).shape, label_shape)
     self.assertTrue(U.bert_data_tuple(trn))

     # NOTE(review): this compares get_classes() with itself, so it only
     # verifies that two calls agree — confirm the intended expected labels.
     self.assertEqual(preproc.get_classes(), preproc.get_classes())

     # Preprocessor output for a raw sentence, and decoding of a validation row.
     first_token_id = preproc.preprocess(['hello book'])[0][0][0]
     self.assertEqual(first_token_id, 101)
     encoded_shape = preproc.preprocess(['hello book'])[0].shape
     self.assertEqual(encoded_shape, (1, 10))
     decoded = preproc.undo(val[0][0][0])
     self.assertEqual(decoded, '[CLS] the book is bad . [SEP]')
 def __test_texts_standard(self, trn, val, preproc):
     """Check array shapes, dataset utilities and preprocessor output for standard text data."""
     feature_shape = (4, 10)
     label_shape = (4, 2)

     # Training and validation data are (features, labels) pairs of fixed shape.
     self.assertFalse(U.is_iter(trn))
     self.assertEqual(trn[0].shape, feature_shape)
     self.assertEqual(trn[1].shape, label_shape)
     self.assertEqual(val[0].shape, feature_shape)
     self.assertEqual(val[1].shape, label_shape)

     # Properties reported by the utility module.
     self.assertFalse(U.is_multilabel(trn))
     self.assertEqual(U.shape_from_data(trn), feature_shape)
     self.assertFalse(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), 4)
     self.assertEqual(U.nclasses_from_data(trn), 2)
     self.assertEqual(U.y_from_data(trn).shape, label_shape)
     self.assertFalse(U.bert_data_tuple(trn))

     # NOTE(review): this compares get_classes() with itself, so it only
     # verifies that two calls agree — confirm the intended expected labels.
     self.assertEqual(preproc.get_classes(), preproc.get_classes())
     self.assertEqual(preproc.ngram_count(), 3)

     # Preprocessor output for a raw sentence, and decoding of a validation row.
     last_token_id = preproc.preprocess(['hello book'])[0][-1]
     self.assertEqual(last_token_id, 1)
     encoded_shape = preproc.preprocess(['hello book']).shape
     self.assertEqual(encoded_shape, (1, 10))
     decoded = preproc.undo(val[0][0])
     self.assertEqual(decoded, 'the book is bad')
예제 #7
0
 def __test_texts_tfidf(self, trn, val, preproc):
     """Check array shapes, dataset utilities and preprocessor output for TF-IDF text data."""
     feature_shape = (4, 100)
     label_shape = (4, 2)

     # Training and validation data are (features, labels) pairs of fixed shape.
     self.assertFalse(U.is_iter(trn))
     self.assertEqual(trn[0].shape, feature_shape)
     self.assertEqual(trn[1].shape, label_shape)
     self.assertEqual(val[0].shape, feature_shape)
     self.assertEqual(val[1].shape, label_shape)

     # Properties reported by the utility module.
     self.assertFalse(U.is_multilabel(trn))
     self.assertEqual(U.shape_from_data(trn), feature_shape)
     self.assertFalse(U.ondisk(trn))
     self.assertEqual(U.nsamples_from_data(trn), 4)
     self.assertEqual(U.nclasses_from_data(trn), 2)
     self.assertEqual(U.y_from_data(trn).shape, label_shape)
     self.assertFalse(U.bert_data_tuple(trn))

     # Preprocessor metadata.
     self.assertEqual(preproc.get_classes(), ['neg', 'pos'])
     self.assertEqual(preproc.ngram_count(), 1)

     # The second feature of the encoded sentence is compared at 4 decimals.
     second_weight = preproc.preprocess(['hello book'])[0][1]
     self.assertEqual('%.4f' % (second_weight), '0.5878')
     encoded_shape = preproc.preprocess(['hello book']).shape
     self.assertEqual(encoded_shape, (1, 100))
     decoded = preproc.undo(val[0][0])
     self.assertEqual(decoded, 'book is the bad')
예제 #8
0
    def test_csv(self):
        """Run the CSV-driven image workflow end to end with a 'pretrained_resnet50' model.

        Loads train/validation CSV files, builds and freezes the classifier,
        then checks the weight-decay accessors, one-cycle training,
        ``top_losses``, model save/load, ``validate`` and the predictor API
        (including a predictor save/load round trip).
        """
        train_csv = "./image_data/train-vision.csv"
        valid_csv = "./image_data/valid-vision.csv"
        train_set, valid_set, preprocessor = vis.images_from_csv(
            train_csv,
            "filename",
            directory="./image_data/image_folder/all",
            val_filepath=valid_csv,
            label_columns=["cat", "dog"],
            data_aug=vis.get_data_aug(horizontal_flip=True),
        )

        max_lr = 1e-4
        classifier = vis.image_classifier("pretrained_resnet50", train_set, valid_set)
        learner = ktrain.get_learner(
            model=classifier,
            train_data=train_set,
            val_data=valid_set,
            batch_size=4,
        )
        learner.freeze()

        # Weight decay: reported as None first, then as the value that was set.
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # Training.
        history = learner.fit_onecycle(max_lr, 3).history

        # The largest recorded learning rate must match the requested one.
        self.assertAlmostEqual(max(history["lr"]), max_lr)
        best_train_acc = max(history[ACC_NAME])
        if best_train_acc == 0.5:
            raise Exception("unlucky initialization: please run test again")
        self.assertGreater(best_train_acc, 0.8)

        # top_losses: an empty result is only accepted when the best
        # validation accuracy equals 1.
        worst = learner.top_losses(n=1, val_data=valid_set)
        print(worst)
        if not worst:
            self.assertEqual(max(history[VAL_ACC_NAME]), 1)
        else:
            self.assertIn(worst[0][0], list(range(U.nsamples_from_data(valid_set))))

        # Model save/load round trip.
        model_path = "/tmp/test_model"
        learner.save_model(model_path)
        learner.load_model(model_path)

        # validate: the largest entry of every row must sit at the row's own index.
        matrix = learner.validate(val_data=valid_set)
        print(matrix)
        for row_idx, row_values in enumerate(matrix):
            self.assertEqual(np.argmax(row_values), row_idx)

        # Predictor API.
        train_dir = "image_data/image_folder/train/"
        cat_image = "image_data/image_folder/train/cat/cat.11737.jpg"
        predictor = ktrain.get_predictor(learner.model, preprocessor)
        folder_labels = predictor.predict_folder(train_dir)
        print(folder_labels)
        self.assertEqual(folder_labels[0][1], "cat")
        folder_probs = predictor.predict_proba_folder(train_dir)
        self.assertEqual(np.argmax(folder_probs[0][1]), 0)
        file_label = predictor.predict_filename(cat_image)
        self.assertEqual(file_label, ["cat"])
        file_probs = predictor.predict_proba_filename(cat_image)
        self.assertEqual(np.argmax(file_probs), 0)

        # A predictor restored from disk must give the same label.
        predictor_path = "/tmp/test_predictor"
        predictor.save(predictor_path)
        restored = ktrain.load_predictor(predictor_path)
        restored_label = restored.predict_filename(cat_image)
        self.assertEqual(restored_label, ["cat"])
예제 #9
0
    def test_array_regression(self):
        """Train the 'default_cnn' regression model on MNIST arrays and check the workflow.

        The integer digit labels are passed unchanged as targets with
        ``is_regression=True``. The test covers one-cycle training,
        ``top_losses``, the weight-decay accessors, model save/load,
        ``validate`` and the predictor API, including a predictor save/load
        round trip.
        """
        import numpy as np
        from tensorflow.keras.datasets import mnist

        (x_train, y_train), (x_test, y_test) = mnist.load_data()
        x_train = x_train.astype("float32")
        x_test = x_test.astype("float32")
        x_train /= 255
        x_test /= 255
        # Append a trailing axis of size 1 to each image array.
        x_train = np.expand_dims(x_train, axis=3)
        x_test = np.expand_dims(x_test, axis=3)

        data_aug = vis.get_data_aug(
            rotation_range=15,
            zoom_range=0.1,
            width_shift_range=0.1,
            height_shift_range=0.1,
            featurewise_center=False,
            featurewise_std_normalization=False,
        )

        (trn, val, preproc) = vis.images_from_array(
            x_train,
            y_train,
            validation_data=(x_test, y_test),
            data_aug=data_aug,
            is_regression=True,
            class_names=None,
        )

        model = vis.image_regression_model("default_cnn", trn, val)
        learner = ktrain.get_learner(
            model, train_data=trn, val_data=val, batch_size=128
        )
        hist = learner.fit_onecycle(1e-3, 1)

        # test train
        self.assertAlmostEqual(max(hist.history["lr"]), 1e-3)
        self.assertLess(max(hist.history["val_mae"]), 1)

        # test top_losses
        obs = learner.top_losses(n=1, val_data=val)
        print(obs)
        # This regression run is checked through "val_mae" above; a
        # validation-accuracy entry (the fallback used by the classification
        # tests) is not expected in its history, so an empty result is
        # reported directly instead of surfacing as a lookup error.
        self.assertTrue(
            obs, "top_losses returned no observations for a regression model"
        )
        self.assertIn(obs[0][0], list(range(U.nsamples_from_data(val))))

        # test weight decay
        self.assertIsNone(learner.get_weight_decay())
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # test load and save model
        learner.save_model("/tmp/test_model")
        learner.load_model("/tmp/test_model")

        # test validate (only printed; no assertion on its value)
        cm = learner.validate(val_data=val)
        print(cm)

        # test predictor: the rounded prediction for the first test image must
        # be 6, 7 or 8, with and without return_proba.
        p = ktrain.get_predictor(learner.model, preproc)
        r = p.predict(x_test[0:1])
        print(r)
        self.assertIn(round(r[0]), [6, 7, 8])
        r = p.predict(x_test[0:1], return_proba=True)
        self.assertIn(round(r[0]), [6, 7, 8])

        # A predictor restored from disk must stay within the same tolerance.
        p.save("/tmp/test_predictor")
        p = ktrain.load_predictor("/tmp/test_predictor")
        r = p.predict(x_test[0:1])
        self.assertIn(round(r[0]), [6, 7, 8])
예제 #10
0
    def test_array(self):
        """Train the 'default_cnn' classifier on MNIST arrays and check the workflow.

        Labels are one-hot encoded with ``to_categorical``. The test covers
        one-cycle training, ``top_losses``, the weight-decay accessors, model
        save/load, ``validate`` and the predictor API, including a predictor
        save/load round trip.
        """
        from tensorflow.keras.datasets import mnist
        from tensorflow.keras.utils import to_categorical
        import numpy as np

        def scale_and_expand(images):
            # Cast to float32, divide in place by 255, append a trailing axis.
            images = images.astype("float32")
            images /= 255
            return np.expand_dims(images, axis=3)

        (train_images, train_labels), (test_images, test_labels) = mnist.load_data()
        train_images = scale_and_expand(train_images)
        test_images = scale_and_expand(test_images)
        train_labels = to_categorical(train_labels)
        test_labels = to_categorical(test_labels)

        digit_names = [
            "zero", "one", "two", "three", "four",
            "five", "six", "seven", "eight", "nine",
        ]
        augmentation = vis.get_data_aug(
            rotation_range=15,
            zoom_range=0.1,
            width_shift_range=0.1,
            height_shift_range=0.1,
            featurewise_center=False,
            featurewise_std_normalization=False,
        )

        train_set, valid_set, preprocessor = vis.images_from_array(
            train_images,
            train_labels,
            validation_data=(test_images, test_labels),
            data_aug=augmentation,
            class_names=digit_names,
        )

        classifier = vis.image_classifier("default_cnn", train_set, valid_set)
        learner = ktrain.get_learner(
            classifier, train_data=train_set, val_data=valid_set, batch_size=128
        )
        history = learner.fit_onecycle(1e-3, 1).history

        # Training: requested peak learning rate and a validation-accuracy floor.
        self.assertAlmostEqual(max(history["lr"]), 1e-3)
        self.assertGreater(max(history[VAL_ACC_NAME]), 0.97)

        # top_losses: an empty result is only accepted when the best
        # validation accuracy equals 1.
        worst = learner.top_losses(n=1, val_data=valid_set)
        print(worst)
        if not worst:
            self.assertEqual(max(history[VAL_ACC_NAME]), 1)
        else:
            self.assertIn(worst[0][0], list(range(U.nsamples_from_data(valid_set))))

        # Weight decay: reported as None first, then as the value that was set.
        self.assertEqual(learner.get_weight_decay(), None)
        learner.set_weight_decay(1e-2)
        self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

        # Model save/load round trip.
        model_path = "/tmp/test_model"
        learner.save_model(model_path)
        learner.load_model(model_path)

        # validate: the largest entry of every row must sit at the row's own index.
        matrix = learner.validate(val_data=valid_set)
        print(matrix)
        for row_idx, row_values in enumerate(matrix):
            self.assertEqual(np.argmax(row_values), row_idx)

        # Predictor API on the first test image.
        first_image = test_images[0:1]
        predictor = ktrain.get_predictor(learner.model, preprocessor)
        labels = predictor.predict(first_image)
        print(labels)
        self.assertEqual(labels[0], "seven")
        probabilities = predictor.predict(test_images[0:1], return_proba=True)
        self.assertEqual(np.argmax(probabilities[0]), 7)

        # A predictor restored from disk must give the same label.
        predictor_path = "/tmp/test_predictor"
        predictor.save(predictor_path)
        restored = ktrain.load_predictor(predictor_path)
        restored_labels = restored.predict(test_images[0:1])
        self.assertEqual(restored_labels[0], "seven")
예제 #11
0
    def test_csv(self):
        """Run the CSV-driven image workflow with list-valued weight-decay checks.

        Loads train/validation CSV files, builds and freezes a
        'pretrained_resnet50' classifier, trains with one-cycle, then checks
        ``top_losses``, the weight-decay accessors (``get_weight_decay`` is
        indexed and measured with ``len`` here), model save/load,
        ``validate`` and the predictor API.
        """
        train_csv = "./image_data/train-vision.csv"
        valid_csv = "./image_data/valid-vision.csv"
        train_set, valid_set, preprocessor = vis.images_from_csv(
            train_csv,
            "filename",
            directory="./image_data/image_folder/all",
            val_filepath=valid_csv,
            label_columns=["cat", "dog"],
            data_aug=vis.get_data_aug(horizontal_flip=True),
        )

        max_lr = 1e-4
        classifier = vis.image_classifier("pretrained_resnet50", train_set, valid_set)
        learner = ktrain.get_learner(
            model=classifier, train_data=train_set, val_data=valid_set, batch_size=4
        )
        learner.freeze()
        history = learner.fit_onecycle(max_lr, 3).history

        # The largest recorded learning rate must match the requested one.
        self.assertAlmostEqual(max(history["lr"]), max_lr)
        best_train_acc = max(history[ACC_NAME])
        if best_train_acc == 0.5:
            raise Exception("unlucky initialization: please run test again")
        self.assertGreater(best_train_acc, 0.8)

        # top_losses: an empty result is only accepted when the best
        # validation accuracy equals 1.
        worst = learner.top_losses(n=1, val_data=valid_set)
        print(worst)
        if not worst:
            self.assertEqual(max(history[VAL_ACC_NAME]), 1)
        else:
            self.assertIn(worst[0][0], list(range(U.nsamples_from_data(valid_set))))

        # Weight decay: 54 entries are expected, the first one None until a
        # value is set.
        self.assertEqual(len(learner.get_weight_decay()), 54)
        self.assertEqual(learner.get_weight_decay()[0], None)
        learner.set_weight_decay(1e-4)
        self.assertAlmostEqual(learner.get_weight_decay()[0], 1e-4)

        # Model save/load round trip.
        model_path = "/tmp/test_model"
        learner.save_model(model_path)
        learner.load_model(model_path)

        # validate: the largest entry of every row must sit at the row's own index.
        matrix = learner.validate(val_data=valid_set)
        print(matrix)
        for row_idx, row_values in enumerate(matrix):
            self.assertEqual(np.argmax(row_values), row_idx)

        # Predictor API.
        train_dir = "image_data/image_folder/train/"
        cat_image = "image_data/image_folder/train/cat/cat.11737.jpg"
        predictor = ktrain.get_predictor(learner.model, preprocessor)
        folder_labels = predictor.predict_folder(train_dir)
        print(folder_labels)
        self.assertEqual(folder_labels[0][1], "cat")
        folder_probs = predictor.predict_proba_folder(train_dir)
        self.assertEqual(np.argmax(folder_probs[0][1]), 0)
        file_label = predictor.predict_filename(cat_image)
        self.assertEqual(file_label, ["cat"])
        file_probs = predictor.predict_proba_filename(cat_image)
        self.assertEqual(np.argmax(file_probs), 0)

        # A predictor restored from disk must give the same label.
        predictor_path = "/tmp/test_predictor"
        predictor.save(predictor_path)
        restored = ktrain.load_predictor(predictor_path)
        restored_label = restored.predict_filename(cat_image)
        self.assertEqual(restored_label, ["cat"])