def test_linreg(self):
    """Exercise a ``'linreg'`` text regression model end to end.

    Steps, in order: preprocess the arrays held in ``self.trn`` /
    ``self.val``, train with the one-cycle policy, inspect top losses,
    get/set weight decay, save and reload the model, then build, save,
    reload and query a predictor.

    Saved artifacts are written inside a per-test temporary directory that
    is removed when the test finishes, rather than to fixed ``/tmp`` paths
    (which collide between concurrent runs and are never cleaned up).
    """
    # Local imports keep this test independent of the module's import block.
    import os
    import shutil
    import tempfile

    trn, val, preproc = txt.texts_from_array(
        x_train=self.trn[0],
        y_train=self.trn[1],
        x_test=self.val[0],
        y_test=self.val[1],
        preprocess_mode='standard',
        ngram_range=3,
        maxlen=200,
        max_features=35000,
    )
    model = txt.text_regression_model('linreg', train_data=trn, preproc=preproc)
    learner = ktrain.get_learner(
        model, train_data=trn, val_data=val, batch_size=256
    )
    lr = 0.01
    hist = learner.fit_onecycle(lr, 10)

    # test training results
    self.assertAlmostEqual(max(hist.history['lr']), lr)
    self.assertLess(min(hist.history['val_mae']), 12)

    # test top losses: the reported id must index into the validation inputs
    obs = learner.top_losses(n=1, val_data=None)
    self.assertIn(obs[0][0], range(len(val[0])))
    learner.view_top_losses(preproc=preproc, n=1, val_data=None)

    # test weight decay
    self.assertIsNone(learner.get_weight_decay())
    learner.set_weight_decay(1e-2)
    self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

    # All artifacts live under one scratch directory.  The save targets are
    # fresh, not-yet-existing paths inside it, exactly as the old '/tmp/...'
    # targets were, so the save/load calls see the same kind of path.
    tmp_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
    model_path = os.path.join(tmp_dir, 'test_model')
    predictor_path = os.path.join(tmp_dir, 'test_predictor')

    # test load and save model
    learner.save_model(model_path)
    learner.load_model(model_path)

    # test predictor
    p = ktrain.get_predictor(learner.model, preproc)
    self.assertGreater(p.predict([TEST_DOC])[0], 100)
    p.save(predictor_path)
    p = ktrain.load_predictor(predictor_path)
    self.assertGreater(p.predict([TEST_DOC])[0], 100)
    self.assertIsNone(p.explain(TEST_DOC))
def test_distilbert(self):
    """Exercise a ``'distilbert'`` text regression model end to end.

    Steps, in order: preprocess the arrays held in ``self.trn`` /
    ``self.val`` in ``'distilbert'`` mode, train for one one-cycle epoch,
    inspect top losses, get/set weight decay, save and reload the model,
    then build, save, reload and query a predictor.

    Each temporary directory created for saved artifacts is registered for
    removal so repeated runs do not accumulate saved models on disk.
    """
    # Local imports keep this test independent of the module's import block.
    import shutil
    import tempfile

    trn, val, preproc = txt.texts_from_array(
        x_train=self.trn[0],
        y_train=self.trn[1],
        x_test=self.val[0],
        y_test=self.val[1],
        preprocess_mode='distilbert',
        maxlen=75,
    )
    model = txt.text_regression_model(
        'distilbert', train_data=trn, preproc=preproc
    )
    learner = ktrain.get_learner(
        model, train_data=trn, val_data=val, batch_size=100
    )
    lr = 5e-5
    hist = learner.fit_onecycle(lr, 1)

    # test training results
    self.assertAlmostEqual(max(hist.history['lr']), lr)
    self.assertLess(min(hist.history['val_mae']), 16)

    # test top losses: the reported id must index into val.x
    obs = learner.top_losses(n=1, val_data=None)
    self.assertIn(obs[0][0], range(len(val.x)))
    learner.view_top_losses(preproc=preproc, n=1, val_data=None)

    # test weight decay
    self.assertIsNone(learner.get_weight_decay())
    learner.set_weight_decay(1e-2)
    self.assertAlmostEqual(learner.get_weight_decay(), 1e-2)

    # test load and save model
    model_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, model_dir, ignore_errors=True)
    learner.save_model(model_dir)
    learner.load_model(model_dir, preproc=preproc)

    # test predictor
    p = ktrain.get_predictor(learner.model, preproc, batch_size=64)
    self.assertGreater(p.predict([TEST_DOC])[0], 1)
    predictor_dir = tempfile.mkdtemp()
    self.addCleanup(shutil.rmtree, predictor_dir, ignore_errors=True)
    p.save(predictor_dir)
    p = ktrain.load_predictor(predictor_dir, batch_size=64)
    self.assertGreater(p.predict([TEST_DOC])[0], 1)
    self.assertIsNone(p.explain(TEST_DOC))
# NOTE(review): the statements down to the `return` are presumably the tail of
# `_parse_args` (it is called below); its `def` line, the creation of `parser`
# and any earlier arguments lie outside this chunk.
    # Each option defaults to the value of the named SM_* environment variable
    # (None when that variable is unset).
    parser.add_argument('--sm-model-dir', type=str, default=os.environ.get('SM_MODEL_DIR'))
    parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))
    parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))
    parser.add_argument('--preproc', type=str, default=os.environ.get('SM_CHANNEL_PREPROC'))

    # parse_known_args returns (namespace, list_of_unrecognized_args), so
    # unknown command-line options do not abort the script.
    return parser.parse_known_args()


# Script entry point: load data and preprocessor, build and train the model,
# validate it, then save a predictor to the --sm-model-dir location.
if __name__ =='__main__':
    args, unknown = _parse_args()

    # Load the training data, validation data and preprocessor from the
    # locations given by --train, --test and --preproc.
    trn = _load_training_data(args.train)
    val = _load_testing_data(args.test)
    preproc = _load_preproc(args.preproc)

    # NOTE(review): `mirrored_strategy` is not defined in this chunk, and the
    # original line structure was lost, so the extent of this `with` body is
    # an assumption (only model construction is placed in scope) - confirm.
    with mirrored_strategy.scope():
        model = text.text_regression_model(args.model_name, train_data=trn, preproc=preproc)

    # NOTE(review): args.model_name, args.batch_size, args.learning_rate,
    # args.epochs and args.model_dir are presumably declared on the parser
    # above this chunk; only --sm-model-dir, --train, --test and --preproc are
    # visible here - verify that --model-dir (vs. --sm-model-dir) exists.
    learner = ktrain.get_learner(model, train_data=trn, val_data=val, batch_size=args.batch_size)
    learner.autofit(args.learning_rate, args.epochs, checkpoint_folder=args.model_dir)
    learner.validate(val_data=val)

    # Wrap the trained model with its preprocessor and save it.
    predictor = ktrain.get_predictor(learner.model, preproc)
    predictor.save(args.sm_model_dir)