def test_train_vocab_init(self):
    """Train a model seeded with a vocabulary gathered from the data splits.

    Every word found in ``self.x_train`` and ``self.x_test`` is collected
    into a set, which is passed as ``initial_vocab`` together with
    ``self.embeddings``. The model is then fitted with the test split
    supplied as the third and fourth ``fit`` arguments. The test passes
    if construction and training finish without raising.
    """
    # Iterate the splits directly rather than stacking them with np.r_:
    # the stacked form listed x_test twice (redundant for a set) and needs
    # NumPy to coerce the splits into arrays first, which can fail for
    # variable-length sentences on recent NumPy releases.
    # NOTE(review): assumes each split is an iterable of word sequences.
    vocab = set()
    for sentences in (self.x_train, self.x_test):
        for words in sentences:
            vocab.update(words)

    model = polygo.Sequence(initial_vocab=vocab, embeddings=self.embeddings)
    model.fit(self.x_train, self.y_train, self.x_test, self.y_test)
def test_analyze(self):
    """Fit a default model and check the keys of its analysis of ``self.text``.

    The analysis result is pretty-printed for inspection and must contain
    both a ``'words'`` and an ``'entities'`` entry.
    """
    tagger = polygo.Sequence()
    tagger.fit(self.x_train, self.y_train)

    analysis = tagger.analyze(self.text)
    pprint(analysis)

    # Checked in the same order as before: 'words' first, then 'entities'.
    for expected_key in ('words', 'entities'):
        self.assertIn(expected_key, analysis)
def test_save_and_load(self):
    """Round-trip a trained model through ``save``/``load`` and compare scores.

    A default model is fitted on the training split, saved to three files
    under ``SAVE_ROOT`` (weights, params, preprocessor) and scored on the
    test split. The test then verifies that all three files exist on disk,
    reloads the model from them, and asserts the reloaded model produces
    exactly the same score.
    """
    weights_file = os.path.join(SAVE_ROOT, 'weights.h5')
    params_file = os.path.join(SAVE_ROOT, 'params.json')
    preprocessor_file = os.path.join(SAVE_ROOT, 'preprocessor.pickle')

    model = polygo.Sequence()
    model.fit(self.x_train, self.y_train)
    model.save(weights_file, params_file, preprocessor_file)
    score1 = model.score(self.x_test, self.y_test)

    # Assert the files were actually written. Asserting on the path strings
    # themselves would always pass, because a non-empty string is truthy.
    self.assertTrue(os.path.exists(weights_file))
    self.assertTrue(os.path.exists(params_file))
    self.assertTrue(os.path.exists(preprocessor_file))

    model = polygo.Sequence.load(weights_file, params_file, preprocessor_file)
    score2 = model.score(self.x_test, self.y_test)

    self.assertEqual(score1, score2)
def test_train_callbacks(self):
    """Train with a ``ModelCheckpoint`` callback, then save the final model.

    The checkpoint callback is configured to write weights-only files into
    a ``logs`` directory next to this test file, with the epoch number and
    the monitored ``f1`` value embedded in each file name. The model is
    seeded with a vocabulary gathered from the train and test splits plus
    ``self.embeddings``, fitted for 100 epochs with the test split passed
    as the third and fourth ``fit`` arguments, and finally saved to three
    files under ``SAVE_ROOT``.
    """
    weights_file = os.path.join(SAVE_ROOT, 'weights.h5')
    params_file = os.path.join(SAVE_ROOT, 'params.json')
    preprocessor_file = os.path.join(SAVE_ROOT, 'preprocessor.pickle')

    log_dir = os.path.join(os.path.dirname(__file__), 'logs')
    # Create the checkpoint directory up front so the callback does not
    # depend on it having been left behind by an earlier run.
    if not os.path.isdir(log_dir):
        os.makedirs(log_dir)

    # The placeholders are left unformatted here; they are passed as-is in
    # the checkpoint path given to ModelCheckpoint.
    file_name = '_'.join(['weights', '{epoch:02d}', '{f1:2.4f}']) + '.h5'
    callback = ModelCheckpoint(os.path.join(log_dir, file_name),
                               monitor='f1',
                               save_weights_only=True)

    # Iterate the splits directly rather than stacking them with np.r_:
    # the stacked form listed x_test twice (redundant for a set) and needs
    # NumPy to coerce the splits into arrays first, which can fail for
    # variable-length sentences on recent NumPy releases.
    # NOTE(review): assumes each split is an iterable of word sequences.
    vocab = set()
    for sentences in (self.x_train, self.x_test):
        for words in sentences:
            vocab.update(words)

    model = polygo.Sequence(initial_vocab=vocab, embeddings=self.embeddings)
    model.fit(self.x_train, self.y_train, self.x_test, self.y_test,
              epochs=100, callbacks=[callback])
    model.save(weights_file, params_file, preprocessor_file)
def test_score(self):
    """Fit a default model and check that scoring the test split yields a float."""
    tagger = polygo.Sequence()
    tagger.fit(self.x_train, self.y_train)

    test_score = tagger.score(self.x_test, self.y_test)

    self.assertIsInstance(test_score, float)
def test_train_with_pretrained_embedding(self):
    """Fit a model constructed with ``self.embeddings``.

    The test split is passed as the third and fourth ``fit`` arguments.
    There are no explicit assertions; the test passes if construction and
    training finish without raising.
    """
    tagger = polygo.Sequence(embeddings=self.embeddings)

    # Same positional order as before: train inputs/labels, then test inputs/labels.
    fit_args = (self.x_train, self.y_train, self.x_test, self.y_test)
    tagger.fit(*fit_args)