def test_tokenize(self):
    """Check `_tokenize` output with the default spec and with `lowercase=False`."""
    # Default spec: the expected tokens are lowercased and carry no
    # trailing punctuation.
    default_spec = text_spec.AverageWordVecModelSpec()
    lowered_tokens = default_spec._tokenize('It\'s really good.')
    self.assertEqual(lowered_tokens, ['it\'s', 'really', 'good'])

    # With lowercase disabled the original casing is expected to survive.
    cased_spec = text_spec.AverageWordVecModelSpec(lowercase=False)
    cased_tokens = cased_spec._tokenize('That is so cool!!!')
    self.assertEqual(cased_tokens, ['That', 'is', 'so', 'cool'])
def setUp(self):
    """Create a `seq_len=5` model spec and attach a small fixed vocabulary."""
    super(AverageWordVecModelSpecTest, self).setUp()
    self.model_spec = text_spec.AverageWordVecModelSpec(seq_len=5)

    # Tokens are listed in id order, so each token's index is its vocab id.
    ordered_tokens = ['<PAD>', '<START>', '<UNKNOWN>', 'good', 'bad']
    self.vocab = collections.OrderedDict(
        (token, token_id) for token_id, token in enumerate(ordered_tokens))
    self.model_spec.vocab = self.vocab
def test_average_wordvec_model_create_v1_incompatible(self):
    """Expect a `ValueError` matching 'Incompatible versions' during creation."""
    # The whole spec -> data -> create pipeline stays inside the context
    # manager: any of these steps raising the matching error satisfies it.
    with self.assertRaisesRegex(ValueError, 'Incompatible versions'):
        spec = text_spec.AverageWordVecModelSpec(seq_len=2)
        dataset = text_dataloader.TextClassifierDataLoader.from_folder(
            self.text_dir, model_spec=spec)
        text_classifier.create(dataset, model_spec=spec)
def test_average_wordvec_model(self):
    """Train an average-word-vec classifier briefly and run the shared checks.

    Builds a `seq_len=2` spec, loads data from `self.text_dir`, trains for
    one epoch, then runs the accuracy, top-k prediction, export, and
    no-training helper checks in order.
    """
    spec = text_spec.AverageWordVecModelSpec(seq_len=2)
    dataset = text_dataloader.TextClassifierDataLoader.from_folder(
        self.text_dir, model_spec=spec)

    # Split the loaded data with a fraction of 0.5 (presumably half for
    # training and half for testing -- confirm against `split`).
    train_part, test_part = dataset.split(0.5)
    self.train_data = train_part
    self.test_data = test_part

    classifier = text_classifier.create(
        self.train_data,
        model_spec=spec,
        epochs=1,
        batch_size=1,
        shuffle=True,
    )

    # A threshold of 0.0 is passed to the accuracy checks below.
    self._test_accuracy(classifier, threshold=0.0)
    self._test_predict_top_k(classifier)
    self._test_export_to_tflite(
        classifier,
        threshold=0.0,
        expected_json_file='average_word_vec_metadata.json',
    )
    self._test_export_to_saved_model(classifier)
    self._test_export_labels(classifier)
    self._test_export_vocab(classifier)
    self._test_model_without_training(spec)