예제 #1
0
 def test_join_word_tokens_with_title(self):
     """Check the word output when both 'tokens' and 'title' carry text.

     With tokens 'A B C' and title 'A B', the expected vector corresponds to
     A and B each making up 2/5, and C 1/5, of the five words found in the
     two fields combined.
     """
     example = {'tokens': 'A B C', 'title': 'A B', 'tags': ''}
     preprocess = tag_prediction_preprocessing.build_to_ids_fn(
         ['A', 'B', 'C'], ['D', 'E', 'F'])
     # Element 0 of the preprocessing result is the word-side output.
     word_output = self.evaluate(preprocess(example)[0])
     self.assertAllClose(word_output, [2 / 5, 2 / 5, 1 / 5])
 def test_word_tokens_all_oov(self):
     """Check the word output when no token appears in the word vocabulary.

     None of 'C D E F G' is in the word vocabulary ['A', 'B'], and the
     expected word-side output is all zeros.
     """
     words = ['A', 'B']
     tags = ['D', 'E', 'F']
     example = {'tokens': 'C D E F G', 'title': '', 'tags': ''}
     outputs = tag_prediction_preprocessing.build_to_ids_fn(
         words, tags)(example)
     expected = [0, 0]
     self.assertAllClose(self.evaluate(outputs[0]), expected)
예제 #3
0
 def test_tag_tokens_to_ids_with_oov(self):
     """Check the tag output when one tag is outside the tag vocabulary.

     The tags string 'D|E|F' contains 'F', which is not in the tag
     vocabulary ['D', 'E']; the expected tag-side output is [1, 1].
     """
     example = {'tokens': '', 'title': '', 'tags': 'D|E|F'}
     preprocess = tag_prediction_preprocessing.build_to_ids_fn(
         ['A', 'B', 'C'], ['D', 'E'])
     # Element 1 of the preprocessing result is the tag-side output.
     tag_output = self.evaluate(preprocess(example)[1])
     self.assertAllClose(tag_output, [1, 1])
 def test_word_tokens_to_ids_with_duplicates_and_oov(self):
     """Check the word output for repeated tokens mixed with unknown ones.

     In 'A B C A C C A B', A occurs three times and B twice, while C is not
     in the word vocabulary ['A', 'B']. The expected vector [3/5, 2/5]
     corresponds to those counts divided by the five in-vocabulary tokens.
     """
     words = ['A', 'B']
     tags = ['D', 'E', 'F']
     example = {'tokens': 'A B C A C C A B', 'title': '', 'tags': ''}
     outputs = tag_prediction_preprocessing.build_to_ids_fn(
         words, tags)(example)
     word_output = self.evaluate(outputs[0])
     self.assertAllClose(word_output, [3 / 5, 2 / 5])