Esempio n. 1
0
 def test_end_to_end_conversion_to_arrays_with_word_and_character_tokenizer(
         self):
     """Index, pad and convert a sentence with the 'words and characters' tokenizer.

     The instance is padded with ``num_sentence_words=6`` and
     ``num_word_characters=5``.  Every expected input row is a word index
     followed by character indices, zero-filled to five entries; the row
     for "sentence" lists only the characters s, e, n, t.  The expected
     labels are one-element rows: a leading ``[0]``, the four word indices
     in order, and finally the end index.
     """
     # Class-level assignment; this method does not restore it afterwards.
     # NOTE(review): presumably reset by the fixture's teardown -- confirm.
     TextInstance.tokenizer = tokenizers['words and characters'](Params({}))

     padding_lengths = {'num_sentence_words': 6, 'num_word_characters': 5}
     indexed = SentenceInstance("this is a sentence").to_indexed_instance(
         self.data_indexer)
     indexed.pad(padding_lengths)
     inputs, labels = indexed.as_training_data()

     zero_row = [0, 0, 0, 0, 0]
     start_row = [self.start_index, 0, 0, 0, 0]
     this_row = [self.this_index, self.t_char_index, self.h_char_index,
                 self.i_char_index, self.s_char_index]
     is_row = [self.is_index, self.i_char_index, self.s_char_index, 0, 0]
     a_row = [self.a_index, self.a_char_index, 0, 0, 0]
     sentence_row = [self.sentence_index, self.s_char_index,
                     self.e_char_index, self.n_char_index,
                     self.t_char_index]
     assert_array_equal(
         inputs,
         [zero_row, start_row, this_row, is_row, a_row, sentence_row])

     expected_labels = [
         [0],
         [self.this_index],
         [self.is_index],
         [self.a_index],
         [self.sentence_index],
         [self.end_index],
     ]
     assert_array_equal(labels, expected_labels)
Esempio n. 2
0
 def test_end_to_end_conversion_to_arrays(self):
     """Index, pad and convert "this is a sentence" into training arrays.

     The instance is padded with ``num_sentence_words=7``.  The expected
     input is two zeros, the start index, then the four word indices.  The
     expected labels are one-element rows: two ``[0]`` rows, the four word
     indices, and finally the end index.
     """
     indexed = SentenceInstance("this is a sentence").to_indexed_instance(
         self.data_indexer)
     indexed.pad({'num_sentence_words': 7})
     inputs, labels = indexed.as_training_data()

     # Both expected arrays share the same run of word indices.
     word_ids = [self.this_index, self.is_index, self.a_index,
                 self.sentence_index]

     expected_inputs = [0, 0, self.start_index] + word_ids
     assert_array_equal(inputs, expected_inputs)

     expected_labels = [[0], [0]]
     expected_labels.extend([word_id] for word_id in word_ids)
     expected_labels.append([self.end_index])
     assert_array_equal(labels, expected_labels)
Esempio n. 3
0
 def test_end_to_end_conversion_to_arrays_with_character_tokenizer(self):
     """Index, pad and convert "a sentence" with the 'characters' tokenizer.

     The instance is padded with ``num_sentence_words=13``.  The expected
     input is two zeros, the start index, then one index per character of
     "a sentence" (including the space).  The expected labels are
     one-element rows: two ``[0]`` rows, the same character indices, and
     finally the end index.
     """
     # Class-level assignment; this method does not restore it afterwards.
     # NOTE(review): presumably reset by the fixture's teardown -- confirm.
     TextInstance.tokenizer = tokenizers['characters'](Params({}))

     indexed = SentenceInstance("a sentence").to_indexed_instance(
         self.data_indexer)
     indexed.pad({'num_sentence_words': 13})
     inputs, labels = indexed.as_training_data()

     # One index per character of "a sentence", in order.
     char_ids = [
         self.a_index, self.space_index,
         self.s_index, self.e_index, self.n_index, self.t_index,
         self.e_index, self.n_index, self.c_index, self.e_index,
     ]

     expected_inputs = [0, 0, self.start_index] + char_ids
     assert_array_equal(inputs, expected_inputs)

     expected_labels = [[0], [0]]
     expected_labels.extend([char_id] for char_id in char_ids)
     expected_labels.append([self.end_index])
     assert_array_equal(labels, expected_labels)