Esempio n. 1
0
 def test_end_to_end_conversion_to_arrays_with_word_and_character_tokenizer(self):
     """Verify padded training arrays under the 'words and characters' tokenizer.

     Indexes "this is a sentence", pads to 6 words with 5 slots per word, and
     compares both arrays returned by ``as_training_data()`` with hand-written
     expectations.  Each expected input row is a word index followed by that
     word's character indices (zero-filled on the right, limited to 5 slots),
     and the first row is all zeros.  Each expected label is a one-element row.
     """
     # NOTE: this replaces the class-level tokenizer and is not undone in this method.
     TextInstance.tokenizer = tokenizers['words and characters'](Params({}))
     padding_lengths = {'num_sentence_words': 6, 'num_word_characters': 5}

     indexed = SentenceInstance("this is a sentence").to_indexed_instance(self.data_indexer)
     indexed.pad(padding_lengths)
     inputs, labels = indexed.as_training_data()

     padding_row = [0, 0, 0, 0, 0]
     start_row = [self.start_index, 0, 0, 0, 0]
     this_row = [self.this_index, self.t_char_index, self.h_char_index,
                 self.i_char_index, self.s_char_index]
     is_row = [self.is_index, self.i_char_index, self.s_char_index, 0, 0]
     a_row = [self.a_index, self.a_char_index, 0, 0, 0]
     sentence_row = [self.sentence_index, self.s_char_index, self.e_char_index,
                     self.n_char_index, self.t_char_index]
     assert_array_equal(
         inputs, [padding_row, start_row, this_row, is_row, a_row, sentence_row])

     # Expected labels: 0 for the leading all-zero row, then the four word
     # indices, then the end index.
     label_sequence = [0, self.this_index, self.is_index, self.a_index,
                       self.sentence_index, self.end_index]
     assert_array_equal(labels, [[label] for label in label_sequence])
Esempio n. 2
0
 def test_end_to_end_conversion_to_arrays(self):
     """Verify padded word and label arrays for "this is a sentence".

     The instance is indexed with ``self.data_indexer`` and padded to 7 words.
     The expected input is two zeros, the start index, then the four word
     indices; the expected labels are two ``[0]`` rows, the four word indices,
     then the end index, each wrapped in its own one-element row.
     """
     indexed = SentenceInstance("this is a sentence").to_indexed_instance(self.data_indexer)
     indexed.pad({'num_sentence_words': 7})
     inputs, labels = indexed.as_training_data()

     sentence_words = [self.this_index, self.is_index, self.a_index, self.sentence_index]

     expected_inputs = [0, 0, self.start_index] + sentence_words
     assert_array_equal(inputs, expected_inputs)

     expected_labels = [[label] for label in [0, 0] + sentence_words + [self.end_index]]
     assert_array_equal(labels, expected_labels)
Esempio n. 3
0
 def test_end_to_end_conversion_to_arrays_with_character_tokenizer(self):
     """Verify padded arrays for "a sentence" under the 'characters' tokenizer.

     The instance is padded to 13 positions.  The expected input is two zeros,
     the start index, then one index per character of "a sentence" (including
     the space); the expected labels are two ``[0]`` rows, the same character
     indices, then the end index, each in a one-element row.
     """
     # NOTE: this replaces the class-level tokenizer and is not undone in this method.
     TextInstance.tokenizer = tokenizers['characters'](Params({}))

     indexed = SentenceInstance("a sentence").to_indexed_instance(self.data_indexer)
     indexed.pad({'num_sentence_words': 13})
     inputs, labels = indexed.as_training_data()

     # Indices for the characters of "a sentence", in order.
     characters = [
         self.a_index, self.space_index,
         self.s_index, self.e_index, self.n_index, self.t_index,
         self.e_index, self.n_index, self.c_index, self.e_index,
     ]

     expected_inputs = [0, 0, self.start_index] + characters
     assert_array_equal(inputs, expected_inputs)

     expected_labels = [[label] for label in [0, 0] + characters + [self.end_index]]
     assert_array_equal(labels, expected_labels)
Esempio n. 4
0
    def test_read_from_line_handles_two_column(self):
        """Check that ``read_from_line`` recovers text and index from a built line.

        The line is produced by ``self.instance_to_line`` from a text and an
        index (helper defined outside this method); the parsed instance must
        carry both values back and have no label.
        """
        expected_index = 23
        expected_text = "this is a sentence"

        parsed = SentenceInstance.read_from_line(
            self.instance_to_line(expected_text, expected_index))

        assert parsed.text == expected_text
        assert parsed.index == expected_index
        assert parsed.label is None
Esempio n. 5
0
 def test_read_from_line_handles_one_column(self):
     """Check that a bare sentence line parses to text with no label or index."""
     raw_line = "this is a sentence"
     parsed = SentenceInstance.read_from_line(raw_line)

     # The whole line is the text; label and index must both be None.
     assert parsed.text == raw_line
     assert parsed.label is None
     assert parsed.index is None