Esempio n. 1
0
 def test_read_labeled_background_from_file_loads_correct_instances(self):
     """Verify that labeled background is paired with the right instances.

     Three tab-separated instances are written to one file and three
     background lines (index, comma-separated labels, four background
     sentences) to another.  After reading both, every resulting
     ``LabeledBackgroundInstance`` must carry the expected wrapped instance,
     label list and background sentences.
     """
     instance_path = self.TEST_DIR + 'test_dataset_file'
     with open(instance_path, 'w') as instance_file:
         instance_file.writelines([
             "1\tinstance1\t0\n",
             "2\tinstance2\t1\n",
             "3\tinstance3\n",
         ])
     background_path = self.TEST_DIR + 'test_dataset_background'
     with open(background_path, 'w') as background_file:
         background_file.writelines([
             "1\t2\tb1\tb2\tb3\tb4\n",
             "2\t1,3\tb1\tb2\tb3\tb4\n",
             "3\t0\tb5\tb6\tb7\tb8\n",
         ])

     base_dataset = TextDataset.read_from_file(instance_path, TrueFalseInstance)
     labeled_dataset = TextDataset.read_labeled_background_from_file(
         base_dataset, background_path)

     # One row per expected instance: (index, text, label, background).
     expected_rows = [
         (1, "instance1", [2], ['b1', 'b2', 'b3', 'b4']),
         (2, "instance2", [1, 3], ['b1', 'b2', 'b3', 'b4']),
         (3, "instance3", [0], ['b5', 'b6', 'b7', 'b8']),
     ]
     assert len(labeled_dataset.instances) == 3
     for position, (index, text, label, background) in enumerate(expected_rows):
         labeled = labeled_dataset.instances[position]
         assert isinstance(labeled, LabeledBackgroundInstance)
         assert labeled.instance.index == index
         assert labeled.instance.text == text
         assert labeled.label == label
         assert labeled.background == background
    def test_indexed_instance_padding(self):
        """Check that ``pad`` brings every nested dimension to the requested size.

        ``self.instance`` is indexed with a freshly fitted ``DataIndexer``,
        padded with explicit lengths, and then both ``answers_indexed`` and
        ``background_indexed`` are walked to confirm each level has exactly
        the requested length.
        """
        indexer = DataIndexer()
        indexer.fit_word_dictionary(TextDataset([self.instance]))
        padded = self.instance.to_indexed_instance(indexer)

        lengths = {
            'num_question_tuples': 1,
            'num_background_tuples': 4,
            'num_slots': 3,
            'num_sentence_words': 6,
            'num_options': 4
        }
        padded.pad(lengths)

        expected_slots = lengths['num_slots']
        expected_words = lengths['num_sentence_words']

        # Answers are nested as options -> question tuples -> slots -> words.
        assert len(padded.answers_indexed) == lengths['num_options']
        for option_tuples in padded.answers_indexed:
            assert len(option_tuples) == lengths['num_question_tuples']
            for answer_tuple in option_tuples:
                assert len(answer_tuple) == expected_slots
                assert all(len(slot) == expected_words for slot in answer_tuple)

        # Background is nested as background tuples -> slots -> words.
        assert len(padded.background_indexed) == lengths['num_background_tuples']
        for background_tuple in padded.background_indexed:
            assert len(background_tuple) == expected_slots
            assert all(len(slot) == expected_words for slot in background_tuple)
Esempio n. 3
0
    def test_fit_word_dictionary_respects_min_count(self):
        """Check that ``min_count`` filters which words enter the index.

        The text contains 'a' four times, 'b' twice and 'c' three times.
        With ``min_count=4`` only 'a' is expected in the index; with
        ``min_count=1`` all three words are expected.
        """
        dataset = TextDataset(
            [TextClassificationInstance("a a a a b b c c c", True)])

        # Each scenario: (min_count, words that must be indexed,
        #                 words that must be absent).
        scenarios = [
            (4, ('a',), ('b', 'c')),
            (1, ('a', 'b', 'c'), ()),
        ]
        for threshold, kept_words, dropped_words in scenarios:
            indexer = DataIndexer()
            indexer.fit_word_dictionary(dataset, min_count=threshold)
            for word in kept_words:
                assert word in indexer.words_in_index()
            for word in dropped_words:
                assert word not in indexer.words_in_index()
Esempio n. 4
0
 def test_read_from_file_with_no_default_label(self):
     """Check the labels produced when reading instances without a default.

     Two lines carry an explicit label column ("0" and "1") and the third
     has none; the test expects ``False``, ``True`` and ``None``
     respectively, alongside the correct index and text.
     """
     instance_path = self.TEST_DIR + 'test_dataset_file'
     with open(instance_path, 'w') as instance_file:
         instance_file.writelines([
             "1\tinstance1\t0\n",
             "2\tinstance2\t1\n",
             "3\tinstance3\n",
         ])

     loaded = TextDataset.read_from_file(instance_path, TrueFalseInstance)

     # One row per expected instance: (index, text, label).
     expected_rows = [
         (1, "instance1", False),
         (2, "instance2", True),
         (3, "instance3", None),
     ]
     assert len(loaded.instances) == 3
     for position, (index, text, label) in enumerate(expected_rows):
         current = loaded.instances[position]
         assert current.index == index
         assert current.text == text
         # Identity check on purpose: the label must be the exact singleton.
         assert current.label is label
Esempio n. 5
0
 def score_instance(self, instance):
     """Score a single instance by wrapping it in a one-element dataset.

     The instance is placed in a ``TextDataset`` and handed to
     ``self.solver.score_text_dataset``; whatever that call returns is
     passed back unchanged.
     """
     return self.solver.score_text_dataset(TextDataset([instance]))