def test_get_all_test_documents(self):
    """Check the document names returned for the example test data set.

    Builds a ``TestDatasetHandler`` for ``"example_test_data_set"`` and
    asserts that the keys of ``get_all_test_documents()`` are exactly the
    four expected document names, ignoring order.
    """
    handler = TestDatasetHandler("example_test_data_set")
    documents = handler.get_all_test_documents()

    expected_names = {"cat1", "cat2", "dog1", "no_category1"}
    # Compare as sets so the order in which documents are returned is irrelevant.
    actual_names = set(documents.keys())

    self.assertEqual(actual_names, expected_names)
def prepare_gold_standard_categorization(
    experiment_spec, gold_standard_categorization_directory
):
    """Write the gold standard categorization to the cache unless already there.

    The cache key is obtained from ``dataset_id_handler.get_test_data_id``.
    When ``cache.in_cache`` reports an entry for that key, a message is
    printed and nothing else happens. Otherwise the categorization is
    fetched from a ``TestDatasetHandler``, pretty-printed, and stored with
    ``cache.write``.

    Args:
        experiment_spec: Experiment description; it is passed to
            ``dataset_id_handler.get_test_data_id`` and must provide
            ``experiment_spec["test_dataset"]["id"]``.
        gold_standard_categorization_directory: Cache location handed as
            the first argument to ``cache.in_cache`` and ``cache.write``.

    Returns:
        None.
    """
    data_id = dataset_id_handler.get_test_data_id(experiment_spec)

    already_cached = cache.in_cache(gold_standard_categorization_directory, data_id)
    if already_cached:
        print("Gold standard categorization in cache: " + data_id)
        return

    # The dataset id is only looked up when the cache does not hold an entry.
    dataset_name = experiment_spec["test_dataset"]["id"]
    handler = TestDatasetHandler(dataset_name)
    categorization = handler.get_gold_standard_categorization()

    pprint.pprint(categorization)
    cache.write(gold_standard_categorization_directory, data_id, categorization)
def test_get_gold_standard_cateogorization(self):
    """Check the category names in the example gold standard categorization.

    Builds a ``TestDatasetHandler`` for ``"example_test_data_set"`` and
    asserts that the keys of ``get_gold_standard_categorization()`` are
    exactly ``"cats"`` and ``"dogs"``.
    """
    handler = TestDatasetHandler("example_test_data_set")
    categorization = handler.get_gold_standard_categorization()

    # Only the category names (the keys) are verified here, not their contents.
    actual_categories = set(categorization.keys())

    self.assertEqual(actual_categories, {"cats", "dogs"})