def test_get_all_test_documents(self):
    """Check the document names returned for the example test data set.

    Builds a ``TestDatasetHandler`` for ``"example_test_data_set"`` and
    asserts that the keys of ``get_all_test_documents()`` are exactly the
    four expected names. Order is ignored because both sides are compared
    as sets.
    """
    handler = TestDatasetHandler("example_test_data_set")
    documents = handler.get_all_test_documents()
    expected_names = {"cat1", "cat2", "dog1", "no_category1"}
    # Only the keys (document names) are checked, not the associated values.
    actual_names = set(documents.keys())
    self.assertEqual(actual_names, expected_names)
def prepare_gold_standard_categorization(experiment_spec, gold_standard_categorization_directory):
    """Make sure the gold standard categorization for an experiment is cached.

    The cache key is the test data id that ``dataset_id_handler`` derives
    from ``experiment_spec``. When an entry for that key already exists in
    ``gold_standard_categorization_directory`` a notice is printed and
    nothing else happens. Otherwise the categorization is obtained from a
    ``TestDatasetHandler`` built from ``experiment_spec["test_dataset"]["id"]``,
    pretty-printed, and written to the cache under that key.

    Returns:
        None in both cases.
    """
    cache_key = dataset_id_handler.get_test_data_id(experiment_spec)
    already_cached = cache.in_cache(gold_standard_categorization_directory, cache_key)

    if not already_cached:
        handler = TestDatasetHandler(experiment_spec["test_dataset"]["id"])
        categorization = handler.get_gold_standard_categorization()
        pprint.pprint(categorization)
        cache.write(gold_standard_categorization_directory, cache_key, categorization)
    else:
        # Cache hit: report it and skip rebuilding the categorization.
        print("Gold standard categorization in cache: " + cache_key)
 def test_get_gold_standard_cateogorization(self):
     """Check the category names in the example gold standard categorization.

     Builds a ``TestDatasetHandler`` for ``"example_test_data_set"`` and
     asserts that the keys of ``get_gold_standard_categorization()`` are
     exactly ``"cats"`` and ``"dogs"``.
     """
     handler = TestDatasetHandler("example_test_data_set")
     categorization = handler.get_gold_standard_categorization()
     expected_categories = {"cats", "dogs"}
     # Only the category names (keys) are verified here.
     actual_categories = set(categorization.keys())
     self.assertEqual(actual_categories, expected_categories)