def get_gold_standard_categorization(self):
     """Build the reference (gold standard) categorization from the category directories.

     Every name returned by ``__get_all_category_directory_names`` is resolved
     to a path via ``__get_category_path``; the files found there are stored
     under the index term that ``n_gram_handler.string_to_index_term``
     produces for the category name.

     Returns:
         dict: index term of each category -> the result of
         ``dataset_handler.get_names_of_files_in_directory`` for that
         category's path (presumably the document file names; confirm
         against the helper).
     """
     categorization = {}
     for directory_name in self.__get_all_category_directory_names():
         # List the category's documents first, then derive its key.
         documents = dataset_handler.get_names_of_files_in_directory(
             self.__get_category_path(directory_name)
         )
         categorization[n_gram_handler.string_to_index_term(directory_name)] = documents
     return categorization
 def get_all_test_documents(self):
     """Load every document found one level below ``self.data_path``.

     Each sub-directory reported by
     ``dataset_handler.get_all_subdirectory_names`` is scanned, and every
     file in it is read with ``dataset_handler.get_document_as_string``
     using ``self.encoding``.

     Returns:
         dict: file name -> document content as returned by
         ``get_document_as_string``. Entries are keyed by file name alone,
         so a name that occurs in more than one sub-directory keeps only
         the document read last.
     """
     documents_by_file_name = {}
     for directory_name in dataset_handler.get_all_subdirectory_names(self.data_path):
         directory_path = os.path.join(self.data_path, directory_name)
         # Merge this directory's documents into the overall mapping.
         documents_by_file_name.update({
             file_name: dataset_handler.get_document_as_string(
                 os.path.join(directory_path, file_name), self.encoding)
             for file_name in dataset_handler.get_names_of_files_in_directory(directory_path)
         })
     return documents_by_file_name