def test_get_merged_posting_lists_empty(self):
    """Merging with an empty posting list yields the non-empty list.

    The merge is exercised with the empty list in each argument position,
    and both results are expected to equal the populated list.
    """
    populated = [(0, 5), (1, 1), (2, 2)]
    empty = []
    expected = [(0, 5), (1, 1), (2, 2)]

    # Run both argument orders before asserting on either result.
    empty_as_second = index_operations.get_merged_posting_lists(populated, empty)
    empty_as_first = index_operations.get_merged_posting_lists(empty, populated)

    self.assertEqual(empty_as_second, expected)
    self.assertEqual(empty_as_first, expected)
def get_seed_words_posting_lists(seed_words_spec, index_directory, indices_id):
    """Return the merged posting list of every seed-word group.

    Args:
        seed_words_spec: Mapping of category -> word groups, where the word
            groups are themselves a mapping of group id -> iterable of
            reference words.
        index_directory: Passed through unchanged to ``cache.load``.
        indices_id: Iterable of index identifiers; each one is loaded with
            ``cache.load`` and searched for the reference words.

    Returns:
        A dict shaped ``{category: {group_id: postings}}``. Every group id
        listed in ``seed_words_spec`` is present. A group whose words were
        found in no index (including the case where ``indices_id`` is
        empty) maps to an empty list.
    """
    # Build the complete category -> group skeleton up front so the shape of
    # the result never depends on how many indices get searched. Previously
    # the group entries were only created inside the index loop, so an empty
    # ``indices_id`` returned categories without any group ids.
    posting_lists = {
        category: {group_id: [] for group_id in seed_words_spec[category]}
        for category in seed_words_spec
    }

    for index_id in indices_id:
        loaded_index = cache.load(index_directory, index_id)
        print("merging postining_list for index:")
        print(loaded_index["index_type"])
        term_index = loaded_index["index"]

        for category in seed_words_spec:
            word_groups = seed_words_spec[category]
            category_postings = posting_lists[category]
            for group_id in word_groups:
                for reference_word in word_groups[group_id]:
                    reference_term = n_gram_handler.string_to_index_term(reference_word)
                    if reference_term not in term_index:
                        continue
                    # Fold this term's postings into what the group has
                    # accumulated so far (across words and across indices).
                    category_postings[group_id] = index_operations.get_merged_posting_lists(
                        category_postings[group_id], term_index[reference_term]
                    )

    return posting_lists
def test_get_merged_posting_lists(self):
    """Merging two overlapping posting lists combines shared entries.

    The entry keyed "1" appears in both inputs with value 1 and is expected
    once in the output with value 2; entries unique to either input are
    expected to be carried over, ordered by key.
    """
    left = [("0", 5), ("1", 1), ("2", 2)]
    right = [("1", 1), ("3", 3)]
    expected = [("0", 5), ("1", 2), ("2", 2), ("3", 3)]

    actual = index_operations.get_merged_posting_lists(left, right)

    self.assertEqual(actual, expected)