def load_year_index_infos(index_dir, years, word_file, num_words=-1):
    """
    Build per-year index information from the pickled indices in index_dir.

    If index_dir contains a file named "index.pkl", that single index is
    loaded and handed to load_year_index_infos_common, so every year shares
    it. Otherwise each year's index is loaded from "<year>-index.pkl" in
    index_dir (i.e. each year is assumed to be indexed separately).

    Arguments:
        index_dir -- directory holding the pickled index file(s)
        years -- years to load; passed through to load_year_words
        word_file -- word-list source; passed through to load_year_words
        num_words -- if not -1, each year's word list is cut down to
                     word_list[:num_words] before being matched to the index

    Returns a dictionary mapping year to:
        "index": word->id index for that year.
        "list": word list for that year, as returned by get_word_indices
        "indices": indices corresponding to the word list, as returned by
                   get_word_indices
    """
    if "index.pkl" in os.listdir(index_dir):
        # Join with a path separator: plain concatenation pointed at
        # "<index_dir>index.pkl" whenever index_dir had no trailing slash,
        # even though the listdir() check above had just found the file.
        common_index = load_pickle(os.path.join(index_dir, "index.pkl"))
        return load_year_index_infos_common(
            common_index, years, word_file, num_words=num_words)

    year_index_infos = collections.defaultdict(dict)
    word_lists = load_year_words(word_file, years)
    for year, word_list in word_lists.iteritems():
        year_index = load_pickle(
            os.path.join(index_dir, str(year) + "-index.pkl"))
        year_index_infos[year]["index"] = year_index
        if num_words != -1:
            # -1 is the "keep everything" sentinel; any other value truncates.
            word_list = word_list[:num_words]
        word_list, word_indices = get_word_indices(word_list, year_index)
        year_index_infos[year]["list"] = word_list
        year_index_infos[year]["indices"] = word_indices
    return year_index_infos
def load_year_index_infos_common(common_index, years, word_file, num_words=-1):
    """
    Build per-year index information when all years share one index.

    Every year's "index" entry is the very same common_index object (it is
    not copied); only the word list and its indices differ between years.

    Arguments:
        common_index -- word->id index used for every year
        years -- years to load; passed through to load_year_words
        word_file -- word-list source; passed through to load_year_words
        num_words -- if not -1, each year's word list is cut down to
                     word_list[:num_words] before being matched to the index

    Returns a dictionary mapping year to:
        "index": the shared word->id index (common_index)
        "list": word list for that year, as returned by get_word_indices
        "indices": indices corresponding to the word list, as returned by
                   get_word_indices
    """
    # -1 is the "keep everything" sentinel; any other value truncates.
    truncate = num_words != -1
    infos = collections.defaultdict(dict)
    for yr, words in load_year_words(word_file, years).iteritems():
        entry = infos[yr]
        entry["index"] = common_index
        candidates = words[:num_words] if truncate else words
        kept_words, kept_indices = get_word_indices(candidates, common_index)
        entry["list"] = kept_words
        entry["indices"] = kept_indices
    return infos