def load_year_index_infos(index_dir, years, word_file, num_words=-1):
    """
    Build per-year index information from the pickled indices in index_dir.

    If index_dir contains a file named "index.pkl", that single index is
    loaded and shared by every year (the work is delegated to
    load_year_index_infos_common). Otherwise each year is expected to have
    its own "<year>-index.pkl" file inside index_dir.

    Arguments:
        index_dir: directory holding the pickled index file(s); works with
            or without a trailing path separator.
        years: years passed through to load_year_words.
        word_file: word file passed through to load_year_words.
        num_words: if not -1, each year's word list is cut down to
            word_list[:num_words] before the indices are looked up.

    Returns dictionary mapping year to:
        "index": word->id index for that year.
        "list": word_list for that year (as returned by get_word_indices)
        "indices": set of valid indices corresponding to the word list
    """
    if "index.pkl" in os.listdir(index_dir):
        # Join with os.path.join: plain string concatenation dropped the
        # separator and pointed at "<index_dir>index.pkl" whenever index_dir
        # had no trailing slash, even though listdir had just found the file.
        common_index = load_pickle(os.path.join(index_dir, "index.pkl"))
        return load_year_index_infos_common(common_index,
                                            years,
                                            word_file,
                                            num_words=num_words)
    year_index_infos = collections.defaultdict(dict)
    word_lists = load_year_words(word_file, years)
    for year, word_list in word_lists.iteritems():
        year_index = load_pickle(
            os.path.join(index_dir, str(year) + "-index.pkl"))
        year_index_infos[year]["index"] = year_index
        if num_words != -1:
            word_list = word_list[:num_words]
        # get_word_indices hands back the (possibly adjusted) word list
        # together with the matching indices from this year's index.
        word_list, word_indices = get_word_indices(word_list, year_index)
        year_index_infos[year]["list"] = word_list
        year_index_infos[year]["indices"] = word_indices
    return year_index_infos
def load_year_index_infos_common(common_index, years, word_file, num_words=-1):
    """
    Build per-year index information when all years share one index.

    Returns dictionary mapping year to:
        "index": the shared common_index object (the same one for every year)
        "list": word list for that year, as returned by get_word_indices
        "indices": indices for that word list, as returned by get_word_indices

    The per-year word lists come from load_year_words(word_file, years).
    When num_words is not -1, each list is sliced to word_list[:num_words]
    before it is handed to get_word_indices.
    """
    infos = collections.defaultdict(dict)
    limit_words = num_words != -1
    for year, words in load_year_words(word_file, years).iteritems():
        entry = infos[year]
        entry["index"] = common_index
        if limit_words:
            words = words[:num_words]
        kept_words, kept_indices = get_word_indices(words, common_index)
        entry["list"] = kept_words
        entry["indices"] = kept_indices
    return infos
Example #3
0
def load_year_index_infos_common(common_index, years, word_file, num_words=-1):
    """
    Build per-year index information when all years share one index.

    Returns dictionary mapping year to:
        "index": the shared common_index object (the same one for every year)
        "list": word list for that year, as returned by get_word_indices
        "indices": indices for that word list, as returned by get_word_indices

    The per-year word lists come from load_year_words(word_file, years).
    When num_words is not -1, each list is sliced to word_list[:num_words]
    before it is handed to get_word_indices.
    """
    infos = collections.defaultdict(dict)
    limit_words = num_words != -1
    for year, words in load_year_words(word_file, years).iteritems():
        entry = infos[year]
        entry["index"] = common_index
        if limit_words:
            words = words[:num_words]
        kept_words, kept_indices = get_word_indices(words, common_index)
        entry["list"] = kept_words
        entry["indices"] = kept_indices
    return infos
Example #4
0
def load_year_index_infos(index_dir, years, word_file, num_words=-1):
    """
    Build per-year index information from the pickled indices in index_dir.

    If index_dir contains a file named "index.pkl", that single index is
    loaded and shared by every year (the work is delegated to
    load_year_index_infos_common). Otherwise each year is expected to have
    its own "<year>-index.pkl" file inside index_dir.

    Arguments:
        index_dir: directory holding the pickled index file(s); works with
            or without a trailing path separator.
        years: years passed through to load_year_words.
        word_file: word file passed through to load_year_words.
        num_words: if not -1, each year's word list is cut down to
            word_list[:num_words] before the indices are looked up.

    Returns dictionary mapping year to:
        "index": word->id index for that year.
        "list": word_list for that year (as returned by get_word_indices)
        "indices": set of valid indices corresponding to the word list
    """
    if "index.pkl" in os.listdir(index_dir):
        # Join with os.path.join: plain string concatenation dropped the
        # separator and pointed at "<index_dir>index.pkl" whenever index_dir
        # had no trailing slash, even though listdir had just found the file.
        common_index = load_pickle(os.path.join(index_dir, "index.pkl"))
        return load_year_index_infos_common(common_index, years, word_file,
                                            num_words=num_words)
    year_index_infos = collections.defaultdict(dict)
    word_lists = load_year_words(word_file, years)
    for year, word_list in word_lists.iteritems():
        year_index = load_pickle(
            os.path.join(index_dir, str(year) + "-index.pkl"))
        year_index_infos[year]["index"] = year_index
        if num_words != -1:
            word_list = word_list[:num_words]
        # get_word_indices hands back the (possibly adjusted) word list
        # together with the matching indices from this year's index.
        word_list, word_indices = get_word_indices(word_list, year_index)
        year_index_infos[year]["list"] = word_list
        year_index_infos[year]["indices"] = word_indices
    return year_index_infos