Ejemplo n.º 1
0
def get_word2frequency():
    """Build a word -> frequency mapping from the enwiki frequency table.

    The table is read from ``enwiki_frequency_table.tsv`` inside
    ``VARIOUS_DIR`` via ``yield_lines``. Every yielded line must split on
    tabs into exactly two fields, ``word`` and ``frequency``; any other
    field count raises ``ValueError`` during unpacking, as does a frequency
    that ``int()`` cannot parse.

    Returns:
        dict: each word mapped to its frequency as an ``int``. If a word
        occurs on several lines, the last occurrence wins.
    """
    table_file = os.path.join(VARIOUS_DIR, 'enwiki_frequency_table.tsv')
    # Split lazily so rows are still parsed one at a time, in file order.
    split_rows = (row.split('\t') for row in yield_lines(table_file))
    return {token: int(count) for token, count in split_rows}
Ejemplo n.º 2
0
def get_word2rank(vocab_size=50000):
    """Build a word -> rank mapping from the enwiki frequency table.

    The rank of a word is the 0-based index of its line in
    ``enwiki_frequency_table.tsv`` (located in ``VARIOUS_DIR`` and read via
    ``yield_lines``). Reading stops as soon as the 1-based line number
    exceeds ``vocab_size``.
    NOTE(review): presumably the table is sorted by descending frequency,
    so a lower rank means a more frequent word - confirm against the file.

    Args:
        vocab_size: maximum number of leading lines to take from the table.

    Returns:
        dict: each word mapped to its 0-based line index. If a word occurs
        on several lines, the last occurrence within the limit wins.

    Raises:
        ValueError: if a consumed line does not split on tabs into exactly
            two fields.
    """
    table_file = os.path.join(VARIOUS_DIR, 'enwiki_frequency_table.tsv')
    ranks = {}
    # Count lines from 1 so the cutoff compares directly with vocab_size.
    for line_number, row in enumerate(yield_lines(table_file), start=1):
        if line_number > vocab_size:
            break
        token, _frequency = row.split('\t')
        ranks[token] = line_number - 1
    return ranks