Beispiel #1
0
def merge_lists(input_names, output_name):
    """Merge the wordlists named in `input_names` and write the result.

    Each input name is passed to `read_values` with `cutoff=0`. That call
    yields a pair, of which only the first element is kept. The kept
    elements are combined with `merge_counts`, and the merged result is
    handed to `write_wordlist` together with `output_name`.
    """
    # Lazily read one input at a time, in the order given.
    readings = (read_values(name, cutoff=0) for name in input_names)
    # The second element of each pair (the total) is not needed here.
    per_file_counts = [counts for counts, _total in readings]
    combined = merge_counts(per_file_counts)
    write_wordlist(combined, output_name)
Beispiel #2
0
def merge_lists(input_names, output_name, cutoff=0, max_words=1000000):
    """Merge several wordlists into one and write it to `output_name`.

    `cutoff` and `max_words` are forwarded unchanged as keyword arguments
    to `read_values` for every input. Only the first element of the pair
    returned by `read_values` is kept. The kept elements are merged with
    `merge_counts` and written out with `write_wordlist`.
    """
    # The same reading options apply to every input file.
    read_options = {'cutoff': cutoff, 'max_words': max_words}
    readings = (read_values(name, **read_options) for name in input_names)
    # Discard the second element of each pair (the total); it is unused.
    per_file_counts = [counts for counts, _total in readings]
    combined = merge_counts(per_file_counts)
    write_wordlist(combined, output_name)
Beispiel #3
0
def merge_lists(input_names, output_name, cutoff, lang):
    """Merge frequency lists read from `input_names` into `output_name`.

    Every input is read with `read_freqs`, forwarding `cutoff` and a
    language value. The results are merged with `merge_freqs` and the
    merged result is passed to `write_wordlist`.

    When `lang` is 'zh', `None` is passed to `read_freqs` as the language
    instead.
    """
    # Chinese tokenization must not be used while building wordlists,
    # because that would create a circular dependency.
    read_lang = None if lang == 'zh' else lang

    per_file_freqs = [
        read_freqs(name, cutoff=cutoff, lang=read_lang)
        for name in input_names
    ]
    combined = merge_freqs(per_file_freqs)
    write_wordlist(combined, output_name)
Beispiel #4
0
def merge_lists(input_names, output_name, cutoff, lang):
    """Read, merge, and write out the frequency lists in `input_names`.

    Each input name is loaded through `read_freqs` with the given
    `cutoff` and a language value. The loaded results are combined by
    `merge_freqs` and written via `write_wordlist` to `output_name`.

    A `lang` of 'zh' is replaced by `None` before any input is read.
    """
    # Using Chinese tokenization here would create a circular dependency,
    # so it is switched off while the wordlists are being built.
    if lang == 'zh':
        lang = None

    def load(name):
        # Read a single input with the shared cutoff and language.
        return read_freqs(name, cutoff=cutoff, lang=lang)

    loaded = list(map(load, input_names))
    combined = merge_freqs(loaded)
    write_wordlist(combined, output_name)
Beispiel #5
0
def merge_lists(input_names, output_name):
    """Merge the frequency lists in `input_names` and write the result.

    Each input is read with `read_freqs` using a fixed `cutoff=2`. The
    results are merged by `merge_freqs` and the merged result is passed
    to `write_wordlist` together with `output_name`.
    """
    per_file_freqs = [read_freqs(name, cutoff=2) for name in input_names]
    combined = merge_freqs(per_file_freqs)
    write_wordlist(combined, output_name)
Beispiel #6
0
def merge_lists(input_names, output_name):
    """Combine several frequency lists and write them to `output_name`.

    Inputs are read one at a time, in order, with `read_freqs` and a
    fixed `cutoff=2`. The collected results are merged with `merge_freqs`
    and written out with `write_wordlist`.
    """
    collected = []
    for name in input_names:
        freqs = read_freqs(name, cutoff=2)
        collected.append(freqs)

    combined = merge_freqs(collected)
    write_wordlist(combined, output_name)
Beispiel #7
0
def handle_counts(filename_in, filename_out):
    """Count the tokens of `filename_in` and write them to `filename_out`.

    The result of `count_tokens(filename_in)` is passed unchanged to
    `write_wordlist` along with `filename_out`.
    """
    token_counts = count_tokens(filename_in)
    write_wordlist(token_counts, filename_out)
Beispiel #8
0
def handle_counts(filename_in, filename_out):
    """Produce a wordlist at `filename_out` from `filename_in`.

    Calls `count_tokens` on the input name, then hands its result and
    the output name to `write_wordlist`. Nothing is returned.
    """
    tallies = count_tokens(filename_in)
    write_wordlist(tallies, filename_out)