Esempio n. 1
0
def word_count_dir(dir_path, n_jobs=20):
    """Count words in the documents under ``dir_path`` and store tag frequencies.

    The directory tree is walked recursively; every file with a ``.pdf``,
    ``.htm``, ``.html`` or ``.txt`` extension (case-insensitive) is passed to
    ``word_count`` through a joblib ``Parallel`` pool. The per-file results
    are merged into one ``Counter``. For every entry returned by
    ``db.select_tags()`` that occurs in the merged counter, a
    ``{'word': ..., 'frequency': ...}`` record is written with
    ``db.insert_word``.

    Args:
        dir_path: Root directory to scan.
        n_jobs: Number of parallel workers handed to ``Parallel``
            (defaults to 20, the previously hard-coded value).

    Returns:
        The merged ``Counter`` over all processed files.
    """
    # Match on the actual file extension. A plain substring test such as
    # ``'pdf' in name`` would also select files like ``pdf_index.json``.
    extensions = ('.pdf', '.htm', '.html', '.txt')

    db = DBConnect()
    dictionary = db.select_tags()

    file_names = [
        path.join(root, name)
        for root, _dirs, files in walk(dir_path)
        for name in files
        if name.lower().endswith(extensions)
    ]

    # NOTE(review): the message says "PDF" although HTML and text files are
    # counted too; left unchanged in case anything depends on the wording.
    click.secho("Counting words for {} PDF documents.".format(len(file_names)),
                fg='blue')

    # assumes word_count returns a Counter per file -- TODO confirm
    dir_counters = Parallel(n_jobs=n_jobs)(
        delayed(word_count)(file_name) for file_name in file_names)

    # Accumulate in place; sum(..., Counter()) would rebuild the running
    # total once per file.
    total = Counter()
    for counter in dir_counters:
        total += counter

    for word in dictionary:
        if word in total:
            db.insert_word({'word': word, 'frequency': total[word]})
    return total