Esempio n. 1
0
def create_corpus():
    """Build a corpus from the entries found under ``texts/<folder>/``.

    Every path matching ``texts/*/*`` is wrapped in a ``Document`` and added
    to a new ``Corpus``; ``build_vocabulary()`` is then called once, after
    all documents have been added.

    Paths are visited in sorted order. ``glob`` returns matches in an
    arbitrary, filesystem-dependent order, so without sorting the order in
    which documents are added could differ between runs and machines.

    Returns:
        The populated ``Corpus`` instance.
    """
    corpus = Corpus()
    for folder in sorted(glob.iglob('texts/*')):
        # Entries directly under texts/ that are not directories simply
        # produce no matches here and are skipped.
        for filename in sorted(glob.iglob(folder + "/*")):
            corpus.add_document(Document(filename))

    # NOTE(review): presumably the vocabulary depends on the full document
    # set, hence the single call after the loop; confirm against Corpus.
    corpus.build_vocabulary()
    return corpus