"""Pipeline script: extract corpus text, build BOW vectors, then train and save an LDA model.

Each stage prints a banner before it runs. All stages are handed the same
``cache`` directory, located under the current working directory.
"""
import os

from corpus.textExtractor import TextExtractor
from corpus.bowBuilder import BowBuilder
from lda.ldaCalc import LdaCalc

# Shared cache location passed to every stage below.
cache_root = os.path.join(os.getcwd(), "cache")

# --- Stage 1: text extraction (pipeline entry point) -------------------------
print("\nextracting text from corpus ------------------------------------\n")
extractor = TextExtractor(cache_root)
extractor.get_texts()
# The extractor's save() step is currently disabled.

# --- Stage 2: BOW vectors ----------------------------------------------------
print("\nbuilding BOW vectors from corpus ------------------------------------\n")
corpus_texts = extractor.texts
bowBuilder = BowBuilder(docs=corpus_texts, cache_dir=cache_root)
bowBuilder.generate_bows()
bowBuilder.save()

# --- Stage 3: LDA training ---------------------------------------------------
print("\ntraining LDA model -----------------------------------------------\n")
bow_corpus = bowBuilder.bowVectorCorpus
dictionary = bowBuilder.id2word
lda = LdaCalc(bow_corpus, dictionary, cache_root)
lda.run_lda()

# --- Stage 4: persist the model, then show its topics ------------------------
print("\nsaving LDA model -----------------------------------------------\n")
lda.save()
lda.print_topics()
"""Script: load the stored BOW corpus and LDA model, print the topics and run calc_sims().

Both loaders are pointed at the ``cache`` directory under the current working
directory.
"""
__author__ = 'thomas'

import os

from corpus.bowBuilder import BowBuilder
from lda.ldaCalc import LdaCalc

# Cache location handed to both the BOW builder and the LDA wrapper.
working_dir = os.getcwd()
cache_root = os.path.join(working_dir, "cache")

# Load the BOW builder's state and pick up its vector corpus.
bowBuilder = BowBuilder(cache_dir=cache_root)
bowBuilder.load()
bows = bowBuilder.bowVectorCorpus

print("\nLoading LDA model -----------------------------------------------\n")
lda = LdaCalc(bows=bows, sims_cache_dir=cache_root)
lda.load()
lda.print_topics()

# NOTE(review): calc_sims() presumably computes document similarities — confirm
# against LdaCalc. Saving the result via save_sims() is currently disabled.
lda.calc_sims()