def main():
    """Build (or load from cache) the dictionary, corpus and topic model,
    then infer topics for the reviews.

    Returns early, doing nothing further, when the reviews already contain
    a 'best_topics' entry. Each artifact is loaded from its cache path when
    that path exists; otherwise it is built and written to the cache.
    """
    setup_logger()
    init_random()
    log_to_info('starting main')
    started_at = time.time()

    reviews = _get_reviews()
    if 'best_topics' in reviews:
        log_to_info('best topics already set, aborting!')
        return
    log_to_info('getting reviews done')

    # The raw documents are only needed when at least one of the
    # dictionary / corpus caches has to be rebuilt.
    if not (os.path.exists(dictionary_cache_name)
            and os.path.exists(mm_cache_name)):
        documents = _get_documents(reviews)

    log_to_info('dictionary')
    if not os.path.exists(dictionary_cache_name):
        dictionary = gensim.corpora.Dictionary(documents)
        dictionary.save(dictionary_cache_name)
    else:
        dictionary = gensim.corpora.Dictionary.load(dictionary_cache_name)

    log_to_info('mm')
    if not os.path.exists(mm_cache_name):
        corpus = [dictionary.doc2bow(text) for text in documents]
        gensim.corpora.MmCorpus.serialize(mm_cache_name, corpus)
    else:
        corpus = gensim.corpora.MmCorpus(mm_cache_name)

    log_to_info('lda')
    if os.path.exists(lda_cache_name):
        if model_type == 'lsi':
            model_cls = gensim.models.LsiModel
        else:
            model_cls = gensim.models.LdaModel
        model = model_cls.load(lda_cache_name)
    else:
        if model_type == 'lsi':
            model = gensim.models.LsiModel(
                corpus=corpus, id2word=dictionary, num_topics=num_topics)
        else:
            # NOTE(review): the LDA paths use a literal 20 topics while the
            # LSI path uses num_topics -- confirm this is intended.
            lda_kwargs = dict(corpus=corpus, id2word=dictionary,
                              num_topics=20)
            if not single_pass:
                # Multi-pass variant: update_every=0 with 20 passes.
                lda_kwargs.update(update_every=0, passes=20)
            model = gensim.models.LdaModel(**lda_kwargs)
        model.save(lda_cache_name)

    elapsed = time.time() - started_at
    log_to_info('it took {0} seconds'.format(elapsed))

    infer_topics(model, reviews)

    total_elapsed = time.time() - started_at
    log_to_info('everything took {0} seconds'.format(total_elapsed))
def slave_main():
    """Fetch a run via get_random_pending_run(), hand it to main, and report
    the captured log output for that run.

    When no run is returned, a message is logged and the function returns
    without reporting anything. Any exception raised while processing is
    logged with its traceback and not re-raised, so the log output is still
    reported afterwards.
    """
    handler, buffer = setup_logger(True)
    run = get_random_pending_run()

    # noinspection PyBroadException
    try:
        if run:
            main(run=run)
        else:
            log_to_info('Nothing to start, exiting')
            return
    except Exception:
        logging.exception('Unknown error')

    captured = get_log_output(handler, buffer)
    report_results(run['id'], captured)
def run_and_say(**kwargs):
    """Call run_with(**kwargs), then pass the logger handler and buffer
    obtained from setup_logger(True) to say_result.

    All keyword arguments are forwarded unchanged to run_with.
    """
    handler, buffer = setup_logger(True)
    run_with(**kwargs)
    say_result(handler, buffer)
# Script entry point: runs main from algorithms.Word2Vec_ParagraphVectors
# after configuring logging.
from algorithms.Word2Vec_ParagraphVectors import main
from helpers.log_config import setup_logger

# Only run when executed as a script, not when imported.
if __name__ == '__main__':
    # Logging is set up before main() is called.
    setup_logger()
    main()