Ejemplo n.º 1
0
def main():
    """Build or load the dictionary, corpus and topic model, then infer topics.

    Each artefact (gensim dictionary, Matrix Market corpus, trained model) is
    read from its cache file when that file exists; otherwise it is built and
    written to the cache. The run aborts early when ``'best_topics'`` is
    already present in the value returned by ``_get_reviews()``. Finally
    ``infer_topics`` is called with the model and the reviews, and the elapsed
    time is logged before and after that call.
    """
    setup_logger()
    init_random()
    log_to_info('starting main')
    started_at = time.time()

    reviews = _get_reviews()
    if 'best_topics' in reviews:
        log_to_info('best topics already set, aborting!')
        return

    log_to_info('getting reviews done')

    # The documents are only needed when at least one of the two caches has to
    # be rebuilt, so skip loading them when both cache files are present.
    if not (os.path.exists(dictionary_cache_name) and os.path.exists(mm_cache_name)):
        documents = _get_documents(reviews)

    log_to_info('dictionary')

    if not os.path.exists(dictionary_cache_name):
        vocabulary = gensim.corpora.Dictionary(documents)
        vocabulary.save(dictionary_cache_name)
    else:
        vocabulary = gensim.corpora.Dictionary.load(dictionary_cache_name)

    log_to_info('mm')
    if not os.path.exists(mm_cache_name):
        bow_corpus = [vocabulary.doc2bow(tokens) for tokens in documents]
        gensim.corpora.MmCorpus.serialize(mm_cache_name, bow_corpus)
    else:
        bow_corpus = gensim.corpora.MmCorpus(mm_cache_name)

    log_to_info('lda')
    if not os.path.exists(lda_cache_name):
        if model_type == 'lsi':
            model = gensim.models.LsiModel(corpus=bow_corpus, id2word=vocabulary, num_topics=num_topics)
        else:
            # NOTE(review): the LDA variants hard-code num_topics=20 while the
            # LSI variant uses the module-level num_topics -- confirm intended.
            # When single_pass is falsy, training is configured with
            # update_every=0 and passes=20.
            extra_options = {} if single_pass else {'update_every': 0, 'passes': 20}
            model = gensim.models.LdaModel(
                corpus=bow_corpus, id2word=vocabulary, num_topics=20, **extra_options
            )
        model.save(lda_cache_name)
    else:
        # The cache file is loaded with the class matching model_type.
        model_class = gensim.models.LsiModel if model_type == 'lsi' else gensim.models.LdaModel
        model = model_class.load(lda_cache_name)

    elapsed = time.time() - started_at
    log_to_info('it took {0} seconds'.format(elapsed))
    infer_topics(model, reviews)
    elapsed = time.time() - started_at
    log_to_info('everything took {0} seconds'.format(elapsed))
def slave_main():
    """Pick a random pending run, execute it and report the captured log.

    When no pending run is available a message is logged and the function
    returns without reporting anything. Any ``Exception`` raised while running
    is logged with its traceback and does not prevent the report from being
    sent.
    """
    handler, buffer = setup_logger(True)
    pending_run = get_random_pending_run()
    # noinspection PyBroadException
    try:
        if pending_run:
            main(run=pending_run)
        else:
            log_to_info('Nothing to start, exiting')
            return
    except Exception:
        # Top-level boundary: record the failure but still report the log below.
        logging.exception('Unknown error')

    captured_log = get_log_output(handler, buffer)
    report_results(pending_run['id'], captured_log)
def run_and_say(**kwargs):
    """Call ``run_with`` with the given keyword arguments, then ``say_result``.

    The logger is set up via ``setup_logger(True)`` first; the handler and
    buffer it returns are forwarded to ``say_result`` once ``run_with``
    has finished.
    """
    logger_state = setup_logger(True)
    handler, buffer = logger_state
    run_with(**kwargs)
    say_result(handler, buffer)
from algorithms.Word2Vec_ParagraphVectors import main
from helpers.log_config import setup_logger

# Script entry point: set up logging, then call `main` (imported from
# algorithms.Word2Vec_ParagraphVectors). Not executed when this module is
# imported rather than run directly.
if __name__ == '__main__':
    setup_logger()
    main()