예제 #1
0
def train_context_extractor(records, stable=True):
    """Build and train a context extractor for the configured topic model.

    The extractor class is chosen from ``Constants.TOPIC_MODEL_TYPE``:

    * ``'lda'`` -- an ``LdaBasedContext`` is created; its review corpus is
      generated and its topic model built.
    * ``'nmf'`` -- an ``NmfContextExtractor`` is created; its review
      bag-of-words and document-term matrix are built, followed by either
      ``build_stable_topic_model()`` or ``build_topic_model()``.

    In both cases the extractor then updates the reviews with topics,
    computes the context-rich topics and clears its reviews. A timestamped
    progress line is printed before and after training.

    Args:
        records: Passed unchanged to the extractor constructor.
        stable: Only consulted for the ``'nmf'`` model type. When truthy the
            stable topic model is built, otherwise the regular one.

    Returns:
        The trained extractor instance.

    Raises:
        ValueError: If ``Constants.TOPIC_MODEL_TYPE`` is neither ``'lda'``
            nor ``'nmf'``.
    """
    timestamp_format = "%Y/%m/%d-%H:%M:%S"
    print('%s: train context topics model' % time.strftime(timestamp_format))

    model_type = Constants.TOPIC_MODEL_TYPE

    # Model-specific preparation and topic-model construction.
    if model_type == 'lda':
        extractor = LdaBasedContext(records)
        extractor.generate_review_corpus()
        extractor.build_topic_model()
    elif model_type == 'nmf':
        extractor = NmfContextExtractor(records)
        extractor.generate_review_bows()
        extractor.build_document_term_matrix()
        build_model = (
            extractor.build_stable_topic_model
            if stable
            else extractor.build_topic_model
        )
        build_model()
    else:
        raise ValueError(
            "Unrecognized topic model type: '%s'" % model_type)

    # Post-processing steps shared by every supported model type.
    extractor.update_reviews_with_topics()
    extractor.get_context_rich_topics()
    extractor.clear_reviews()

    print('%s: Trained Topic Model' % time.strftime(timestamp_format))

    return extractor