Example #1
0
def train_model(data_filename, num_topics=10, num_passes=1,
    topic_x_word=True, email_x_topic=True, binarize=False,
    alpha=0.01, eta=0.01):
    """Train a gensim LDA model on a bag-of-words file.

    The data is loaded with ``eio.load_bow``, optionally binarized, wrapped
    in a gensim ``Dense2Corpus`` (one document per row) and handed to
    ``LdaModel``.

    Args:
        data_filename: Path passed to ``eio.load_bow``.
        num_topics: Number of topics passed to ``LdaModel``.
        num_passes: Value passed to ``LdaModel`` as ``passes``.
        topic_x_word: If true, also return the result of
            ``extract_topic_word_dist(model)``.
        email_x_topic: If true, also return the result of
            ``run_inference(model, corpus)``.
        binarize: If true, every nonzero entry of the loaded data is set
            to 1 before training.
        alpha: Forwarded unchanged to ``LdaModel`` as ``alpha``
            (previously hard-coded to 0.01).
        eta: Forwarded unchanged to ``LdaModel`` as ``eta``
            (previously hard-coded to 0.01).

    Returns:
        The trained model alone when both ``email_x_topic`` and
        ``topic_x_word`` are false; otherwise a tuple starting with the
        model, followed by the inference result (if ``email_x_topic``) and
        then the topic/word result (if ``topic_x_word``).
    """
    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Loading data...")
    data = eio.load_bow(data_filename)

    if binarize:
        # Collapse counts to presence/absence.
        data[np.nonzero(data)] = 1

    # documents_columns=False: each row of `data` is one document.
    corpus = gensim.matutils.Dense2Corpus(data, documents_columns=False)

    print("Beginning training...")
    model = gensim.models.ldamodel.LdaModel(
        corpus,
        num_topics=num_topics,
        passes=num_passes,
        alpha=alpha,
        eta=eta)
    print("Done")

    # Collect the optional outputs in the fixed order callers rely on:
    # model, then email-by-topic, then topic-by-word.
    outputs = [model]
    if email_x_topic:
        outputs.append(run_inference(model, corpus))
    if topic_x_word:
        outputs.append(extract_topic_word_dist(model))

    # With no extras requested the bare model is returned, not a 1-tuple.
    if len(outputs) == 1:
        return model
    return tuple(outputs)
Example #2
0
def run_inference_over_file(data_filename, model_filename, save=False,
     num_topics=None, output_prefix=None):
    """Run a saved model over a bag-of-words file and return the result.

    The data is loaded with ``eio.load_bow`` and wrapped in a gensim
    ``Dense2Corpus`` (one document per row); the model is loaded with
    ``mi.load_model``; the two are passed to ``run_inference``.

    Args:
        data_filename: Path passed to ``eio.load_bow``.
        model_filename: Path passed to ``mi.load_model``.
        save: If true, the inference result is also written to disk via
            its ``tofile`` method, at the path found under the
            ``'email_x_topic'`` key of
            ``output_filenames(output_prefix, num_emails, num_topics)``.
        num_topics: Required when ``save`` is true; forwarded to
            ``output_filenames``.
        output_prefix: Required when ``save`` is true; forwarded to
            ``output_filenames``.

    Returns:
        Whatever ``run_inference(model, corpus)`` returns.

    Raises:
        ValueError: If ``save`` is true and ``num_topics`` or
            ``output_prefix`` is None.
    """
    if save:
        # Validate before any loading or inference so a bad call fails
        # immediately, and with explicit raises so the check is not
        # stripped when Python runs with -O.
        if num_topics is None:
            raise ValueError("num_topics is required when save=True")
        if output_prefix is None:
            raise ValueError("output_prefix is required when save=True")

    # Single-argument print(...) emits the same text under the Python 2
    # print statement and the Python 3 print function.
    print("Loading data...")
    data = eio.load_bow(data_filename)

    # documents_columns=False: each row of `data` is one document.
    corpus = gensim.matutils.Dense2Corpus(data, documents_columns=False)

    print("Loading model...")
    model = mi.load_model(model_filename)

    ext = run_inference(model, corpus)

    if save:
        # The first axis of the result is used as the email count.
        num_emails = ext.shape[0]
        filenames = output_filenames(output_prefix, num_emails, num_topics)
        ext.tofile(filenames['email_x_topic'])

    return ext