Example 1
0
            fh.write("%s\t%f\n" % (x[0], x[1]))
        fh.close()


if __name__ == "__main__":

    # Topic labels, one per line; only the first five are modeled below.
    topic_file = (
        "/data0/projects/fuse/rdg_experimental_lab/experiments/surveyor_2013/final_experiments/code/final_topics.txt"
    )
    # `with` guarantees the handle is closed (the original opened it and
    # never closed it).
    with open(topic_file, "r") as topic_fh:
        topics = [line.strip() for line in topic_fh]

    # presumably the second argument is a directory of per-topic document
    # collections -- TODO confirm against SurveyorData
    dataObj = SurveyorData(
        topics[0:5], "/data0/projects/fuse/rdg_experimental_lab/experiments/content_models/data/input_text/"
    )
    # TODO: add caching here so vectors are not computed again and again
    # store vocab and data vectors

    # Dirichlet hyperparameters, one per topic level:
    # alpha = [background, content, docspecific]
    alpha = [10.0, 1.0, 5.0]
    beta = [1.0, 0.1, 1.0]

    tsObj = TopicSumWeave(dataObj, alpha, beta)
    tsObj.runGibbs()
    phi = tsObj.phi()

    vocab = dataObj.get_vocab()
    # phi[0] is paired with the vocabulary to form the background topic's
    # word -> probability map.
    tsObj.write_topic("background", dict(zip(vocab, phi[0])))
Example 2
0
def write_topic(filekey, word_probs, out_dir="/data0/projects/fuse/rdg_experimental_lab/experiments/content_models/code/out_topics/"):
    """Write one topic's word distribution to <out_dir><filekey>.txt.

    Each output line is "word<TAB>probability", sorted by descending
    probability.

    Args:
        filekey: basename (without extension) of the output file.
        word_probs: dict mapping word -> probability.
        out_dir: output directory (trailing slash expected). Defaults to the
            original hard-coded path so existing callers are unaffected.
    """
    # `with` closes the handle even if a write raises (the original leaked
    # it on error); `.items()` replaces the Python-2-only `iteritems()`.
    with open(out_dir + filekey + ".txt", "w") as fh:
        for word, prob in sorted(word_probs.items(), key=lambda kv: kv[1], reverse=True):
            fh.write("%s\t%f\n" % (word, prob))

if __name__ == "__main__":
    
    # Path to the topic list: one topic label per line.
    topic_file = "/data0/projects/fuse/rdg_experimental_lab/experiments/surveyor_2013/final_experiments/code/final_topics.txt";
    # NOTE(review): handle is never closed; consider `with open(...)`.
    topic_fh = open(topic_file, "r")
    topics = []
    for line in topic_fh:
        topics.append(line.strip())

    # Model only the first two topics; the second argument is presumably a
    # directory of per-topic input documents -- TODO confirm against
    # SurveyorData.
    dataObj = SurveyorData(topics[0:2], "/data0/projects/fuse/rdg_experimental_lab/experiments/content_models/data/input_text/")
    # TODO: add caching here so vectors are not computed again and again
    # store vocab and data vectors
    vectors = dataObj.process_data()

    # Dirichlet hyperparameters:
    # alpha = [background, content, docspecific]
    alpha = [10.0, 1.0, 5.0]
    beta = 0.1
    tsObj = TopicSum(vectors, alpha, beta)
    cur_phi = None
    # Run 10 Gibbs iterations, printing the log-likelihood each pass and
    # keeping the last sampled phi.
    for it, phi in enumerate(tsObj.run(10)):
        print "Iteration", it
        print "Likelihood", tsObj.loglikelihood()
        # NOTE(review): `cur_phi` above is initialized but never updated;
        # this assignment to `all_phi` was likely meant to be `cur_phi`.
        all_phi = phi

    vocab = dataObj.get_vocab()