import operator


def write_topic(filekey, word_probs):
    # Dump one topic's word distribution, highest-probability words first.
    fh = open("/data0/projects/fuse/rdg_experimental_lab/experiments/content_models/code/out_topics/" + filekey + ".txt", "w")
    for x in sorted(word_probs.items(), key=operator.itemgetter(1), reverse=True):
        fh.write("%s\t%f\n" % (x[0], x[1]))
    fh.close()


if __name__ == "__main__":
    topic_file = "/data0/projects/fuse/rdg_experimental_lab/experiments/surveyor_2013/final_experiments/code/final_topics.txt"
    topic_fh = open(topic_file, "r")
    topics = []
    for line in topic_fh:
        topics.append(line.strip())
    topic_fh.close()

    # SurveyorData and TopicSumWeave are defined elsewhere in this codebase.
    dataObj = SurveyorData(
        topics[0:5],
        "/data0/projects/fuse/rdg_experimental_lab/experiments/content_models/data/input_text/",
    )

    # TODO: add caching here so vectors are not computed again and again
    # (store vocab and data vectors)

    # alpha = [background, content, docspecific]
    alpha = [10.0, 1.0, 5.0]
    beta = [1.0, 0.1, 1.0]
    tsObj = TopicSumWeave(dataObj, alpha, beta)
    tsObj.runGibbs()
    phi = tsObj.phi()

    vocab = dataObj.get_vocab()
    # write_topic is the module-level helper above, not a TopicSumWeave method.
    write_topic("background", dict(zip(vocab, phi[0])))
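# The TODO above asks for caching so the vocab and data vectors are not
# recomputed on every run. A minimal sketch of one way to do it, assuming
# the vocab and vectors pickle cleanly; load_or_build_vectors and cache_path
# are hypothetical names, not part of the original code.
import os
import pickle


def load_or_build_vectors(dataObj, cache_path):
    # Reuse the pickled (vocab, vectors) pair when a cache file already exists.
    if os.path.exists(cache_path):
        with open(cache_path, "rb") as cache_fh:
            return pickle.load(cache_fh)
    # Otherwise compute once and persist the result for the next run.
    vectors = dataObj.process_data()
    cached = (dataObj.get_vocab(), vectors)
    with open(cache_path, "wb") as cache_fh:
        pickle.dump(cached, cache_fh)
    return cached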
import operator


def write_topic(filekey, word_probs):
    # Dump one topic's word distribution, highest-probability words first.
    fh = open("/data0/projects/fuse/rdg_experimental_lab/experiments/content_models/code/out_topics/" + filekey + ".txt", "w")
    for x in sorted(word_probs.items(), key=operator.itemgetter(1), reverse=True):
        fh.write("%s\t%f\n" % (x[0], x[1]))
    fh.close()


if __name__ == "__main__":
    topic_file = "/data0/projects/fuse/rdg_experimental_lab/experiments/surveyor_2013/final_experiments/code/final_topics.txt"
    topic_fh = open(topic_file, "r")
    topics = []
    for line in topic_fh:
        topics.append(line.strip())
    topic_fh.close()

    # SurveyorData and TopicSum are defined elsewhere in this codebase.
    dataObj = SurveyorData(
        topics[0:2],
        "/data0/projects/fuse/rdg_experimental_lab/experiments/content_models/data/input_text/",
    )

    # TODO: add caching here so vectors are not computed again and again
    # (store vocab and data vectors)
    vectors = dataObj.process_data()

    # alpha = [background, content, docspecific]
    alpha = [10.0, 1.0, 5.0]
    beta = 0.1
    tsObj = TopicSum(vectors, alpha, beta)

    # run() yields phi after each Gibbs sampling iteration; keep the last one.
    cur_phi = None
    for it, phi in enumerate(tsObj.run(10)):
        print("Iteration", it)
        print("Likelihood", tsObj.loglikelihood())
        cur_phi = phi

    vocab = dataObj.get_vocab()
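    # A hedged sketch of the missing write-out step, mirroring the
    # TopicSumWeave script above; treating row 0 of the final phi as the
    # background topic is an assumption carried over from that script,
    # not something this file states.
    if cur_phi is not None:
        write_topic("background", dict(zip(vocab, cur_phi[0])))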