def get_mi_weights(bg_corpus, topic_corpus, min_bg_count=5):
    """Rank items by how over-represented they are in a topic corpus.

    For every item that occurs at least ``min_bg_count`` times in the
    background corpus and at least once in the topic corpus, the weight is

        log( P(item | topic) / P(item | background) )

    where each probability is the item's relative frequency in the
    respective corpus.  Positive weights mean the item is more frequent in
    the topic corpus than in the background; negative weights mean the
    opposite.

    Args:
        bg_corpus: Iterable of hashable items (e.g. tokens) forming the
            background corpus.  Iterators/generators are accepted.
        topic_corpus: Iterable of hashable items forming the topic corpus.
        min_bg_count: Minimum number of background occurrences an item needs
            in order to be scored.  Defaults to 5.

    Returns:
        A list of ``(item, weight)`` tuples sorted by weight, highest first.
        The list is empty when no item satisfies both conditions.
    """
    # Function-scope import keeps the block self-contained; plain frequency
    # counting needs nothing beyond the standard library.
    from collections import Counter

    bg_counts = Counter(bg_corpus)
    topic_counts = Counter(topic_corpus)

    # Totals are derived from the counts rather than len(corpus) so that
    # one-shot iterables (already consumed by Counter) work as well.
    bg_total = float(sum(bg_counts.values()))
    topic_total = float(sum(topic_counts.values()))

    mi_weight = {}
    for item, bg_count in bg_counts.items():
        # Counter returns 0 for unseen items instead of raising KeyError.
        topic_count = topic_counts[item]
        if bg_count < min_bg_count or topic_count <= 0:
            continue
        topic_ratio = topic_count / topic_total
        bg_ratio = bg_count / bg_total
        mi_weight[item] = math.log(topic_ratio / bg_ratio)

    return sorted(mi_weight.items(), key=operator.itemgetter(1), reverse=True)