Ejemplo n.º 1
0
def coherence(word_topic, train, test, top=10, window_with=10):
    """Summarise the PMI-based coherence of the topics in ``word_topic``.

    For every topic the ``top`` head words are fetched with ``get_topic`` and
    the score ``log(prob_conditn[w2][w1] / prob_ungrams[w1])`` is computed for
    each word pair yielded by ``all_combine``. Pairs whose conditional
    probability is 0.0, and pairs whose score is exactly 0.0, are excluded
    from the per-topic statistics.

    Args:
        word_topic: object exposing ``shape``; ``shape[1]`` is iterated as the
            number of topics and ``shape[0]`` is forwarded to
            ``eval_pob_bigrams`` (presumably the vocabulary size -- confirm).
        train: corpus handed to ``eval_pob_ungrams`` and ``eval_pob_bigrams``.
        test: unused here; kept so existing call sites keep working.
        top: number of head words requested per topic.
        window_with: forwarded to ``eval_pob_bigrams`` as ``wind_with``.

    Returns:
        str: ``'coherence = mean %.2f median %.2f'`` filled with the mean of
        the per-topic means and the median of the per-topic medians.
    """
    words_for_probs = set(eval_words_for_probs(word_topic, top))
    prob_ungrams = eval_pob_ungrams(train, words_for_probs)
    prob_conditn = eval_pob_bigrams(train, word_topic.shape[0], words_for_probs, wind_with=window_with)

    def pmi(w1, w2):
        # A zero conditional probability would make the logarithm undefined,
        # so such pairs score 0.0 and are filtered out below.
        conditional = prob_conditn[w2][w1]
        if conditional == 0.0:
            return 0.0
        return math.log(conditional / prob_ungrams[w1])

    pmis_mean = []
    pmis_median = []

    # ``range`` rather than the Python 2-only ``xrange`` so the function also
    # runs on Python 3.
    for t in range(word_topic.shape[1]):
        topic_wrds = get_topic(word_topic, topic=t, head=top)
        # Score each pair exactly once, then drop the zero scores.
        scores = (pmi(w1, w2) for w1, w2 in all_combine(topic_wrds))
        pmi_t = [score for score in scores if score != 0.0]
        # NOTE: when no pair of a topic has a non-zero score, ``pmi_t`` is
        # empty and numpy yields nan for it, which propagates to the result.
        pmis_median.append(np.median(pmi_t))
        pmis_mean.append(np.mean(pmi_t))

    return 'coherence = mean %.2f median %.2f' % (np.mean(pmis_mean), np.median(pmis_median))
Ejemplo n.º 2
0
def coherence(word_topic, train, test, top=10, window_with=10):
    """Report mean and median PMI coherence over all topics as a string.

    The head words of each topic (``get_topic`` with ``head=top``) are paired
    up by ``all_combine``; each pair is scored with
    ``log(prob_conditn[w2][w1] / prob_ungrams[w1])``. Pairs with a conditional
    probability of 0.0, or with a score of exactly 0.0, do not take part in
    the per-topic mean and median.

    Args:
        word_topic: object exposing ``shape``; ``shape[1]`` is iterated as the
            number of topics and ``shape[0]`` is passed on to
            ``eval_pob_bigrams`` (presumably the vocabulary size -- confirm).
        train: corpus passed to ``eval_pob_ungrams`` and ``eval_pob_bigrams``.
        test: not used by this function; retained for interface compatibility.
        top: number of head words taken from each topic.
        window_with: passed to ``eval_pob_bigrams`` as ``wind_with``.

    Returns:
        str: ``'coherence = mean %.2f median %.2f'`` formatted with the mean
        of the per-topic means and the median of the per-topic medians.
    """
    words_for_probs = set(eval_words_for_probs(word_topic, top))
    prob_ungrams = eval_pob_ungrams(train, words_for_probs)
    prob_conditn = eval_pob_bigrams(train,
                                    word_topic.shape[0],
                                    words_for_probs,
                                    wind_with=window_with)

    def pmi(w1, w2):
        # Pairs with zero conditional probability cannot be passed to
        # ``math.log``; they score 0.0 and are discarded by the filter below.
        conditional = prob_conditn[w2][w1]
        if conditional == 0.0:
            return 0.0
        return math.log(conditional / prob_ungrams[w1])

    pmis_mean = []
    pmis_median = []

    # ``range`` works on both Python 2 and Python 3, unlike ``xrange``.
    for t in range(word_topic.shape[1]):
        topic_wrds = get_topic(word_topic, topic=t, head=top)
        # Evaluate every pair a single time instead of once for the filter
        # and once more for the value.
        scores = (pmi(w1, w2) for w1, w2 in all_combine(topic_wrds))
        pmi_t = [score for score in scores if score != 0.0]
        # NOTE: an empty ``pmi_t`` makes numpy return nan for this topic, and
        # that nan carries through to the final figures.
        pmis_median.append(np.median(pmi_t))
        pmis_mean.append(np.mean(pmi_t))

    return 'coherence = mean %.2f median %.2f' % (np.mean(pmis_mean),
                                                  np.median(pmis_median))
Ejemplo n.º 3
0
def eval_words_for_probs(word_topic, top):
    """Collect the head words of every topic into one flat list.

    Args:
        word_topic: object exposing ``shape``; ``shape[1]`` is iterated as the
            number of topics.
        top: forwarded to ``get_topic`` as ``head``.

    Returns:
        list: the ``get_topic`` results for topics ``0 .. shape[1] - 1``,
        concatenated in topic order. Words shared by several topics appear
        once per topic; no de-duplication happens here.
    """
    words = []
    # ``range`` instead of the Python 2-only ``xrange`` so this also runs on
    # Python 3.
    for t in range(word_topic.shape[1]):
        words.extend(get_topic(word_topic, topic=t, head=top))

    return words
Ejemplo n.º 4
0
def eval_words_for_probs(word_topic, top):
    """Return the head words of all topics as a single flat list.

    Args:
        word_topic: object exposing ``shape``; ``shape[1]`` is iterated as the
            number of topics.
        top: passed to ``get_topic`` as ``head``.

    Returns:
        list: whatever ``get_topic`` yields for each topic index from 0 up to
        ``shape[1] - 1``, appended in that order. Repeated words are kept.
    """
    words = []
    # ``xrange`` exists only on Python 2; ``range`` iterates the same indices
    # on both major versions.
    for t in range(word_topic.shape[1]):
        words.extend(get_topic(word_topic, topic=t, head=top))

    return words