Example #1
0
def get_words(expn, parent, lmk=None, rel=None):
    """Recursively turn a grammar expansion string into sampled words.

    Each whitespace-separated symbol in ``expn`` is handled in order:

    * a symbol found in ``NONTERMINALS`` is expanded with
      ``get_expansion`` and the resulting expansion is processed by a
      recursive call to this function;
    * any other symbol is treated as a part-of-speech tag: candidate
      words are fetched with ``Word.get_words``, their empirical
      frequencies are computed, and one is drawn with
      ``categorical_sample``.

    Args:
        expn: Whitespace-separated string of grammar symbols.
        parent: The symbol whose expansion produced ``expn``.
        lmk: Landmark forwarded to the expansion and word lookups.
        rel: Relation forwarded to the expansion and word lookups.

    Returns:
        A tuple ``(words, p, H)``. ``words`` holds one entry per symbol
        of ``expn``: the nested ``words`` list of the recursive call for
        a nonterminal, or the sampled object's ``word`` attribute for a
        POS tag. ``p`` is the product of the per-symbol probabilities and
        ``H`` the sum of the per-symbol entropies. For an empty ``expn``
        the result is ``([], 1.0, 0.0)``.

    Raises:
        ValueError: If ``Word.get_words`` yields no candidates for a
            POS tag.
    """
    words = []
    probs = []
    entropy = []

    for n in expn.split():
        if n in NONTERMINALS:
            if n == parent == 'LANDMARK-PHRASE':
                # we need to move to the parent landmark
                # NOTE(review): this rebinds ``lmk`` for every remaining
                # symbol of this expansion as well, not only for ``n``.
                lmk = parent_landmark(lmk)
            # we need to keep expanding
            expansion, exp_prob, exp_ent = get_expansion(n, parent, lmk, rel)
            w, w_prob, w_ent = get_words(expansion, n, lmk, rel)
            # NOTE(review): ``w`` is itself a list, so ``words`` becomes
            # nested; kept as-is because callers may rely on that shape.
            words.append(w)
            probs.append(exp_prob * w_prob)
            entropy.append(exp_ent + w_ent)
        else:
            # get word for POS
            w_db = Word.get_words(pos=n, lmk=lmk_id(lmk), rel=rel_type(rel))
            counter = collections.Counter(w_db)
            if not counter:
                # Fail with a clear message instead of the opaque
                # tuple-unpacking ValueError zip(*[]) would produce.
                raise ValueError('no candidate words found for POS %r' % (n,))
            keys, counts = zip(*counter.items())
            # Use a float array: dividing an integer array in place either
            # floor-divides the frequencies to 0 (Python 2) or raises a
            # casting error (newer NumPy / Python 3).
            counts = np.array(counts, dtype=float)
            counts /= counts.sum()
            w, w_prob, w_entropy = categorical_sample(keys, counts)
            words.append(w.word)
            # NOTE(review): the recorded probability is the sampled
            # object's own ``prob`` attribute, not ``w_prob`` returned by
            # categorical_sample -- confirm this is intended.
            probs.append(w.prob)
            entropy.append(w_entropy)
    p, H = np.prod(probs), np.sum(entropy)
    # Parenthesised single-argument form prints identically as a Python 2
    # print statement and as a Python 3 print() call.
    print('expanding %s to %s (p: %f, H: %f)' % (expn, words, p, H))
    return words, p, H