def get_words(expn, parent, lmk=None, rel=None):
    """Recursively expand ``expn`` into words, tracking probability and entropy.

    ``expn`` is split on whitespace and each symbol is handled in order:

    * a symbol found in ``NONTERMINALS`` is expanded with ``get_expansion``
      and the resulting expansion is processed by a recursive call, with the
      symbol itself passed as the new ``parent``;
    * any other symbol is treated as a part-of-speech tag: candidate words
      are fetched with ``Word.get_words`` and one is drawn with
      ``categorical_sample`` according to its relative frequency.

    Args:
        expn: Whitespace-separated string of symbols to expand.
        parent: Symbol that ``expn`` was expanded from; forwarded to
            ``get_expansion``.
        lmk: Landmark forwarded to the lookups (optional).  It is replaced
            by ``parent_landmark(lmk)`` when a ``'LANDMARK-PHRASE'`` symbol
            is met directly under a ``'LANDMARK-PHRASE'`` parent.
        rel: Relation forwarded to the lookups (optional).

    Returns:
        A tuple ``(words, p, H)``: ``words`` has one entry per symbol (a
        nested list for a nonterminal, the sampled ``.word`` for a
        part-of-speech tag), ``p`` is the product of the per-symbol
        probabilities and ``H`` the sum of the per-symbol entropies.

    Raises:
        ValueError: If no candidate words exist for a part-of-speech symbol.
    """
    words = []
    probs = []
    entropy = []

    for n in expn.split():
        if n in NONTERMINALS:
            if n == parent == 'LANDMARK-PHRASE':
                # A landmark phrase nested in a landmark phrase: move to the
                # parent landmark.  The rebinding is deliberate and stays in
                # effect for the remaining symbols of this expansion.
                lmk = parent_landmark(lmk)

            # Nonterminal: pick an expansion and keep expanding it.
            expansion, exp_prob, exp_ent = get_expansion(n, parent, lmk, rel)
            w, w_prob, w_ent = get_words(expansion, n, lmk, rel)
            words.append(w)
            probs.append(exp_prob * w_prob)
            entropy.append(exp_ent + w_ent)
        else:
            # Part-of-speech symbol: sample a word for it.
            w_db = Word.get_words(pos=n, lmk=lmk_id(lmk), rel=rel_type(rel))
            counter = collections.Counter(w_db)
            if not counter:
                raise ValueError('no words found for POS %r' % (n,))

            keys, counts = zip(*counter.items())
            # Build a float array: in-place division of an integer array
            # either truncates every weight to 0 (Python 2 / old NumPy) or
            # raises a casting error (recent NumPy).
            counts = np.array(counts, dtype=float)
            counts /= counts.sum()

            w, w_prob, w_entropy = categorical_sample(keys, counts)
            words.append(w.word)
            # Record the probability reported by the sampler so that it is
            # on the same footing as the expansion probabilities above.
            probs.append(w_prob)
            entropy.append(w_entropy)

    p, H = np.prod(probs), np.sum(entropy)
    # Parenthesised single-argument form prints the same text on Python 2
    # and Python 3.
    print('expanding %s to %s (p: %f, H: %f)' % (expn, words, p, H))
    return words, p, H