def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None): meaning_probs = {} # parse sentence with charniak and apply surgeries print 'parsing ...' modparse = get_modparse(sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() num_ancestors = count_lmk_phrases(t) - 1 for _ in xrange(iterations): (lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors) meaning = m2s(lmk,rel) if meaning not in meaning_probs: ps = get_tree_probs(t, lmk, rel)[0] # print "Tree probs: ", zip(ps,rls) meaning_probs[meaning] = np.prod(ps) print '.' if extra_meaning: meaning = m2s(*extra_meaning) if meaning not in meaning_probs: ps = get_tree_probs(t, lmk, rel)[0] # print "Tree prob: ", zip(ps,rls) meaning_probs[meaning] = np.prod(ps) print '.' summ = sum(meaning_probs.values()) for key in meaning_probs: meaning_probs[key] /= summ return meaning_probs.items()
def get_sentence_meaning_likelihood(sentence, lmk, rel): modparse = get_modparse(sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() probs, entropies, lrpc, tps = get_tree_probs(t, lmk, rel) if np.prod(probs) == 0.0: logger('ERROR: Probability product is 0 for sentence: %s, lmk: %s, rel: %s, probs: %s' % (sentence, lmk, rel, str(probs))) return np.prod(probs), sum(entropies), lrpc, tps
def get_all_sentence_posteriors(sentence, meanings, golden=False, printing=True):
    # Score every distinct landmark and relation from `meanings` against the
    # parsed sentence, returning {lmk-or-rel: probability product}.
    # NOTE(review): a second, simpler definition of this function name appears
    # later in this module and will shadow this one at import time -- confirm
    # which is intended.
    print 'parsing ...'
    # NOTE(review): here get_modparse returns a pair; other call sites in this
    # module use its return value directly -- confirm the API.
    _, modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1
    # logger(len(meanings))
    # Deduplicate landmarks and relations so each is scored only once.
    lmks, rels = zip(*meanings)
    lmks = set(lmks)
    rels = set(rels)
    # logger('num things ' + str(len(lmks))+' '+str(len(rels)))
    # Console spinner frames cycled while scoring.
    syms = ['\\', '|', '/', '-']
    sys.stdout.write('processing...\\')
    sys.stdout.flush()
    posteriors = {}
    # Landmarks are scored against subtree t[1]; a landmark whose ancestor
    # count does not match the parse gets probability 0.
    for i,lmk in enumerate(lmks):
        if lmk.get_ancestor_count() != num_ancestors:
            p = 0
        else:
            ps = get_tree_probs(t[1], lmk, golden=golden, printing=printing)[0]
            p = np.prod(ps)
        posteriors[lmk] = p
        sys.stdout.write("\b%s" % syms[i % len(syms)])
        sys.stdout.flush()
    # Relations are scored against subtree t[0].
    for i,rel in enumerate(rels):
        ps = get_tree_probs(t[0], rel=rel, golden=golden, printing=printing)[0]
        posteriors[rel] = np.prod(ps)
        sys.stdout.write("\b%s" % syms[i % len(syms)])
        sys.stdout.flush()
    # Trailing animation; deliberately reuses `i` leaked from the loop above.
    for j in range(50):
        sys.stdout.write("\b.%s" % syms[(i+j) % len(syms)])
        sys.stdout.flush()
    print
    # for meaning in meanings:
    #     lmk,rel = meaning
    #     if lmk.get_ancestor_count() != num_ancestors:
    #         p = 0
    #     else:
    #         ps = get_tree_probs(t, lmk, rel, printing=False)[0]
    #         p = np.prod(ps)
    #     posteriors.append(p)
    #     print p, lmk, lmk.ori_relations, rel, (rel.distance, rel.measurement.best_degree_class, rel.measurement.best_distance_class ) if hasattr(rel,'measurement') else 'No measurement'
    return posteriors
def get_all_sentence_posteriors(sentence, meanings): print 'parsing ...' modparse = get_modparse(sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() num_ancestors = count_lmk_phrases(t) - 1 posteriors = [] for meaning in meanings: lmk,rel = meaning if lmk.get_ancestor_count() != num_ancestors: p = 0 else: ps = get_tree_probs(t, lmk, rel)[0] p = np.prod(ps) posteriors.append(p) return posteriors
def get_sentence_posteriors(sentence, iterations=1): probs = [] meanings = [] # parse sentence with charniak and apply surgeries print 'parsing ...' modparse = get_modparse(sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() num_ancestors = count_lmk_phrases(t) - 1 for _ in xrange(iterations): meaning = get_meaning(num_ancestors=num_ancestors) lmk, rel = meaning probs.append(get_tree_prob(t, *meaning)) meanings.append(m2s(lmk,rel)) print '.' probs = np.array(probs) / sum(probs) return uniquify_distribution(meanings, probs)
if __name__ == '__main__': import argparse parser = argparse.ArgumentParser() # parser.add_argument('-n', '--num_iterations', type=int, default=1) # parser.add_argument('-l', '--location', type=Point) # parser.add_argument('--consistent', action='store_true') parser.add_argument('sentence') args = parser.parse_args() scene, speaker = construct_training_scene() print 'parsing ...' modparse = get_modparse(args.sentence) t = ParentedTree.parse(modparse) print '\n%s\n' % t.pprint() print_tree_entropy(t) raw_input() parts = build_meaning(t) for part in parts: print "Suggested for", part[0] items = sorted(part[1].items(), key=lambda x: x[1],reverse=True) for item in items: print ' ',rjust(item[0],40),item[1] print
# NOTE(review): this fragment is truncated -- the `try:` below has no visible
# `except` clause and the filtering logic ends mid-branch; recover the rest of
# the loop before editing.
for s in all_scenes:
    # Build a filtered copy of each scene, dropping unparsable description
    # chunks (candidates collected in `toremove`).
    toremove = []
    test_scene = {'scene':s['scene'], 'speaker':s['speaker'], 'lmks':[], 'loc_descs':[], 'ids':[]}
    for i,(lmk,sentence_chunks, eyedee) in enumerate(zip(s['lmks'], s['loc_descs'], s['ids'])):
        # Keep a copy of the chunks before any are removed.
        original = list(sentence_chunks)
        # print i, lmk,
        # for l in sentence_chunks:
        #     print l,'--',
        # print
        # Iterate over a copy so chunks can be removed while looping.
        for chunk in list(sentence_chunks):
            try:
                parsetree, modparsetree = get_modparse(chunk)
                # print modparsetree
                # raw_input()
                # if ('(NP' in modparsetree or '(PP' in modparsetree):
                #     sentence_chunks.remove(chunk)
                # if 'objects' in chunk:
                #     sentence_chunks.remove(chunk)
                # elif (' side' in chunk or
                #       'end' in chunk or
                #       'edge' in chunk or
                #       'corner' in chunk or
                #       'middle' in chunk or
                #       'center' in chunk or
                #       'centre' in chunk) and not ('table' in chunk):
                #     sentence_chunks.remove(chunk)
                # elif 'viewer' in chunk or 'between' in chunk:
# Generate training sentences: sample a random point on the table, have the
# speaker describe it, and parse the description.
for i in range(args.iterations):
    print 'sentence', i
    # Rebuild the scene every 50 sentences; the table geometry below is
    # reused across iterations until then.
    if (i % 50) == 0:
        scene, speaker = construct_training_scene(True)
        utils.scene.set_scene(scene,speaker)
        table = scene.landmarks['table'].representation.rect
        t_min = table.min_point
        t_max = table.max_point
        t_w = table.width
        t_h = table.height
    # Sample a uniform random point within the table rectangle.
    # NOTE(review): block structure reconstructed from collapsed source --
    # confirm this line sits outside the `if` above (a fresh point per
    # iteration, not one per scene).
    xloc,yloc = random()*t_w+t_min.x, random()*t_h+t_min.y
    trajector = Landmark( 'point', PointRepresentation(Vec2(xloc,yloc)), None, Landmark.POINT)
    sentence, rel, lmk = speaker.describe(trajector, scene, False, 1)
    parsestring, modparsestring = get_modparse(sentence)
    # Cache parses keyed by sentence text.
    unique_sentences[sentence] = (parsestring, modparsestring)
    # convert variables to the right types
    loc = (xloc, yloc)
    parse = ParentedTree.parse(parsestring)
    modparse = ParentedTree.parse(modparsestring)
    # how many ancestors should the sampled landmark have?
    num_ancestors = count_lmk_phrases(modparse) - 1
    # A count of -1 signals an unusable parse; skip this sentence.
    if num_ancestors == -1:
        print 'Failed to parse %d [%s] [%s] [%s]' % (i, sentence, parse, modparse)
        continue
    # NOTE(review): `assert` is stripped under -O; raise explicitly if this
    # invariant must hold in production.
    assert(not isinstance(lmk, tuple))
def train(meaning, sentence, update=1, printing=False):
    """Update the model from one (meaning, sentence) training pair.

    Parses the sentence and delegates to ``train_rec`` with the unpacked
    landmark and relation.
    """
    landmark, relation = meaning
    parsed = ParentedTree.parse(get_modparse(sentence)[1])
    train_rec(tree=parsed, lmk=landmark, rel=relation,
              update=update, printing=printing)