def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None):
    meaning_probs = {}

    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        (lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors)
        meaning = m2s(lmk, rel)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            # print "Tree probs: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    if extra_meaning:
        meaning = m2s(*extra_meaning)
        if meaning not in meaning_probs:
            # score the extra meaning itself
            ps = get_tree_probs(t, *extra_meaning)[0]
            # print "Tree prob: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    # normalize so the scores form a probability distribution
    summ = sum(meaning_probs.values())
    for key in meaning_probs:
        meaning_probs[key] /= summ
    return meaning_probs.items()
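# The loop above accumulates one unnormalized product of tree probabilities
# per distinct meaning and then divides by the total mass. A minimal,
# self-contained sketch of that normalization step (plain Python, no project
# dependencies; normalize_posteriors is our own illustrative name):
def normalize_posteriors(meaning_probs):
    # divide every unnormalized score by the total so the values sum to 1
    total = float(sum(meaning_probs.values()))
    return dict((m, p / total) for m, p in meaning_probs.items())

# normalize_posteriors({'m1': 0.2, 'm2': 0.6}) -> {'m1': 0.25, 'm2': 0.75}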
def get_tree_prob(tree, lmk=None, rel=None):
    prob = 1.0

    if len(tree.productions()) == 1:
        # if this tree only has one production
        # it means that its child is a terminal (word)
        word = tree[0]
        pos = tree.node

        p = WordCPT.probability(word=word, pos=pos,
                                lmk=lmk_id(lmk), rel=rel_type(rel))
        print p, pos, '->', word, m2s(lmk, rel)
        prob *= p

    else:
        lhs = tree.node
        rhs = ' '.join(n.node for n in tree)
        parent = tree.parent().node if tree.parent() else None

        if lhs == 'RELATION':
            # everything under a RELATION node should ignore the landmark
            lmk = None
        elif lhs == 'LANDMARK-PHRASE':
            # everything under a LANDMARK-PHRASE node should ignore the relation
            rel = None

            if parent == 'LANDMARK-PHRASE':
                # if the current node is a LANDMARK-PHRASE and the parent node
                # is also a LANDMARK-PHRASE then we should move to the parent
                # of the current landmark
                lmk = parent_landmark(lmk)

        if not parent:
            # LOCATION-PHRASE has no parent and is not related to lmk and rel
            p = ExpansionCPT.probability(rhs=rhs, lhs=lhs)
            print p, repr(lhs), '->', repr(rhs)
        else:
            p = ExpansionCPT.probability(rhs=rhs, lhs=lhs, parent=parent,
                                         lmk=lmk_id(lmk), rel=rel_type(rel))
            print p, repr(lhs), '->', repr(rhs), 'parent=%r' % parent, m2s(lmk, rel)
        prob *= p

        # call get_tree_prob recursively for each subtree
        for subtree in tree:
            prob *= get_tree_prob(subtree, lmk, rel)

    return prob
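# get_tree_prob factors the probability of a parse into a product of one
# expansion probability per internal node and one word probability per
# terminal, recursing into every subtree. A toy, self-contained version of
# the same recursion over nested tuples; score() is a hypothetical uniform
# stand-in for the WordCPT/ExpansionCPT lookups:
def toy_tree_prob(tree, score=lambda lhs, rhs: 0.5):
    if isinstance(tree, str):  # terminal word: nothing left to expand
        return 1.0
    lhs, children = tree[0], tree[1:]
    rhs = ' '.join(c if isinstance(c, str) else c[0] for c in children)
    p = score(lhs, rhs)        # probability of this expansion
    for child in children:     # recurse, exactly like the loop above
        p *= toy_tree_prob(child, score)
    return p

# toy_tree_prob(('LOCATION-PHRASE', ('RELATION', 'on'),
#                ('LANDMARK-PHRASE', 'table'))) == 0.5 ** 3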
def generate_sentence(loc, consistent, scene=None, speaker=None):
    utils.scene = utils.ModelScene(scene, speaker)

    (lmk, lmk_prob, lmk_ent), (rel, rel_prob, rel_ent) = get_meaning(loc=loc)
    meaning1 = m2s(lmk, rel)
    logger(meaning1)

    while True:
        rel_exp_chain, rele_prob_chain, rele_ent_chain, rel_terminals, rel_landmarks = \
            get_expansion('RELATION', rel=rel)
        lmk_exp_chain, lmke_prob_chain, lmke_ent_chain, lmk_terminals, lmk_landmarks = \
            get_expansion('LANDMARK-PHRASE', lmk=lmk)
        rel_words, relw_prob, relw_ent, rel_a = \
            get_words(rel_terminals, landmarks=rel_landmarks, rel=rel)
        lmk_words, lmkw_prob, lmkw_ent, lmk_a = \
            get_words(lmk_terminals, landmarks=lmk_landmarks,
                      prevword=(rel_words[-1] if rel_words else None))
        sentence = ' '.join(rel_words + lmk_words)

        logger('rel_exp_chain: %s' % rel_exp_chain)
        logger('lmk_exp_chain: %s' % lmk_exp_chain)

        meaning = Meaning((lmk, lmk_prob, lmk_ent,
                           rel, rel_prob, rel_ent,
                           rel_exp_chain, rele_prob_chain, rele_ent_chain, rel_terminals, rel_landmarks,
                           lmk_exp_chain, lmke_prob_chain, lmke_ent_chain, lmk_terminals, lmk_landmarks,
                           rel_words, relw_prob, relw_ent,
                           lmk_words, lmkw_prob, lmkw_ent))
        meaning.rel_a = rel_a
        meaning.lmk_a = lmk_a

        if consistent:
            # get the most likely meaning for the generated sentence
            try:
                posteriors = get_sentence_posteriors(sentence, iterations=10, extra_meaning=(lmk, rel))
            except:
                print 'try again ...'
                continue

            meaning2 = max(posteriors, key=itemgetter(1))[0]

            # is the original meaning the best one?
            if meaning1 != meaning2:
                print
                print 'sentence:', sentence
                print 'original:', meaning1
                print 'interpreted:', meaning2
                print 'try again ...'
                print
                continue

            for m, p in sorted(posteriors, key=itemgetter(1)):
                print m, p

        return meaning, sentence
def generate_sentence(loc, consistent):
    while True:
        lmk, rel = get_meaning(loc=loc)
        print m2s(lmk, rel)

        rel_exp, rel_prob, rel_ent = get_expansion('RELATION', rel=rel)
        lmk_exp, lmk_prob, lmk_ent = get_expansion('LANDMARK-PHRASE', lmk=lmk)
        rel_words, relw_prob, relw_ent = get_words(rel_exp, 'RELATION', rel=rel)
        lmk_words, lmkw_prob, lmkw_ent = get_words(lmk_exp, 'LANDMARK-PHRASE', lmk=lmk)
        sentence = ' '.join(rel_words + lmk_words)

        if consistent:
            meaning1 = m2s(lmk, rel)
            # get the most likely meaning for the generated sentence
            posteriors = get_sentence_posteriors(sentence)
            meaning2 = max(posteriors, key=itemgetter(1))[0]
            # is this what we are trying to say?
            if meaning1 != meaning2:
                continue

        return sentence
def generate_sentence(loc, consistent, scene=None, speaker=None, usebest=False,
                      golden=False, meaning=None, printing=True):
    utils.scene = utils.ModelScene(scene, speaker)

    if meaning is None:
        (lmk, lmk_prob, lmk_ent), (rel, rel_prob, rel_ent) = get_meaning(loc=loc, usebest=usebest)
    else:
        lmk, rel = meaning
        lmk_prob = lmk_ent = rel_prob = rel_ent = None
    meaning1 = m2s(lmk, rel)
    logger(meaning1)

    while True:
        rel_exp_chain, rele_prob_chain, rele_ent_chain, rel_terminals, rel_landmarks = \
            get_expansion('RELATION', rel=rel, usebest=usebest, golden=golden, printing=printing)
        lmk_exp_chain, lmke_prob_chain, lmke_ent_chain, lmk_terminals, lmk_landmarks = \
            get_expansion('LANDMARK-PHRASE', lmk=lmk, usebest=usebest, golden=golden, printing=printing)
        rel_words, relw_prob, relw_ent, rel_a = \
            get_words(rel_terminals, landmarks=rel_landmarks, rel=rel,
                      usebest=usebest, golden=golden, printing=printing)
        lmk_words, lmkw_prob, lmkw_ent, lmk_a = \
            get_words(lmk_terminals, landmarks=lmk_landmarks,
                      prevword=(rel_words[-1] if rel_words else None),
                      usebest=usebest, golden=golden, printing=printing)
        sentence = ' '.join(rel_words + lmk_words)

        if printing:
            logger('rel_exp_chain: %s' % rel_exp_chain)
        if printing:
            logger('lmk_exp_chain: %s' % lmk_exp_chain)

        meaning = Meaning((lmk, lmk_prob, lmk_ent,
                           rel, rel_prob, rel_ent,
                           rel_exp_chain, rele_prob_chain, rele_ent_chain, rel_terminals, rel_landmarks,
                           lmk_exp_chain, lmke_prob_chain, lmke_ent_chain, lmk_terminals, lmk_landmarks,
                           rel_words, relw_prob, relw_ent,
                           lmk_words, lmkw_prob, lmkw_ent))
        meaning.rel_a = rel_a
        meaning.lmk_a = lmk_a

        if consistent:
            # get the most likely meaning for the generated sentence
            try:
                posteriors = get_sentence_posteriors(sentence, iterations=10, extra_meaning=(lmk, rel))
            except:
                print 'try again ...'
                continue

            meaning2 = max(posteriors, key=itemgetter(1))[0]

            # is the original meaning the best one?
            if meaning1 != meaning2:
                print
                print 'sentence:', sentence
                print 'original:', meaning1
                print 'interpreted:', meaning2
                print 'try again ...'
                print
                continue

            for m, p in sorted(posteriors, key=itemgetter(1)):
                print m, p

        return meaning, sentence
def get_sentence_posteriors(sentence, iterations=1):
    probs = []
    meanings = []

    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        meaning = get_meaning(num_ancestors=num_ancestors)
        lmk, rel = meaning
        probs.append(get_tree_prob(t, *meaning))
        meanings.append(m2s(lmk, rel))
        print '.'

    probs = np.array(probs) / sum(probs)
    return uniquify_distribution(meanings, probs)
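# uniquify_distribution is imported from elsewhere in the project. Since
# repeated samples of the same meaning get identical tree probabilities
# here, one plausible reading (an assumption on our part, not the project's
# actual implementation) is: collapse duplicate meaning strings, keep one
# probability each, and renormalize.
def uniquify_distribution_sketch(labels, probs):
    seen = {}
    for label, p in zip(labels, probs):
        if label not in seen:   # keep the first probability seen per label
            seen[label] = p
    total = float(sum(seen.values()))
    return [(label, p / total) for label, p in seen.items()]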
def loop(num_iterations):
    min_dists = []
    lmk_priors = []
    rel_priors = []
    lmk_posts = []
    rel_posts = []
    golden_log_probs = []
    golden_entropies = []
    golden_ranks = []
    rel_types = []
    total_mass = []
    student_probs = []
    student_entropies = []
    student_ranks = []
    student_rel_types = []
    object_answers = []
    object_distributions = []
    epsilon = 1e-15

    for iteration in range(num_iterations):
        logger('Iteration %d' % iteration, 'okblue')

        # pick a random point on the table to describe
        rand_p = Vec2(random()*table.width + table.min_point.x,
                      random()*table.height + table.min_point.y)
        trajector = Landmark('point', PointRepresentation(rand_p), None, Landmark.POINT)

        if initial_training:
            sentence, sampled_relation, sampled_landmark = speaker.describe(trajector, scene, False, 1)
            if num_samples:
                for i in range(num_samples):
                    landmark, relation, _ = speaker.sample_meaning(trajector, scene, 1)
                    train((landmark, relation), sentence, update=1, printing=printing)
            else:
                for (landmark, relation), prob in speaker.all_meaning_probs(trajector, scene, 1):
                    train((landmark, relation), sentence, update=prob, printing=printing)
        else:
            meaning, sentence = generate_sentence(rand_p, consistent, scene, speaker,
                                                  usebest=True, printing=printing)
            logger('Generated sentence: %s' % sentence)

            if cheating:
                landmark, relation = meaning.args[0], meaning.args[3]
            else:
                if explicit_pointing:
                    landmark = meaning.args[0]
                if ambiguous_pointing:
                    pointing_point = landmark.representation.middle + \
                        Vec2(random()*0.1 - 0.05, random()*0.1 - 0.05)
            # _, bestsentence = generate_sentence(rand_p, consistent, scene, speaker, usebest=True, printing=printing)

            try:
                golden_posteriors = get_all_sentence_posteriors(sentence, meanings,
                                                                golden=True, printing=printing)
            except ParseError as e:
                logger(e)
                prob = 0
                rank = len(meanings) - 1
                entropy = 0
                ed = len(sentence)
                golden_log_probs.append(prob)
                golden_entropies.append(entropy)
                golden_ranks.append(rank)
                min_dists.append(ed)
                continue

            epsilon = 1e-15
            # score each candidate meaning by its parse posterior
            ps = [[golden_posteriors[lmk]*golden_posteriors[rel], (lmk, rel)]
                  for lmk, rel in meanings
                  if ((not explicit_pointing) or lmk == landmark)]

            if not explicit_pointing:
                all_lmk_probs = speaker.all_landmark_probs(
                    landmarks, Landmark(None, PointRepresentation(rand_p), None))
                all_lmk_probs = dict(zip(landmarks, all_lmk_probs))

            if ambiguous_pointing:
                all_lmk_pointing_probs = speaker.all_landmark_probs(
                    landmarks, Landmark(None, PointRepresentation(pointing_point), None))
                all_lmk_pointing_probs = dict(zip(landmarks, all_lmk_pointing_probs))

            # weight each candidate by the perceptual likelihoods and remember
            # the index of the meaning we actually tried to convey
            temp = None
            for i, (p, (lmk, rel)) in enumerate(ps):
                # lmk,rel = meanings[i]
                # logger( '%f, %s' % (p, m2s(lmk,rel)))
                head_on = speaker.get_head_on_viewpoint(lmk)
                if not explicit_pointing:
                    # ps[i][0] *= speaker.get_landmark_probability(lmk, landmarks, PointRepresentation(rand_p))[0]
                    ps[i][0] *= all_lmk_probs[lmk]
                if ambiguous_pointing:
                    # ps[i][0] *= speaker.get_landmark_probability(lmk, landmarks, PointRepresentation(pointing_point))[0]
                    ps[i][0] *= all_lmk_pointing_probs[lmk]
                ps[i][0] *= speaker.get_probabilities_points(np.array([rand_p]), rel, head_on, lmk)[0]
                if lmk == meaning.args[0] and rel == meaning.args[3]:
                    temp = i

            ps, _meanings = zip(*ps)
            print ps
            ps = np.array(ps)
            ps += epsilon
            ps = ps / ps.sum()
            temp = ps[temp]

            ps = sorted(zip(ps, _meanings), reverse=True)

            logger('Attempted to say: %s' % m2s(meaning.args[0], meaning.args[3]))
            logger('Interpreted as: %s' % m2s(ps[0][1][0], ps[0][1][1]))
            logger('Attempted: %s vs Interpreted: %s' % (str(temp), str(ps[0][0])))
            # logger( 'Golden entropy: %f, Max entropy %f' % (golden_entropy, max_entropy))

            # correct with the description of the interpreted meaning that is
            # closest in edit distance to what was actually said
            landmark, relation = ps[0][1]
            head_on = speaker.get_head_on_viewpoint(landmark)
            all_descs = speaker.get_all_meaning_descriptions(trajector, scene, landmark,
                                                             relation, head_on, 1)
            distances = []
            for desc in all_descs:
                distances.append([edit_distance(sentence, desc), desc])
            distances.sort()
            print distances

            correction = distances[0][1]
            if correction == sentence:
                correction = None
                logger('No correction!!!!!!!!!!!!!!!!!!', 'okgreen')
            accept_correction(meaning, correction, update_scale=scale,
                              eval_lmk=(not explicit_pointing),
                              multiply=multiply, printing=printing)

        def probs_metric(inverse=False):
            bestmeaning, bestsentence = generate_sentence(rand_p, consistent, scene, speaker,
                                                          usebest=True, golden=inverse, printing=printing)
            sampled_landmark, sampled_relation = bestmeaning.args[0], bestmeaning.args[3]
            try:
                golden_posteriors = get_all_sentence_posteriors(bestsentence, meanings,
                                                                golden=(not inverse), printing=printing)

                # lmk_prior = speaker.get_landmark_probability(sampled_landmark, landmarks, PointRepresentation(rand_p))[0]
                all_lmk_probs = speaker.all_landmark_probs(
                    landmarks, Landmark(None, PointRepresentation(rand_p), None))
                all_lmk_probs = dict(zip(landmarks, all_lmk_probs))

                lmk_prior = all_lmk_probs[sampled_landmark]
                head_on = speaker.get_head_on_viewpoint(sampled_landmark)
                rel_prior = speaker.get_probabilities_points(np.array([rand_p]), sampled_relation,
                                                             head_on, sampled_landmark)
                lmk_post = golden_posteriors[sampled_landmark]
                rel_post = golden_posteriors[sampled_relation]

                ps = np.array([golden_posteriors[lmk]*golden_posteriors[rel] for lmk, rel in meanings])
                rank = None
                for i, p in enumerate(ps):
                    lmk, rel = meanings[i]
                    # logger( '%f, %s' % (p, m2s(lmk,rel)))
                    head_on = speaker.get_head_on_viewpoint(lmk)
                    # ps[i] *= speaker.get_landmark_probability(lmk, landmarks, PointRepresentation(rand_p))[0]
                    ps[i] *= all_lmk_probs[lmk]
                    ps[i] *= speaker.get_probabilities_points(np.array([rand_p]), rel, head_on, lmk)
                    if lmk == sampled_landmark and rel == sampled_relation:
                        idx = i

                ps += epsilon
                ps = ps / ps.sum()
                prob = ps[idx]
                rank = sorted(ps, reverse=True).index(prob)
                entropy = entropy_of_probs(ps)
            except ParseError as e:
                logger(e)
                lmk_prior = 0
                rel_prior = 0
                lmk_post = 0
                rel_post = 0
                prob = 0
                rank = len(meanings) - 1
                entropy = 0
                distances = [[None]]

            head_on = speaker.get_head_on_viewpoint(sampled_landmark)
            all_descs = speaker.get_all_meaning_descriptions(trajector, scene, sampled_landmark,
                                                             sampled_relation, head_on, 1)
            distances = []
            for desc in all_descs:
                distances.append([edit_distance(bestsentence, desc), desc])
            distances.sort()

            return lmk_prior, rel_prior, lmk_post, rel_post, \
                   prob, entropy, rank, distances[0][0], type(sampled_relation)

        def db_mass():
            total = CProduction.get_production_sum(None)
            total += CWord.get_word_sum(None)
            return total

        def choosing_object_metric():
            trajector = choice(loi)
            sentence, sampled_relation, sampled_landmark = speaker.describe(trajector, scene, max_level=1)

            lmk_probs = []
            try:
                combined_heatmaps = heatmaps_for_sentence(sentence, all_meanings, loi_infos,
                                                          xs, ys, scene, speaker, step=step)

                for combined_heatmap, obj_lmk in zip(combined_heatmaps, loi):
                    # average the sentence heatmap over the cells covered by each object
                    ps = [p for (x, y), p in zip(list(product(xs, ys)), combined_heatmap)
                          if obj_lmk.representation.contains_point(Vec2(x, y))]
                    # print ps, xs.shape, ys.shape, combined_heatmap.shape
                    lmk_probs.append((sum(ps)/len(ps), obj_lmk))

                lmk_probs = sorted(lmk_probs, reverse=True)
                top_p, top_lmk = lmk_probs[0]
                lprobs, lmkss = zip(*lmk_probs)

                logger(sorted(zip(np.array(lprobs)/sum(lprobs),
                                  [(l.name, l.color, l.object_class) for l in lmkss]),
                              reverse=True))
                logger('I bet %f you are talking about a %s %s %s'
                       % (top_p/sum(lprobs), top_lmk.name, top_lmk.color, top_lmk.object_class))
                # objects.append(top_lmk)
            except Exception as e:
                logger('Unable to get object from sentence. %s' % e, 'fail')
                print traceback.format_exc()
                exit()

            return loi.index(trajector), [(lprob, loi.index(lmk)) for lprob, lmk in lmk_probs]

        if golden_metric:
            lmk_prior, rel_prior, lmk_post, rel_post, prob, entropy, rank, ed, rel_type = probs_metric()
        else:
            lmk_prior, rel_prior, lmk_post, rel_post, prob, entropy, rank, ed, rel_type = \
                None, None, None, None, None, None, None, None, None

        lmk_priors.append(lmk_prior)
        rel_priors.append(rel_prior)
        lmk_posts.append(lmk_post)
        rel_posts.append(rel_post)
        golden_log_probs.append(prob)
        golden_entropies.append(entropy)
        golden_ranks.append(rank)
        min_dists.append(ed)
        rel_types.append(rel_type)

        if mass_metric:
            total_mass.append(db_mass())
        else:
            total_mass.append(None)

        if student_metric:
            _, _, _, _, student_prob, student_entropy, student_rank, _, student_rel_type = \
                probs_metric(inverse=True)
        else:
            _, _, _, _, student_prob, student_entropy, student_rank, _, student_rel_type = \
                None, None, None, None, None, None, None, None, None

        student_probs.append(student_prob)
        student_entropies.append(student_entropy)
        student_ranks.append(student_rank)
        student_rel_types.append(student_rel_type)

        if choosing_metric:
            answer, distribution = choosing_object_metric()
        else:
            answer, distribution = None, None
        object_answers.append(answer)
        object_distributions.append(distribution)

    return zip(lmk_priors, rel_priors, lmk_posts, rel_posts,
               golden_log_probs, golden_entropies, golden_ranks,
               min_dists, rel_types, total_mass,
               student_probs, student_entropies, student_ranks,
               student_rel_types, object_answers, object_distributions)
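# The correction step in loop() picks, among all ways the speaker could have
# described the interpreted meaning, the description closest in edit distance
# to what was actually said. A self-contained sketch of that selection, with
# a word-level Levenshtein distance standing in for the project's
# edit_distance (which may be defined differently):
def word_edit_distance(a, b):
    # standard dynamic-programming Levenshtein distance over word lists
    a, b = a.split(), b.split()
    d = [[i + j if i * j == 0 else 0 for j in range(len(b) + 1)]
         for i in range(len(a) + 1)]
    for i in range(1, len(a) + 1):
        for j in range(1, len(b) + 1):
            d[i][j] = min(d[i - 1][j] + 1,                           # deletion
                          d[i][j - 1] + 1,                           # insertion
                          d[i - 1][j - 1] + (a[i - 1] != b[j - 1]))  # substitution
    return d[len(a)][len(b)]

def closest_description(sentence, descriptions):
    # pick the candidate description nearest to what was actually said
    return min(descriptions, key=lambda desc: word_edit_distance(sentence, desc))

# closest_description('on the table',
#                     ['near the table', 'on the table', 'far from the box'])
# -> 'on the table'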
def loop(data):
    time.sleep(data['delay'])
    scene = data['scene']
    speaker = data['speaker']
    utils.scene.set_scene(scene, speaker)
    num_iterations = len(data['loc_descs'])
    all_meanings = data['all_meanings']
    loi = data['loi']
    loi_infos = data['loi_infos']
    landmarks = data['landmarks']
    sorted_meaning_lists = data['sorted_meaning_lists']
    learn_objects = data['learn_objects']

    def heatmaps_for_sentences(sentences, all_meanings, loi_infos, xs, ys, scene, speaker, step=0.02):
        printing = False
        x = np.array([list(xs - step*0.5)] * len(ys))
        y = np.array([list(ys - step*0.5)] * len(xs)).T
        scene_bb = scene.get_bounding_box()
        scene_bb = scene_bb.inflate(Vec2(scene_bb.width*0.5, scene_bb.height*0.5))

        combined_heatmaps = []
        for obj_lmk, ms, heatmapss in loi_infos:
            combined_heatmap = None
            for sentence in sentences:
                posteriors = None
                while not posteriors:
                    try:
                        posteriors = get_all_sentence_posteriors(sentence, all_meanings, printing=printing)
                    except ParseError as pe:
                        raise pe
                    except Exception as e:
                        print e
                        sleeptime = random()*0.5
                        logger('Sleeping for %f and retrying "%s"' % (sleeptime, sentence))
                        time.sleep(sleeptime)
                        continue

                big_heatmap1 = None
                for m, (h1, h2) in zip(ms, heatmapss):
                    lmk, rel = m
                    p = posteriors[rel] * posteriors[lmk]
                    if big_heatmap1 is None:
                        big_heatmap1 = p * h1
                    else:
                        big_heatmap1 += p * h1

                if combined_heatmap is None:
                    combined_heatmap = big_heatmap1
                else:
                    combined_heatmap *= big_heatmap1

            combined_heatmaps.append(combined_heatmap)
        return combined_heatmaps

    object_answers = []
    object_distributions = []
    object_sentences = []
    object_ids = []

    epsilon = 1e-15
    for iteration in range(num_iterations):
        logger('Iteration %d comprehension' % iteration, 'okblue')

        trajector = data['lmks'][iteration]
        if trajector is None:
            trajector = choice(loi)
        logger('Teacher chooses: %s' % trajector)

        probs, sorted_meanings = zip(*sorted_meaning_lists[trajector][:30])
        probs = np.array(probs)  # - min(probs)
        probs /= probs.sum()

        sentences = data['loc_descs'][iteration]
        if sentences is None:
            (sampled_landmark, sampled_relation) = categorical_sample(sorted_meanings, probs)[0]
            logger('Teacher tries to say: %s' % m2s(sampled_landmark, sampled_relation))
            head_on = speaker.get_head_on_viewpoint(sampled_landmark)
            sentences = [describe(head_on, trajector, sampled_landmark, sampled_relation)]

        object_sentences.append(' '.join(sentences))
        object_ids.append(data['ids'][iteration])
        logger('Teacher says: %s' % ' '.join(sentences))

        for i, (p, sm) in enumerate(zip(probs[:15], sorted_meanings[:15])):
            lm, re = sm
            logger('%i: %f %s' % (i, p, m2s(*sm)))

        lmk_probs = []
        try:
            combined_heatmaps = heatmaps_for_sentences(sentences, all_meanings, loi_infos,
                                                       xs, ys, scene, speaker, step=step)
        except ParseError as e:
            logger('Unable to get object from sentence. %s' % e, 'fail')
            top_lmk = None
            distribution = [(0, False, False)]
        else:
            for combined_heatmap, obj_lmk in zip(combined_heatmaps, loi):
                ps = [p for (x, y), p in zip(list(product(xs, ys)), combined_heatmap)
                      if obj_lmk.representation.contains_point(Vec2(x, y))]
                # print ps, xs.shape, ys.shape, combined_heatmap.shape
                lmk_probs.append((sum(ps)/len(ps), obj_lmk))

            lmk_probs = sorted(lmk_probs, reverse=True)
            top_p, top_lmk = lmk_probs[0]
            lprobs, lmkss = zip(*lmk_probs)
            distribution = [(lprob, lmk.name, loi.index(lmk)) for lprob, lmk in lmk_probs]

            logger(sorted(zip(np.array(lprobs)/sum(lprobs),
                              [(l.name, l.color, l.object_class) for l in lmkss]),
                          reverse=True))
            logger('I bet %f you are talking about a %s %s %s'
                   % (top_p/sum(lprobs), top_lmk.name, top_lmk.color, top_lmk.object_class))
            # objects.append(top_lmk)

        answer = (trajector.name, loi.index(trajector))
        object_answers.append(answer)
        object_distributions.append(distribution)

        # Present top_lmk to teacher
        logger("top_lmk == trajector: %r, learn_objects: %r"
               % (top_lmk == trajector, learn_objects), 'okgreen')
        if top_lmk == trajector or not learn_objects:
            # Give morphine
            logger("Ahhhhh, morphine...", 'okgreen')
            pass
        else:
            logger("LEARNING!!!!!!!!!!!", 'okgreen')
            updates, _ = zip(*sorted_meaning_lists[trajector][:30])

            howmany = 5
            for sentence in sentences:
                for _ in range(howmany):
                    meaning = categorical_sample(sorted_meanings, probs)[0]
                    update = updates[sorted_meanings.index(meaning)]
                    try:
                        accept_object_correction(meaning, sentence, update*scale, printing=printing)
                    except:
                        pass
                for update, meaning in sorted_meaning_lists[trajector][-howmany:]:
                    try:
                        accept_object_correction(meaning, sentence, update*scale, printing=printing)
                    except:
                        pass

    return zip(object_answers, object_distributions, object_sentences, object_ids)
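# categorical_sample is imported from the project's utilities; the teacher
# uses it above to draw (landmark, relation) meanings in proportion to their
# applicability scores. A minimal numpy sketch of categorical sampling under
# that assumption (categorical_sample_sketch is our own illustrative name):
import numpy as np

def categorical_sample_sketch(items, probs, n=1):
    # draw n indices according to the (already normalized) probabilities,
    # then return the corresponding items
    idx = np.random.choice(len(items), size=n, p=probs)
    return [items[i] for i in idx]

# categorical_sample_sketch(['a', 'b', 'c'], np.array([0.7, 0.2, 0.1]))[0]
# returns 'a' about 70% of the time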
def loop(data):
    time.sleep(random())

    if 'num_iterations' in data:
        scene, speaker = construct_training_scene(True)
        num_iterations = data['num_iterations']
    else:
        scene = data['scene']
        speaker = data['speaker']
        num_iterations = len(data['loc_descs'])

    utils.scene.set_scene(scene, speaker)

    scene_bb = scene.get_bounding_box()
    scene_bb = scene_bb.inflate(Vec2(scene_bb.width*0.5, scene_bb.height*0.5))
    table = scene.landmarks['table'].representation.get_geometry()

    # step = 0.04
    loi = [lmk for lmk in scene.landmarks.values() if lmk.name != 'table']
    all_heatmaps_tupless, xs, ys = speaker.generate_all_heatmaps(scene, step=step, loi=loi)

    loi_infos = []
    all_meanings = set()
    for obj_lmk, all_heatmaps_tuples in zip(loi, all_heatmaps_tupless):
        lmks, rels, heatmapss = zip(*all_heatmaps_tuples)
        meanings = zip(lmks, rels)
        # print meanings
        all_meanings.update(meanings)
        loi_infos.append((obj_lmk, meanings, heatmapss))

    all_heatmaps_tupless, xs, ys = speaker.generate_all_heatmaps(scene, step=step)
    all_heatmaps_tuples = all_heatmaps_tupless[0]

    # x = np.array([list(xs-step*0.5)]*len(ys))
    # y = np.array([list(ys-step*0.5)]*len(xs)).T
    # for lamk, rel, (heatmap1, heatmap2) in all_heatmaps_tuples:
    #     logger(m2s(lamk, rel))
    #     if isinstance(rel, DistanceRelation):
    #         probabilities = heatmap2.reshape((len(xs), len(ys))).T
    #         plt.pcolor(x, y, probabilities, cmap='jet', edgecolors='none', alpha=0.7)
    #         plt.colorbar()
    #         for lmk in scene.landmarks.values():
    #             if isinstance(lmk.representation, GroupLineRepresentation):
    #                 xxs = [lmk.representation.line.start.x, lmk.representation.line.end.x]
    #                 yys = [lmk.representation.line.start.y, lmk.representation.line.end.y]
    #                 plt.fill(xxs, yys, facecolor='none', linewidth=2)
    #             elif isinstance(lmk.representation, RectangleRepresentation):
    #                 rect = lmk.representation.rect
    #                 xxs = [rect.min_point.x, rect.min_point.x, rect.max_point.x, rect.max_point.x]
    #                 yys = [rect.min_point.y, rect.max_point.y, rect.max_point.y, rect.min_point.y]
    #                 plt.fill(xxs, yys, facecolor='none', linewidth=2)
    #                 plt.text(rect.min_point.x+0.01, rect.max_point.y+0.02, lmk.name)
    #         plt.title(m2s(lamk, rel))
    #         logger("Showing")
    #         plt.show()
    #         logger("End")

    x = np.array([list(xs - step*0.5)] * len(ys))
    y = np.array([list(ys - step*0.5)] * len(xs)).T

    lmks, rels, heatmapss = zip(*all_heatmaps_tuples)
    # graphmax1 = graphmax2 = 0
    meanings = zip(lmks, rels)
    landmarks = list(set(lmks))
    # relations = list(set(rels))

    epsilon = 0.0001

    def heatmaps_for_sentences(sentences, all_meanings, loi_infos, xs, ys, scene, speaker, step=0.02):
        printing = False
        x = np.array([list(xs - step*0.5)] * len(ys))
        y = np.array([list(ys - step*0.5)] * len(xs)).T
        scene_bb = scene.get_bounding_box()
        scene_bb = scene_bb.inflate(Vec2(scene_bb.width*0.5, scene_bb.height*0.5))
        # x = np.array([list(xs-step*0.5)]*len(ys))
        # y = np.array([list(ys-step*0.5)]*len(xs)).T

        combined_heatmaps = []
        for obj_lmk, ms, heatmapss in loi_infos:
            # for m, (h1, h2) in zip(ms, heatmapss):
            #     logger(h1.shape)
            #     logger(x.shape)
            #     logger(y.shape)
            #     logger(xs.shape)
            #     logger(ys.shape)
            #     plt.pcolor(x, y, h1.reshape((len(xs), len(ys))).T, cmap='jet', edgecolors='none', alpha=0.7)
            #     plt.colorbar()
            #     for lmk in scene.landmarks.values():
            #         if isinstance(lmk.representation, GroupLineRepresentation):
            #             xxs = [lmk.representation.line.start.x, lmk.representation.line.end.x]
            #             yys = [lmk.representation.line.start.y, lmk.representation.line.end.y]
            #             plt.fill(xxs, yys, facecolor='none', linewidth=2)
            #         elif isinstance(lmk.representation, RectangleRepresentation):
            #             rect = lmk.representation.rect
            #             xxs = [rect.min_point.x, rect.min_point.x, rect.max_point.x, rect.max_point.x]
            #             yys = [rect.min_point.y, rect.max_point.y, rect.max_point.y, rect.min_point.y]
            #             plt.fill(xxs, yys, facecolor='none', linewidth=2)
            #             plt.text(rect.min_point.x+0.01, rect.max_point.y+0.02, lmk.name)
            #     plt.title(m2s(*m))
            #     logger(m2s(*m))
            #     plt.axis('scaled')
            #     plt.show()

            combined_heatmap = None
            for sentence in sentences:
                posteriors = get_all_sentence_posteriors(sentence, all_meanings, printing=printing)

                big_heatmap1 = None
                for m, (h1, h2) in zip(ms, heatmapss):
                    lmk, rel = m
                    p = posteriors[rel] * posteriors[lmk]
                    if big_heatmap1 is None:
                        big_heatmap1 = p * h1
                    else:
                        big_heatmap1 += p * h1

                if combined_heatmap is None:
                    combined_heatmap = big_heatmap1
                else:
                    combined_heatmap *= big_heatmap1

            combined_heatmaps.append(combined_heatmap)
        return combined_heatmaps

    # how well each meaning singles out each object of interest: average
    # heatmap mass over the object, then recentered below so that only
    # better-than-chance meanings stay positive
    object_meaning_applicabilities = {}
    for obj_lmk, ms, heatmapss in loi_infos:
        for m, (h1, h2) in zip(ms, heatmapss):
            ps = [p for (x, y), p in zip(list(product(xs, ys)), h1)
                  if obj_lmk.representation.contains_point(Vec2(x, y))]
            if m not in object_meaning_applicabilities:
                object_meaning_applicabilities[m] = {}
            object_meaning_applicabilities[m][obj_lmk] = sum(ps) / len(ps)

    k = len(loi)
    for meaning_dict in object_meaning_applicabilities.values():
        total = sum(meaning_dict.values())
        if total != 0:
            for obj_lmk in meaning_dict.keys():
                meaning_dict[obj_lmk] = meaning_dict[obj_lmk]/total - 1.0/k
            total = sum([value for value in meaning_dict.values() if value > 0])
            for obj_lmk in meaning_dict.keys():
                meaning_dict[obj_lmk] = (2 if meaning_dict[obj_lmk] > 0 else 1)*meaning_dict[obj_lmk] - total

    sorted_meaning_lists = {}
    for m in object_meaning_applicabilities.keys():
        for obj_lmk in object_meaning_applicabilities[m].keys():
            if obj_lmk not in sorted_meaning_lists:
                sorted_meaning_lists[obj_lmk] = []
            sorted_meaning_lists[obj_lmk].append((object_meaning_applicabilities[m][obj_lmk], m))
    for obj_lmk in sorted_meaning_lists.keys():
        sorted_meaning_lists[obj_lmk].sort(reverse=True)

    min_dists = []
    lmk_priors = []
    rel_priors = []
    lmk_posts = []
    rel_posts = []
    golden_log_probs = []
    golden_entropies = []
    golden_ranks = []
    rel_types = []
    total_mass = []
    student_probs = []
    student_entropies = []
    student_ranks = []
    student_rel_types = []
    object_answers = []
    object_distributions = []
    object_sentences = []
    epsilon = 1e-15

    for iteration in range(num_iterations):
        logger('Iteration %d comprehension' % iteration, 'okblue')

        if 'loc_descs' in data:
            trajector = data['lmks'][iteration]
            logger('Teacher chooses: %s' % trajector)
            sentences = data['loc_descs'][iteration]

            probs, sorted_meanings = zip(*sorted_meaning_lists[trajector][:30])
            probs = np.array(probs)  # - min(probs)
            probs /= probs.sum()

            if sentences is None:
                (sampled_landmark, sampled_relation) = categorical_sample(sorted_meanings, probs)[0]
                logger('Teacher tries to say: %s' % m2s(sampled_landmark, sampled_relation))
                head_on = speaker.get_head_on_viewpoint(sampled_landmark)
                sentences = [describe(head_on, trajector, sampled_landmark, sampled_relation)]
        else:
            # Teacher describes
            trajector = choice(loi)
            # sentence, sampled_relation, sampled_landmark = speaker.describe(trajector, scene, max_level=1)
            logger('Teacher chooses: %s' % trajector)

            # Choose from meanings
            probs, sorted_meanings = zip(*sorted_meaning_lists[trajector][:30])
            probs = np.array(probs)  # - min(probs)
            probs /= probs.sum()

            (sampled_landmark, sampled_relation) = categorical_sample(sorted_meanings, probs)[0]
            logger('Teacher tries to say: %s' % m2s(sampled_landmark, sampled_relation))

            # Generate sentence
            # _, sentence = generate_sentence(None, False, scene, speaker, meaning=(sampled_landmark, sampled_relation), golden=True, printing=printing)
            sentences = [describe(speaker.get_head_on_viewpoint(sampled_landmark),
                                  trajector, sampled_landmark, sampled_relation)]

        object_sentences.append(' '.join(sentences))
        logger('Teacher says: %s' % ' '.join(sentences))

        for i, (p, sm) in enumerate(zip(probs[:15], sorted_meanings[:15])):
            lm, re = sm
            logger('%i: %f %s' % (i, p, m2s(*sm)))
            # head_on = speaker.get_head_on_viewpoint(lm)
            # speaker.visualize(scene, trajector, head_on, lm, re)

        lmk_probs = []
        try:
            combined_heatmaps = heatmaps_for_sentences(sentences, all_meanings, loi_infos,
                                                       xs, ys, scene, speaker, step=step)

            for combined_heatmap, obj_lmk in zip(combined_heatmaps, loi):
                # x = np.array([list(xs-step*0.5)]*len(ys))
                # y = np.array([list(ys-step*0.5)]*len(xs)).T
                # logger(combined_heatmap.shape)
                # logger(x.shape)
                # logger(y.shape)
                # logger(xs.shape)
                # logger(ys.shape)
                # plt.pcolor(x, y, combined_heatmap.reshape((len(xs), len(ys))).T, cmap='jet', edgecolors='none', alpha=0.7)
                # plt.colorbar()
                # for lmk in scene.landmarks.values():
                #     if isinstance(lmk.representation, GroupLineRepresentation):
                #         xxs = [lmk.representation.line.start.x, lmk.representation.line.end.x]
                #         yys = [lmk.representation.line.start.y, lmk.representation.line.end.y]
                #         plt.fill(xxs, yys, facecolor='none', linewidth=2)
                #     elif isinstance(lmk.representation, RectangleRepresentation):
                #         rect = lmk.representation.rect
                #         xxs = [rect.min_point.x, rect.min_point.x, rect.max_point.x, rect.max_point.x]
                #         yys = [rect.min_point.y, rect.max_point.y, rect.max_point.y, rect.min_point.y]
                #         plt.fill(xxs, yys, facecolor='none', linewidth=2)
                #         plt.text(rect.min_point.x+0.01, rect.max_point.y+0.02, lmk.name)
                # plt.axis('scaled')
                # plt.axis([scene_bb.min_point.x, scene_bb.max_point.x, scene_bb.min_point.y, scene_bb.max_point.y])
                # plt.show()

                ps = [p for (x, y), p in zip(list(product(xs, ys)), combined_heatmap)
                      if obj_lmk.representation.contains_point(Vec2(x, y))]
                # print ps, xs.shape, ys.shape, combined_heatmap.shape
                lmk_probs.append((sum(ps)/len(ps), obj_lmk))

            lmk_probs = sorted(lmk_probs, reverse=True)
            top_p, top_lmk = lmk_probs[0]
            lprobs, lmkss = zip(*lmk_probs)
            answer, distribution = loi.index(trajector), \
                [(lprob, loi.index(lmk)) for lprob, lmk in lmk_probs]

            logger(sorted(zip(np.array(lprobs)/sum(lprobs),
                              [(l.name, l.color, l.object_class) for l in lmkss]),
                          reverse=True))
            logger('I bet %f you are talking about a %s %s %s'
                   % (top_p/sum(lprobs), top_lmk.name, top_lmk.color, top_lmk.object_class))
            # objects.append(top_lmk)
        except Exception as e:
            logger('Unable to get object from sentence. %s' % e, 'fail')
            answer = None
            top_lmk = None
            distribution = [(0, False)]

        object_answers.append(answer)
        object_distributions.append(distribution)

        # Present top_lmk to teacher
        if top_lmk == trajector:
            # Give morphine
            pass
        else:
            updates, _ = zip(*sorted_meaning_lists[trajector][:30])

            howmany = 5
            for sentence in sentences:
                # reinforce a few sampled good meanings and punish the worst ones
                for _ in range(howmany):
                    meaning = categorical_sample(sorted_meanings, probs)[0]
                    update = updates[sorted_meanings.index(meaning)]
                    try:
                        accept_object_correction(meaning, sentence, update*scale, printing=printing)
                    except:
                        pass
                for update, meaning in sorted_meaning_lists[trajector][-howmany:]:
                    try:
                        accept_object_correction(meaning, sentence, update*scale, printing=printing)
                    except:
                        pass

        for _ in range(0):  # production phase, currently disabled
            logger('Iteration %d production' % iteration, 'okblue')
            rand_p = Vec2(random()*table.width + table.min_point.x,
                          random()*table.height + table.min_point.y)
            trajector = Landmark('point', PointRepresentation(rand_p), None, Landmark.POINT)

            meaning, sentence = generate_sentence(rand_p, False, scene, speaker,
                                                  usebest=True, printing=printing)
            logger('Generated sentence: %s' % sentence)

            landmark = meaning.args[0]
            # if ambiguous_pointing:
            #     pointing_point = landmark.representation.middle + Vec2(random()*0.1-0.05, random()*0.1-0.05)
            # _, bestsentence = generate_sentence(rand_p, False, scene, speaker, usebest=True, printing=printing)

            try:
                golden_posteriors = get_all_sentence_posteriors(sentence, meanings,
                                                                golden=True, printing=printing)
            except ParseError as e:
                logger(e)
                prob = 0
                rank = len(meanings) - 1
                entropy = 0
                ed = len(sentence)
                golden_log_probs.append(prob)
                golden_entropies.append(entropy)
                golden_ranks.append(rank)
                min_dists.append(ed)
                continue

            epsilon = 1e-15
            ps = [[golden_posteriors[lmk]*golden_posteriors[rel], (lmk, rel)]
                  for lmk, rel in meanings if (lmk == landmark)]

            all_lmk_probs = speaker.all_landmark_probs(
                landmarks, Landmark(None, PointRepresentation(rand_p), None))
            all_lmk_probs = dict(zip(landmarks, all_lmk_probs))

            temp = None
            for i, (p, (lmk, rel)) in enumerate(ps):
                # lmk,rel = meanings[i]
                # logger( '%f, %s' % (p, m2s(lmk,rel)))
                head_on = speaker.get_head_on_viewpoint(lmk)
                ps[i][0] *= speaker.get_probabilities_points(np.array([rand_p]), rel, head_on, lmk)[0]
                if lmk == meaning.args[0] and rel == meaning.args[3]:
                    temp = i

            ps, _meanings = zip(*ps)
            print ps
            ps = np.array(ps)
            ps += epsilon
            ps = ps / ps.sum()
            temp = ps[temp]

            ps = sorted(zip(ps, _meanings), reverse=True)

            logger('Attempted to say: %s' % m2s(meaning.args[0], meaning.args[3]))
            logger('Interpreted as: %s' % m2s(ps[0][1][0], ps[0][1][1]))
            logger('Attempted: %f vs Interpreted: %f' % (temp, ps[0][0]))
            # logger( 'Golden entropy: %f, Max entropy %f' % (golden_entropy, max_entropy))

            landmark, relation = ps[0][1]
            head_on = speaker.get_head_on_viewpoint(landmark)
            all_descs = speaker.get_all_meaning_descriptions(trajector, scene, landmark,
                                                             relation, head_on, 1)
            distances = []
            for desc in all_descs:
                distances.append([edit_distance(sentence, desc), desc])
            distances.sort()
            print distances

            correction = distances[0][1]
            # if correction == sentence:
            #     correction = None
            #     logger('No correction!!!!!!!!!!!!!!!!!!', 'okgreen')
            accept_correction(meaning, correction, update_scale=scale,
                              eval_lmk=False, multiply=False, printing=printing)

        def probs_metric(inverse=False):
            rand_p = Vec2(random()*table.width + table.min_point.x,
                          random()*table.height + table.min_point.y)
            try:
                bestmeaning, bestsentence = generate_sentence(rand_p, False, scene, speaker,
                                                              usebest=True, golden=inverse,
                                                              printing=printing)
                sampled_landmark, sampled_relation = bestmeaning.args[0], bestmeaning.args[3]
                golden_posteriors = get_all_sentence_posteriors(bestsentence, meanings,
                                                                golden=(not inverse), printing=printing)

                # lmk_prior = speaker.get_landmark_probability(sampled_landmark, landmarks, PointRepresentation(rand_p))[0]
                all_lmk_probs = speaker.all_landmark_probs(
                    landmarks, Landmark(None, PointRepresentation(rand_p), None))
                all_lmk_probs = dict(zip(landmarks, all_lmk_probs))

                lmk_prior = all_lmk_probs[sampled_landmark]
                head_on = speaker.get_head_on_viewpoint(sampled_landmark)
                rel_prior = speaker.get_probabilities_points(np.array([rand_p]), sampled_relation,
                                                             head_on, sampled_landmark)
                lmk_post = golden_posteriors[sampled_landmark]
                rel_post = golden_posteriors[sampled_relation]

                ps = np.array([golden_posteriors[lmk]*golden_posteriors[rel] for lmk, rel in meanings])
                rank = None
                for i, p in enumerate(ps):
                    lmk, rel = meanings[i]
                    # logger( '%f, %s' % (p, m2s(lmk,rel)))
                    head_on = speaker.get_head_on_viewpoint(lmk)
                    # ps[i] *= speaker.get_landmark_probability(lmk, landmarks, PointRepresentation(rand_p))[0]
                    ps[i] *= all_lmk_probs[lmk]
                    ps[i] *= speaker.get_probabilities_points(np.array([rand_p]), rel, head_on, lmk)
                    if lmk == sampled_landmark and rel == sampled_relation:
                        idx = i

                ps += epsilon
                ps = ps / ps.sum()
                prob = ps[idx]
                rank = sorted(ps, reverse=True).index(prob)
                entropy = entropy_of_probs(ps)
            except (ParseError, RuntimeError) as e:
                logger(e)
                lmk_prior = 0
                rel_prior = 0
                lmk_post = 0
                rel_post = 0
                prob = 0
                rank = len(meanings) - 1
                entropy = 0
                distances = [[None]]

            head_on = speaker.get_head_on_viewpoint(sampled_landmark)
            all_descs = speaker.get_all_meaning_descriptions(trajector, scene, sampled_landmark,
                                                             sampled_relation, head_on, 1)
            distances = []
            for desc in all_descs:
                distances.append([edit_distance(bestsentence, desc), desc])
            distances.sort()

            return lmk_prior, rel_prior, lmk_post, rel_post, \
                   prob, entropy, rank, distances[0][0], type(sampled_relation)

        def db_mass():
            total = CProduction.get_production_sum(None)
            total += CWord.get_word_sum(None)
            return total

        def choosing_object_metric():
            trajector = choice(loi)
            sentence, sampled_relation, sampled_landmark = speaker.describe(trajector, scene, max_level=1)

            lmk_probs = []
            try:
                combined_heatmaps = heatmaps_for_sentence(sentence, all_meanings, loi_infos,
                                                          xs, ys, scene, speaker, step=step)
                for combined_heatmap, obj_lmk in zip(combined_heatmaps, loi):
                    ps = [p for (x, y), p in zip(list(product(xs, ys)), combined_heatmap)
                          if obj_lmk.representation.contains_point(Vec2(x, y))]
                    # print ps, xs.shape, ys.shape, combined_heatmap.shape
                    lmk_probs.append((sum(ps)/len(ps), obj_lmk))

                lmk_probs = sorted(lmk_probs, reverse=True)
                top_p, top_lmk = lmk_probs[0]
                lprobs, lmkss = zip(*lmk_probs)
                logger(sorted(zip(np.array(lprobs)/sum(lprobs),
                                  [(l.name, l.color, l.object_class) for l in lmkss]),
                              reverse=True))
                logger('I bet %f you are talking about a %s %s %s'
                       % (top_p/sum(lprobs), top_lmk.name, top_lmk.color, top_lmk.object_class))
                # objects.append(top_lmk)
            except Exception as e:
                logger('Unable to get object from sentence. %s' % e, 'fail')
                print traceback.format_exc()
                exit()

            return loi.index(trajector), [(lprob, loi.index(lmk)) for lprob, lmk in lmk_probs]

        if golden_metric:
            lmk_prior, rel_prior, lmk_post, rel_post, prob, entropy, rank, ed, rel_type = probs_metric()
        else:
            lmk_prior, rel_prior, lmk_post, rel_post, prob, entropy, rank, ed, rel_type = [None]*9

        lmk_priors.append(lmk_prior)
        rel_priors.append(rel_prior)
        lmk_posts.append(lmk_post)
        rel_posts.append(rel_post)
        golden_log_probs.append(prob)
        golden_entropies.append(entropy)
        golden_ranks.append(rank)
        min_dists.append(ed)
        rel_types.append(rel_type)

        if mass_metric:
            total_mass.append(db_mass())
        else:
            total_mass.append(None)

        if student_metric:
            _, _, _, _, student_prob, student_entropy, student_rank, _, student_rel_type = \
                probs_metric(inverse=True)
        else:
            _, _, _, _, student_prob, student_entropy, student_rank, _, student_rel_type = \
                None, None, None, None, None, None, None, None, None

        student_probs.append(student_prob)
        student_entropies.append(student_entropy)
        student_ranks.append(student_rank)
        student_rel_types.append(student_rel_type)

        # if choosing_metric:
        #     answer, distribution = choosing_object_metric()
        # else:
        #     answer, distribution = None, None
        # object_answers.append(answer)
        # object_distributions.append(distribution)

    return zip(lmk_priors, rel_priors, lmk_posts, rel_posts,
               golden_log_probs, golden_entropies, golden_ranks,
               min_dists, rel_types, total_mass,
               student_probs, student_entropies, student_ranks,
               student_rel_types, object_answers, object_distributions,
               object_sentences)
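# The applicability re-weighting near the top of loop() turns each meaning's
# per-object coverage into a signed score: normalize across the k objects,
# subtract the uniform share 1/k, then double the positive scores and subtract
# the total positive mass from every object, so only meanings that strongly
# single out one object stay positive. A self-contained numeric sketch of
# that same transform (reweight is our own illustrative name):
def reweight(applicabilities, k):
    # applicabilities: {object: mean heatmap mass over that object}
    total = float(sum(applicabilities.values()))
    if total == 0:
        return dict(applicabilities)
    # center on the uniform share 1/k: positive means "better than chance"
    scores = dict((o, v / total - 1.0 / k) for o, v in applicabilities.items())
    # penalize by the total positive mass, doubling positives first
    pos = sum(v for v in scores.values() if v > 0)
    return dict((o, (2 if v > 0 else 1) * v - pos) for o, v in scores.items())

# reweight({'cup': 0.6, 'box': 0.3, 'book': 0.1}, k=3)
# -> {'cup': 0.2667, 'box': -0.3, 'book': -0.5} (approximately)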