Ejemplo n.º 1
0
def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None):
    meaning_probs = {}
    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        (lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors)
        meaning = m2s(lmk,rel)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            # print "Tree probs: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    if extra_meaning:
        meaning = m2s(*extra_meaning)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            # print "Tree prob: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    summ = sum(meaning_probs.values())
    for key in meaning_probs:
        meaning_probs[key] /= summ
    return meaning_probs.items()
Ejemplo n.º 2
0
def get_tree_prob(tree, lmk=None, rel=None):
    prob = 1.0

    if len(tree.productions()) == 1:
        # if this tree only has one production
        # it means that its child is a terminal (word)
        word = tree[0]
        pos = tree.node

        p = WordCPT.probability(word=word, pos=pos,
                                    lmk=lmk_id(lmk), rel=rel_type(rel))
        print p, pos, '->', word, m2s(lmk,rel)
        prob *= p

    else:
        lhs = tree.node
        rhs = ' '.join(n.node for n in tree)
        parent = tree.parent().node if tree.parent() else None

        if lhs == 'RELATION':
            # everything under a RELATION node should ignore the landmark
            lmk = None
        elif lhs == 'LANDMARK-PHRASE':
            # everything under a LANDMARK-PHRASE node should ignore the relation
            rel = None

            if parent == 'LANDMARK-PHRASE':
                # if the current node is a LANDMARK-PHRASE and the parent node
                # is also a LANDMARK-PHRASE then we should move to the parent
                # of the current landmark
                lmk = parent_landmark(lmk)

        if not parent:
            # LOCATION-PHRASE has no parent and is not related to lmk and rel
            p = ExpansionCPT.probability(rhs=rhs, lhs=lhs)
            print p, repr(lhs), '->', repr(rhs)
        else:
            p = ExpansionCPT.probability(rhs=rhs, lhs=lhs, parent=parent,
                                             lmk=lmk_id(lmk), rel=rel_type(rel))
            print p, repr(lhs), '->', repr(rhs), 'parent=%r'%parent, m2s(lmk,rel)
        prob *= p

        # call get_tree_prob recursively for each subtree
        for subtree in tree:
            prob *= get_tree_prob(subtree, lmk, rel)

    return prob
Ejemplo n.º 3
0
def generate_sentence(loc, consistent, scene=None, speaker=None):
    utils.scene = utils.ModelScene(scene, speaker)

    (lmk, lmk_prob, lmk_ent), (rel, rel_prob, rel_ent) = get_meaning(loc=loc)
    meaning1 = m2s(lmk, rel)
    logger(meaning1)

    while True:
        rel_exp_chain, rele_prob_chain, rele_ent_chain, rel_terminals, rel_landmarks = get_expansion(
            'RELATION', rel=rel)
        lmk_exp_chain, lmke_prob_chain, lmke_ent_chain, lmk_terminals, lmk_landmarks = get_expansion(
            'LANDMARK-PHRASE', lmk=lmk)
        rel_words, relw_prob, relw_ent, rel_a = get_words(
            rel_terminals, landmarks=rel_landmarks, rel=rel)
        lmk_words, lmkw_prob, lmkw_ent, lmk_a = get_words(
            lmk_terminals,
            landmarks=lmk_landmarks,
            prevword=(rel_words[-1] if rel_words else None))
        sentence = ' '.join(rel_words + lmk_words)

        logger('rel_exp_chain: %s' % rel_exp_chain)
        logger('lmk_exp_chain: %s' % lmk_exp_chain)

        meaning = Meaning(
            (lmk, lmk_prob, lmk_ent, rel, rel_prob, rel_ent, rel_exp_chain,
             rele_prob_chain, rele_ent_chain, rel_terminals, rel_landmarks,
             lmk_exp_chain, lmke_prob_chain, lmke_ent_chain, lmk_terminals,
             lmk_landmarks, rel_words, relw_prob, relw_ent, lmk_words,
             lmkw_prob, lmkw_ent))
        meaning.rel_a = rel_a
        meaning.lmk_a = lmk_a

        if consistent:
            # get the most likely meaning for the generated sentence
            try:
                posteriors = get_sentence_posteriors(sentence,
                                                     iterations=10,
                                                     extra_meaning=(lmk, rel))
            except:
                print 'try again ...'
                continue

            meaning2 = max(posteriors, key=itemgetter(1))[0]

            # is the original meaning the best one?
            if meaning1 != meaning2:
                print
                print 'sentence:', sentence
                print 'original:', meaning1
                print 'interpreted:', meaning2
                print 'try again ...'
                print
                continue

            for m, p in sorted(posteriors, key=itemgetter(1)):
                print m, p

        return meaning, sentence
Ejemplo n.º 4
0
def generate_sentence(loc, consistent):
    while True:
        lmk, rel = get_meaning(loc=loc)
        print m2s(lmk, rel)
        rel_exp, rel_prob, rel_ent = get_expansion('RELATION', rel=rel)
        lmk_exp, lmk_prob, lmk_ent = get_expansion('LANDMARK-PHRASE', lmk=lmk)
        rel_words, relw_prob, relw_ent = get_words(rel_exp, 'RELATION', rel=rel)
        lmk_words, lmkw_prob, lmkw_ent = get_words(lmk_exp, 'LANDMARK-PHRASE', lmk=lmk)
        sentence = ' '.join(rel_words + lmk_words)

        if consistent:
            meaning1 = m2s(lmk,rel)
            # get the most likely meaning for the generated sentence
            posteriors = get_sentence_posteriors(sentence)
            meaning2 = max(posteriors, key=itemgetter(1))[0]
            # is this what we are trying to say?
            if meaning1 != meaning2:
                continue

        return sentence
Ejemplo n.º 5
0
def generate_sentence(loc, consistent, scene=None, speaker=None, usebest=False, golden=False, meaning=None, printing=True):
    utils.scene = utils.ModelScene(scene, speaker)

    if meaning is None:
        (lmk, lmk_prob, lmk_ent), (rel, rel_prob, rel_ent) = get_meaning(loc=loc, usebest=usebest)
    else:
        lmk, rel = meaning
        lmk_prob = lmk_ent = rel_prob = rel_ent = None
    meaning1 = m2s(lmk, rel)
    logger( meaning1 )

    while True:
        rel_exp_chain, rele_prob_chain, rele_ent_chain, rel_terminals, rel_landmarks = get_expansion('RELATION', rel=rel, usebest=usebest, golden=golden, printing=printing)
        lmk_exp_chain, lmke_prob_chain, lmke_ent_chain, lmk_terminals, lmk_landmarks = get_expansion('LANDMARK-PHRASE', lmk=lmk, usebest=usebest, golden=golden, printing=printing)
        rel_words, relw_prob, relw_ent, rel_a = get_words(rel_terminals, landmarks=rel_landmarks, rel=rel, usebest=usebest, golden=golden, printing=printing)
        lmk_words, lmkw_prob, lmkw_ent, lmk_a = get_words(lmk_terminals, landmarks=lmk_landmarks, prevword=(rel_words[-1] if rel_words else None), usebest=usebest, golden=golden, printing=printing)
        sentence = ' '.join(rel_words + lmk_words)

        if printing: logger( 'rel_exp_chain: %s' % rel_exp_chain )
        if printing: logger( 'lmk_exp_chain: %s' % lmk_exp_chain )

        meaning = Meaning((lmk, lmk_prob, lmk_ent,
                           rel, rel_prob, rel_ent,
                           rel_exp_chain, rele_prob_chain, rele_ent_chain, rel_terminals, rel_landmarks,
                           lmk_exp_chain, lmke_prob_chain, lmke_ent_chain, lmk_terminals, lmk_landmarks,
                           rel_words, relw_prob, relw_ent,
                           lmk_words, lmkw_prob, lmkw_ent))
        meaning.rel_a = rel_a
        meaning.lmk_a = lmk_a

        if consistent:
             # get the most likely meaning for the generated sentence
            try:
                posteriors = get_sentence_posteriors(sentence, iterations=10, extra_meaning=(lmk,rel))
            except:
                print 'try again ...'
                continue

            meaning2 = max(posteriors, key=itemgetter(1))[0]

            # is the original meaning the best one?
            if meaning1 != meaning2:
                print
                print 'sentence:', sentence
                print 'original:', meaning1
                print 'interpreted:', meaning2
                print 'try again ...'
                print
                continue

            for m,p in sorted(posteriors, key=itemgetter(1)):
                print m, p

        return meaning, sentence
Ejemplo n.º 6
0
def get_sentence_posteriors(sentence, iterations=1):
    probs = []
    meanings = []

    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        meaning = get_meaning(num_ancestors=num_ancestors)
        lmk, rel = meaning
        probs.append(get_tree_prob(t, *meaning))
        meanings.append(m2s(lmk,rel))
        print '.'

    probs = np.array(probs) / sum(probs)
    return uniquify_distribution(meanings,  probs)
Ejemplo n.º 7
0
    def loop(num_iterations):
        """Run `num_iterations` training/evaluation iterations and collect metrics.

        Each iteration picks a random point on the table as trajector. With
        `initial_training` set, the speaker's description is used to call
        `train`; otherwise a sentence is generated, interpreted, and the
        closest teacher description is passed to `accept_correction`. After
        that the optional golden, mass, student and object-choosing metrics
        are evaluated and appended to per-metric lists.

        Many names used here (`table`, `speaker`, `scene`, `meanings`,
        `landmarks`, the `*_metric` flags, `printing`, `scale`, ...) are not
        defined in this function; presumably they come from the enclosing
        scope -- verify against the surrounding code.

        Returns the `zip` of all per-iteration metric lists.
        """
        min_dists = []
        lmk_priors = []
        rel_priors = []
        lmk_posts = []
        rel_posts = []
        golden_log_probs = []
        golden_entropies = []
        golden_ranks = []
        rel_types = []

        total_mass = []

        student_probs = []
        student_entropies = []
        student_ranks = []
        student_rel_types = []

        object_answers = []
        object_distributions = []

        # Small constant added to probabilities before normalizing.
        epsilon = 1e-15
        for iteration in range(num_iterations):
            logger(('Iteration %d' % iteration),'okblue')
            # Random point inside the table rectangle, wrapped as a point landmark.
            rand_p = Vec2(random()*table.width+table.min_point.x, random()*table.height+table.min_point.y)
            trajector = Landmark( 'point', PointRepresentation(rand_p), None, Landmark.POINT )
            
            if initial_training:

                sentence, sampled_relation, sampled_landmark = speaker.describe(trajector, scene, False, 1)

                if num_samples:
                    # Train on `num_samples` sampled meanings, each with update weight 1.
                    for i in range(num_samples):
                        landmark, relation, _ = speaker.sample_meaning(trajector, scene, 1)
                        train((landmark,relation), sentence, update=1, printing=printing)
                else:
                    # Train on every meaning, weighted by its probability.
                    for (landmark,relation),prob in speaker.all_meaning_probs( trajector, scene, 1 ):
                        train((landmark,relation), sentence, update=prob, printing=printing)

            else:
                meaning, sentence = generate_sentence(rand_p, consistent, scene, speaker, usebest=True, printing=printing)
                logger( 'Generated sentence: %s' % sentence)

                if cheating:
                    # Use the intended landmark/relation directly instead of interpreting the sentence.
                    landmark, relation = meaning.args[0],meaning.args[3]
                else:
                    if explicit_pointing:
                        landmark = meaning.args[0]
                    # NOTE(review): `landmark` is only assigned above when
                    # explicit_pointing is true; with ambiguous_pointing alone the
                    # next line reads an unassigned or stale value -- confirm.
                    if ambiguous_pointing:
                        pointing_point = landmark.representation.middle + Vec2(random()*0.1-0.05,random()*0.1-0.05)
                    #_, bestsentence = generate_sentence(rand_p, consistent, scene, speaker, usebest=True, printing=printing)

                    try:
                        golden_posteriors = get_all_sentence_posteriors(sentence, meanings, golden=True, printing=printing)
                    except ParseError as e:
                        logger( e )
                        prob = 0
                        rank = len(meanings)-1
                        entropy = 0
                        ed = len(sentence)
                        golden_log_probs.append( prob )
                        golden_entropies.append( entropy )
                        golden_ranks.append( rank )
                        min_dists.append( ed )
                        # NOTE(review): this `continue` appends to only four of the
                        # metric lists, so the lists end up with different lengths
                        # and the final zip() pairs entries from different
                        # iterations -- confirm whether this is intended.
                        continue
                    epsilon = 1e-15
                    # One [score, (lmk, rel)] entry per candidate meaning; with explicit
                    # pointing only meanings using the pointed-at landmark are kept.
                    ps = [[golden_posteriors[lmk]*golden_posteriors[rel],(lmk,rel)] for lmk, rel in meanings if ((not explicit_pointing) or lmk == landmark)]

                    if not explicit_pointing:
                        all_lmk_probs = speaker.all_landmark_probs(landmarks, Landmark(None, PointRepresentation(rand_p), None))
                        all_lmk_probs = dict(zip(landmarks, all_lmk_probs))
                    if ambiguous_pointing:
                        all_lmk_pointing_probs = speaker.all_landmark_probs(landmarks, Landmark(None, PointRepresentation(pointing_point), None))
                        all_lmk_pointing_probs = dict(zip(landmarks, all_lmk_pointing_probs))
                    # `temp` first holds the index of the intended meaning in `ps`,
                    # later its normalized probability.
                    temp = None
                    for i,(p,(lmk,rel)) in enumerate(ps):
                        # lmk,rel = meanings[i]
                        # logger( '%f, %s' % (p, m2s(lmk,rel)))
                        head_on = speaker.get_head_on_viewpoint(lmk)
                        if not explicit_pointing:
                            # ps[i][0] *= speaker.get_landmark_probability(lmk, landmarks, PointRepresentation(rand_p))[0]
                            ps[i][0] *= all_lmk_probs[lmk]
                        if ambiguous_pointing:
                            # ps[i][0] *= speaker.get_landmark_probability(lmk, landmarks, PointRepresentation(pointing_point))[0]
                            ps[i][0] *= all_lmk_pointing_probs[lmk]
                        ps[i][0] *= speaker.get_probabilities_points( np.array([rand_p]), rel, head_on, lmk)[0]
                        if lmk == meaning.args[0] and rel == meaning.args[3]:
                            temp = i

                    # Split scores from meanings, then smooth and normalize the scores.
                    ps,_meanings = zip(*ps)
                    print ps
                    ps = np.array(ps)
                    ps += epsilon
                    ps = ps/ps.sum()
                    # NOTE(review): `temp` is still None here if the intended meaning
                    # was not among the candidates -- confirm this cannot happen.
                    temp = ps[temp]

                    # Highest-scoring meaning first.
                    ps = sorted(zip(ps,_meanings),reverse=True)

                    logger( 'Attempted to say: %s' %  m2s(meaning.args[0],meaning.args[3]) )
                    logger( 'Interpreted as: %s' % m2s(ps[0][1][0],ps[0][1][1]) )
                    logger( 'Attempted: %s vs Interpreted: %s' % (str(temp), str(ps[0][0])))

                    # logger( 'Golden entropy: %f, Max entropy %f' % (golden_entropy, max_entropy))

                    landmark, relation = ps[0][1]
                head_on = speaker.get_head_on_viewpoint(landmark)
                all_descs = speaker.get_all_meaning_descriptions(trajector, scene, landmark, relation, head_on, 1)

                # Choose the description with the smallest edit distance to the sentence.
                distances = []
                for desc in all_descs:
                    distances.append([edit_distance( sentence, desc ), desc])

                distances.sort()
                print distances

                correction = distances[0][1]
                if correction == sentence: 
                    correction = None
                    logger( 'No correction!!!!!!!!!!!!!!!!!!', 'okgreen' )
                accept_correction( meaning, correction, update_scale=scale, eval_lmk=(not explicit_pointing), multiply=multiply, printing=printing )

            def probs_metric(inverse=False):
                """Score the best generated sentence for `rand_p`.

                The sentence is generated with `golden=inverse` and interpreted
                with `golden=(not inverse)`. Returns a 9-tuple: landmark prior,
                relation prior, landmark posterior, relation posterior,
                normalized probability of the generated meaning, entropy, rank,
                smallest edit distance to a description, and the relation's type.
                """
                bestmeaning, bestsentence = generate_sentence(rand_p, consistent, scene, speaker, usebest=True, golden=inverse, printing=printing)
                sampled_landmark, sampled_relation = bestmeaning.args[0], bestmeaning.args[3]
                try:
                    golden_posteriors = get_all_sentence_posteriors(bestsentence, meanings, golden=(not inverse), printing=printing)

                    # lmk_prior = speaker.get_landmark_probability(sampled_landmark, landmarks, PointRepresentation(rand_p))[0]
                    all_lmk_probs = speaker.all_landmark_probs(landmarks, Landmark(None, PointRepresentation(rand_p), None))
                    all_lmk_probs = dict(zip(landmarks, all_lmk_probs))

                    lmk_prior = all_lmk_probs[sampled_landmark]
                    head_on = speaker.get_head_on_viewpoint(sampled_landmark)
                    rel_prior = speaker.get_probabilities_points( np.array([rand_p]), sampled_relation, head_on, sampled_landmark)
                    lmk_post = golden_posteriors[sampled_landmark]
                    rel_post = golden_posteriors[sampled_relation]

                    ps = np.array([golden_posteriors[lmk]*golden_posteriors[rel] for lmk, rel in meanings])
                    rank = None
                    for i,p in enumerate(ps):
                        lmk,rel = meanings[i]
                        # logger( '%f, %s' % (p, m2s(lmk,rel)))
                        head_on = speaker.get_head_on_viewpoint(lmk)
                        # ps[i] *= speaker.get_landmark_probability(lmk, landmarks, PointRepresentation(rand_p))[0]
                        ps[i] *= all_lmk_probs[lmk]
                        ps[i] *= speaker.get_probabilities_points( np.array([rand_p]), rel, head_on, lmk)
                        if lmk == sampled_landmark and rel == sampled_relation:
                            idx = i

                    # NOTE(review): `idx` is only bound when the generated meaning
                    # occurs in `meanings`; otherwise `ps[idx]` raises -- confirm.
                    ps += epsilon
                    ps = ps/ps.sum()
                    prob = ps[idx]
                    rank = sorted(ps, reverse=True).index(prob)
                    entropy = entropy_of_probs(ps)
                except ParseError as e:
                    # Parse failure: report zero probabilities and the worst rank.
                    logger( e )
                    lmk_prior = 0
                    rel_prior = 0
                    lmk_post = 0
                    rel_post = 0
                    prob = 0
                    rank = len(meanings)-1
                    entropy = 0
                    # This placeholder is overwritten unconditionally below.
                    distances = [[None]]

                head_on = speaker.get_head_on_viewpoint(sampled_landmark)
                all_descs = speaker.get_all_meaning_descriptions(trajector, scene, sampled_landmark, sampled_relation, head_on, 1)
                distances = []
                for desc in all_descs:
                    distances.append([edit_distance( bestsentence, desc ), desc])
                distances.sort()
                return lmk_prior,rel_prior,lmk_post,rel_post,\
                       prob,entropy,rank,distances[0][0],type(sampled_relation)

            def db_mass():
                """Return the production sum plus the word sum from the database classes."""
                total = CProduction.get_production_sum(None)
                total += CWord.get_word_sum(None)
                return total

            def choosing_object_metric():
                """Describe a random object of interest and rank all objects for that sentence.

                Returns the index of the chosen object in `loi` and a list of
                `(probability, loi index)` pairs sorted by descending probability.
                """
                trajector = choice(loi)

                sentence, sampled_relation, sampled_landmark = speaker.describe(trajector, scene, max_level=1)

                lmk_probs = []
                try:
                    combined_heatmaps = heatmaps_for_sentence(sentence, all_meanings, loi_infos, xs, ys, scene, speaker, step=step)
                    
                    for combined_heatmap,obj_lmk in zip(combined_heatmaps, loi):
                        # Average the heatmap values at grid points inside the object.
                        ps = [p for (x,y),p in zip(list(product(xs,ys)),combined_heatmap) if obj_lmk.representation.contains_point( Vec2(x,y) )]
                        # print ps, xs.shape, ys.shape, combined_heatmap.shape
                        lmk_probs.append( (sum(ps)/len(ps), obj_lmk) )
                      
                    lmk_probs = sorted(lmk_probs, reverse=True)
                    top_p, top_lmk = lmk_probs[0]
                    lprobs, lmkss = zip(*lmk_probs)
                    
                    logger( sorted(zip(np.array(lprobs)/sum(lprobs), [(l.name, l.color, l.object_class) for l in lmkss]), reverse=True) )
                    logger( 'I bet %f you are talking about a %s %s %s' % (top_p/sum(lprobs), top_lmk.name, top_lmk.color, top_lmk.object_class) )
                    # objects.append(top_lmk)
                except Exception as e:
                    # Any failure here terminates the whole program after logging.
                    logger( 'Unable to get object from sentence. %s' % e, 'fail' )
                    print traceback.format_exc()
                    exit()
                return loi.index(trajector), [ (lprob, loi.index(lmk)) for lprob,lmk in lmk_probs ]

            # Metrics that are switched off are recorded as None.
            if golden_metric:
                lmk_prior,rel_prior,lmk_post,rel_post,prob,entropy,rank,ed,rel_type = probs_metric()
            else:
                lmk_prior,rel_prior,lmk_post,rel_post,prob,entropy,rank,ed,rel_type = \
                None, None, None, None, None, None, None, None, None

            lmk_priors.append( lmk_prior )
            rel_priors.append( rel_prior )
            lmk_posts.append( lmk_post )
            rel_posts.append( rel_post )
            golden_log_probs.append( prob )
            golden_entropies.append( entropy )
            golden_ranks.append( rank )
            min_dists.append( ed )
            rel_types.append( rel_type )

            if mass_metric:
                total_mass.append( db_mass() )
            else:
                total_mass.append( None )

            if student_metric:
                _,_,_,_,student_prob,student_entropy,student_rank,_,student_rel_type = probs_metric(inverse=True)
            else:
                _,_,_,_,student_prob,student_entropy,student_rank,_,student_rel_type = \
                None, None, None, None, None, None, None, None, None

            student_probs.append( student_prob )
            student_entropies.append( student_entropy )
            student_ranks.append( student_rank )
            student_rel_types.append( student_rel_type )

            if choosing_metric:
                answer, distribution = choosing_object_metric()
            else:
                answer, distribution = None, None
            object_answers.append( answer )
            object_distributions.append( distribution )

        return zip(lmk_priors, rel_priors, lmk_posts, rel_posts,
                   golden_log_probs, golden_entropies, golden_ranks, 
                   min_dists, rel_types, total_mass, student_probs, 
                   student_entropies, student_ranks, student_rel_types,
                   object_answers, object_distributions)
Ejemplo n.º 8
0
    def loop(data):
        """Run one object-comprehension iteration per entry of `data['loc_descs']`.

        For each iteration a target object (trajector) and its describing
        sentences are taken from `data` (or chosen/generated when the entry is
        None). The sentences are turned into one heatmap per object of
        interest, the objects are ranked by mean heatmap value, and -- when
        the top object is wrong and `data['learn_objects']` is set --
        `accept_object_correction` is called with sampled meanings.

        `xs`, `ys`, `step`, `scale`, `printing` and `describe` are not defined
        in this function; presumably they come from the enclosing scope --
        verify against the surrounding code.

        Returns the `zip` of answers, distributions, sentences and ids.
        """

        time.sleep(data['delay'])

        scene = data['scene']
        speaker = data['speaker']
        utils.scene.set_scene(scene,speaker)
        num_iterations = len(data['loc_descs'])

        all_meanings = data['all_meanings']
        loi = data['loi']
        loi_infos = data['loi_infos']
        landmarks = data['landmarks']
        sorted_meaning_lists = data['sorted_meaning_lists']
        learn_objects = data['learn_objects']

        def heatmaps_for_sentences(sentences, all_meanings, loi_infos, xs, ys, scene, speaker, step=0.02):
            """Return one combined heatmap per entry of `loi_infos`.

            For every sentence, the `h1` heatmaps of an entry are summed with
            weights `posteriors[rel]*posteriors[lmk]`; the per-sentence sums
            are then multiplied together. A ParseError is propagated; any
            other exception triggers a short random sleep and a retry.
            """
            printing=False
            # NOTE(review): x, y and scene_bb are computed but not used below.
            x = np.array( [list(xs-step*0.5)]*len(ys) )
            y = np.array( [list(ys-step*0.5)]*len(xs) ).T
            scene_bb = scene.get_bounding_box()
            scene_bb = scene_bb.inflate( Vec2(scene_bb.width*0.5,scene_bb.height*0.5) )

            combined_heatmaps = []
            for obj_lmk, ms, heatmapss in loi_infos:

                combined_heatmap = None
                for sentence in sentences:
                    posteriors = None
                    # Retry until a truthy posteriors object is obtained.
                    # NOTE(review): a falsy (e.g. empty) result would retry forever -- confirm.
                    while not posteriors:
                        try:
                            posteriors = get_all_sentence_posteriors(sentence, all_meanings, printing=printing)
                        except ParseError as pe:
                            raise pe
                        except Exception as e:
                            print e
                            sleeptime = random()*0.5
                            logger('Sleeping for %f and retrying "%s"' % (sleeptime,sentence))
                            time.sleep(sleeptime)
                            continue

                    # Posterior-weighted sum of the first heatmap of each meaning.
                    big_heatmap1 = None
                    for m,(h1,h2) in zip(ms, heatmapss):

                        lmk,rel = m
                        p = posteriors[rel]*posteriors[lmk]
                        if big_heatmap1 is None:
                            big_heatmap1 = p*h1
                        else:
                            big_heatmap1 += p*h1

                    # Combine the sentences by multiplying their heatmaps.
                    if combined_heatmap is None:
                        combined_heatmap = big_heatmap1
                    else:
                        combined_heatmap *= big_heatmap1

                combined_heatmaps.append(combined_heatmap)

            return combined_heatmaps

        object_answers = []
        object_distributions = []
        object_sentences =[]
        object_ids = []

        epsilon = 1e-15

        for iteration in range(num_iterations):
            logger(('Iteration %d comprehension' % iteration),'okblue')

            # Use the supplied target object, or pick a random one when none is given.
            trajector = data['lmks'][iteration]
            if trajector is None:
                trajector = choice(loi)
            logger( 'Teacher chooses: %s' % trajector )

            # Keep the first 30 (probability, meaning) entries for this object and renormalize.
            probs, sorted_meanings = zip(*sorted_meaning_lists[trajector][:30])
            probs = np.array(probs)# - min(probs)
            probs /= probs.sum()

            sentences = data['loc_descs'][iteration]
            if sentences is None:
                # No sentence supplied: sample a meaning and describe it.
                (sampled_landmark, sampled_relation) = categorical_sample( sorted_meanings, probs )[0]
                logger( 'Teacher tries to say: %s' % m2s(sampled_landmark,sampled_relation) )
                head_on = speaker.get_head_on_viewpoint(sampled_landmark)

                sentences = [describe( head_on, trajector, sampled_landmark, sampled_relation )]

            object_sentences.append( ' '.join(sentences) )
            object_ids.append( data['ids'][iteration] )

            logger( 'Teacher says: %s' % ' '.join(sentences))

            # Log the 15 leading meanings with their probabilities.
            for i,(p,sm) in enumerate(zip(probs[:15],sorted_meanings[:15])):
                lm,re = sm
                logger( '%i: %f %s' % (i,p,m2s(*sm)) )

            lmk_probs = []

            try:
                combined_heatmaps = heatmaps_for_sentences(sentences, all_meanings, loi_infos, xs, ys, scene, speaker, step=step)
            except ParseError as e:
                # Unparseable sentence: no guess and a placeholder distribution.
                logger( 'Unable to get object from sentence. %s' % e, 'fail' )
                top_lmk = None
                distribution = [(0, False, False)]
            else:
                for combined_heatmap,obj_lmk in zip(combined_heatmaps, loi):

                    # Average the heatmap values at grid points inside the object.
                    ps = [p for (x,y),p in zip(list(product(xs,ys)),combined_heatmap) if obj_lmk.representation.contains_point( Vec2(x,y) )]
                    # print ps, xs.shape, ys.shape, combined_heatmap.shape
                    lmk_probs.append( (sum(ps)/len(ps), obj_lmk) )

                lmk_probs = sorted(lmk_probs, reverse=True)
                top_p, top_lmk = lmk_probs[0]
                lprobs, lmkss = zip(*lmk_probs)

                distribution = [ (lprob, lmk.name, loi.index(lmk)) for lprob,lmk in lmk_probs ]
                logger( sorted(zip(np.array(lprobs)/sum(lprobs), [(l.name, l.color, l.object_class) for l in lmkss]), reverse=True) )
                logger( 'I bet %f you are talking about a %s %s %s' % (top_p/sum(lprobs), top_lmk.name, top_lmk.color, top_lmk.object_class) )
                # objects.append(top_lmk)

            answer = (trajector.name,loi.index(trajector))
            object_answers.append( answer )
            object_distributions.append( distribution )

            # Present top_lmk to teacher
            logger("top_lmk == trajector: %r, learn_objects: %r" % (top_lmk == trajector,learn_objects), 'okgreen')
            if top_lmk == trajector or not learn_objects:
                # Give morphine
                logger("Ahhhhh, morphine...", 'okgreen')
                pass
            else:
                logger("LEARNING!!!!!!!!!!!", 'okgreen')
                # Un-normalized weights of the same 30 leading meanings.
                updates, _ = zip(*sorted_meaning_lists[trajector][:30])
                howmany=5
                for sentence in sentences:
                    # Update with `howmany` meanings sampled from the leading entries ...
                    for _ in range(howmany):
                        meaning = categorical_sample( sorted_meanings, probs )[0]
                        update = updates[ sorted_meanings.index(meaning) ]
                        try:
                            accept_object_correction( meaning, sentence, update*scale, printing=printing)
                        except:
                            # NOTE(review): bare except silently ignores every failure
                            # (including KeyboardInterrupt) -- confirm this is intended.
                            pass
                    # ... and with the last `howmany` entries of the sorted list.
                    for update, meaning in sorted_meaning_lists[trajector][-howmany:]:
                        try:
                            accept_object_correction( meaning, sentence, update*scale, printing=printing)
                        except:
                            pass

        return zip(object_answers, object_distributions, object_sentences, object_ids)
Ejemplo n.º 9
0
    def loop(data):

        time.sleep(random())

        if 'num_iterations' in data:
            scene, speaker = construct_training_scene(True)
            num_iterations = data['num_iterations']
        else:
            scene = data['scene']
            speaker = data['speaker']
            num_iterations = len(data['loc_descs'])

        utils.scene.set_scene(scene,speaker)

        scene_bb = scene.get_bounding_box()
        scene_bb = scene_bb.inflate( Vec2(scene_bb.width*0.5,scene_bb.height*0.5) )
        table = scene.landmarks['table'].representation.get_geometry()

        # step = 0.04
        loi = [lmk for lmk in scene.landmarks.values() if lmk.name != 'table']
        all_heatmaps_tupless, xs, ys = speaker.generate_all_heatmaps(scene, step=step, loi=loi)

        loi_infos = []
        all_meanings = set()
        for obj_lmk,all_heatmaps_tuples in zip(loi, all_heatmaps_tupless):

            lmks, rels, heatmapss = zip(*all_heatmaps_tuples)
            meanings = zip(lmks,rels)
            # print meanings
            all_meanings.update(meanings)
            loi_infos.append( (obj_lmk, meanings, heatmapss) )

        all_heatmaps_tupless, xs, ys = speaker.generate_all_heatmaps(scene, step=step)
        all_heatmaps_tuples = all_heatmaps_tupless[0]
        # x = np.array( [list(xs-step*0.5)]*len(ys) )
        # y = np.array( [list(ys-step*0.5)]*len(xs) ).T
        # for lamk, rel, (heatmap1,heatmap2) in all_heatmaps_tuples:
        #     logger( m2s(lamk,rel))
        #     if isinstance(rel, DistanceRelation):
        #         probabilities = heatmap2.reshape( (len(xs),len(ys)) ).T
        #         plt.pcolor(x, y, probabilities, cmap = 'jet', edgecolors='none', alpha=0.7)
        #         plt.colorbar()
        #         for lmk in scene.landmarks.values():
        #             if isinstance(lmk.representation, GroupLineRepresentation):
        #                 xxs = [lmk.representation.line.start.x, lmk.representation.line.end.x]
        #                 yys = [lmk.representation.line.start.y, lmk.representation.line.end.y]
        #                 plt.fill(xxs,yys,facecolor='none',linewidth=2)
        #             elif isinstance(lmk.representation, RectangleRepresentation):
        #                 rect = lmk.representation.rect
        #                 xxs = [rect.min_point.x,rect.min_point.x,rect.max_point.x,rect.max_point.x]
        #                 yys = [rect.min_point.y,rect.max_point.y,rect.max_point.y,rect.min_point.y]
        #                 plt.fill(xxs,yys,facecolor='none',linewidth=2)
        #                 plt.text(rect.min_point.x+0.01,rect.max_point.y+0.02,lmk.name)
        #         plt.title(m2s(lamk,rel))
        #         logger("Showing")
        #         plt.show()
        #     logger("End")

        x = np.array( [list(xs-step*0.5)]*len(ys) )
        y = np.array( [list(ys-step*0.5)]*len(xs) ).T
        lmks, rels, heatmapss = zip(*all_heatmaps_tuples)
        # graphmax1 = graphmax2 = 0
        meanings = zip(lmks,rels)
        landmarks = list(set(lmks))
        # relations = list(set(rels))

        epsilon = 0.0001
        def heatmaps_for_sentences(sentences, all_meanings, loi_infos, xs, ys, scene, speaker, step=0.02):
            """Build one combined heatmap per entry of ``loi_infos``.

            For each sentence, the first heatmap of every meaning is weighted
            by ``posteriors[rel] * posteriors[lmk]`` (the posteriors returned
            by ``get_all_sentence_posteriors`` for that sentence) and the
            weighted maps are summed.  The per-sentence sums are then
            multiplied together element-wise.

            ``xs``, ``ys``, ``scene``, ``speaker`` and ``step`` are kept in the
            signature for existing callers but are not read here.

            Returns a list with one combined heatmap per ``loi_infos`` entry;
            an entry is ``None`` when ``sentences`` is empty.
            """
            printing = False

            # The posteriors depend only on the sentence, not on the object
            # being scored, so compute them once per sentence instead of once
            # per (object, sentence) pair.
            # NOTE(review): assumes get_all_sentence_posteriors returns the
            # same result for repeated calls with the same arguments.
            posteriors_per_sentence = [
                get_all_sentence_posteriors(sentence, all_meanings, printing=printing)
                for sentence in sentences
            ]

            combined_heatmaps = []
            for obj_lmk, ms, heatmapss in loi_infos:

                combined_heatmap = None
                for posteriors in posteriors_per_sentence:

                    # Posterior-weighted sum of the first heatmap of every meaning.
                    big_heatmap1 = None
                    for (lmk, rel), (h1, h2) in zip(ms, heatmapss):
                        weighted = posteriors[rel]*posteriors[lmk]*h1
                        if big_heatmap1 is None:
                            big_heatmap1 = weighted
                        else:
                            big_heatmap1 += weighted

                    # Sentences are combined multiplicatively.
                    if combined_heatmap is None:
                        combined_heatmap = big_heatmap1
                    else:
                        combined_heatmap *= big_heatmap1

                combined_heatmaps.append(combined_heatmap)

            return combined_heatmaps

        object_meaning_applicabilities = {}
        for obj_lmk, ms, heatmapss in loi_infos:
            for m,(h1,h2) in zip(ms, heatmapss):
                ps = [p for (x,y),p in zip(list(product(xs,ys)),h1) if obj_lmk.representation.contains_point( Vec2(x,y) )]
                if m not in object_meaning_applicabilities:
                    object_meaning_applicabilities[m] = {}
                object_meaning_applicabilities[m][obj_lmk] = sum(ps)/len(ps)

        k = len(loi)
        for meaning_dict in object_meaning_applicabilities.values():
            total = sum( meaning_dict.values() )
            if total != 0:
                for obj_lmk in meaning_dict.keys():
                    meaning_dict[obj_lmk] = meaning_dict[obj_lmk]/total - 1.0/k
                total = sum( [value for value in meaning_dict.values() if value > 0] )
                for obj_lmk in meaning_dict.keys():
                    meaning_dict[obj_lmk] = (2 if meaning_dict[obj_lmk] > 0 else 1)*meaning_dict[obj_lmk] - total

        sorted_meaning_lists = {}

        for m in object_meaning_applicabilities.keys():
            for obj_lmk in object_meaning_applicabilities[m].keys():
                if obj_lmk not in sorted_meaning_lists:
                    sorted_meaning_lists[obj_lmk] = []
                sorted_meaning_lists[obj_lmk].append( (object_meaning_applicabilities[m][obj_lmk], m) )
        for obj_lmk in sorted_meaning_lists.keys():
            sorted_meaning_lists[obj_lmk].sort(reverse=True)

        min_dists = []
        lmk_priors = []
        rel_priors = []
        lmk_posts = []
        rel_posts = []
        golden_log_probs = []
        golden_entropies = []
        golden_ranks = []
        rel_types = []

        total_mass = []

        student_probs = []
        student_entropies = []
        student_ranks = []
        student_rel_types = []

        object_answers = []
        object_distributions = []
        object_sentences =[]

        epsilon = 1e-15

        for iteration in range(num_iterations):
            logger(('Iteration %d comprehension' % iteration),'okblue')

            if 'loc_descs' in data:
                trajector = data['lmks'][iteration]
                logger( 'Teacher chooses: %s' % trajector )
                sentences = data['loc_descs'][iteration]
                probs, sorted_meanings = zip(*sorted_meaning_lists[trajector][:30])
                probs = np.array(probs)# - min(probs)
                probs /= probs.sum()
                if sentences is None:
                    (sampled_landmark, sampled_relation) = categorical_sample( sorted_meanings, probs )[0]
                    logger( 'Teacher tries to say: %s' % m2s(sampled_landmark,sampled_relation) )
                    head_on = speaker.get_head_on_viewpoint(sampled_landmark)

                    sentences = [describe( head_on, trajector, sampled_landmark, sampled_relation )]
            else:
                # Teacher describe
                trajector = choice(loi)
                # sentence, sampled_relation, sampled_landmark = speaker.describe(trajector, scene, max_level=1)
                logger( 'Teacher chooses: %s' % trajector )
                # Choose from meanings
                probs, sorted_meanings = zip(*sorted_meaning_lists[trajector][:30])
                probs = np.array(probs)# - min(probs)
                probs /= probs.sum()
                (sampled_landmark, sampled_relation) = categorical_sample( sorted_meanings, probs )[0]
                logger( 'Teacher tries to say: %s' % m2s(sampled_landmark,sampled_relation) )

                # Generate sentence
                # _, sentence = generate_sentence(None, False, scene, speaker, meaning=(sampled_landmark, sampled_relation), golden=True, printing=printing)

                sentences = [describe( speaker.get_head_on_viewpoint(sampled_landmark), trajector, sampled_landmark, sampled_relation )]

            object_sentences.append( ' '.join(sentences) )
            logger( 'Teacher says: %s' % ' '.join(sentences))
            for i,(p,sm) in enumerate(zip(probs[:15],sorted_meanings[:15])):
                lm,re = sm
                logger( '%i: %f %s' % (i,p,m2s(*sm)) )
                # head_on = speaker.get_head_on_viewpoint(lm)
                # speaker.visualize( scene, trajector, head_on, lm, re)

            lmk_probs = []

            try:
                combined_heatmaps = heatmaps_for_sentences(sentences, all_meanings, loi_infos, xs, ys, scene, speaker, step=step)

                for combined_heatmap,obj_lmk in zip(combined_heatmaps, loi):

                    # x = np.array( [list(xs-step*0.5)]*len(ys) )
                    # y = np.array( [list(ys-step*0.5)]*len(xs) ).T
                    # logger( combined_heatmap.shape )
                    # logger( x.shape )
                    # logger( y.shape )
                    # logger( xs.shape )
                    # logger( ys.shape )
                    # plt.pcolor(x, y, combined_heatmap.reshape((len(xs),len(ys))).T, cmap = 'jet', edgecolors='none', alpha=0.7)
                    # plt.colorbar()

                    # for lmk in scene.landmarks.values():
                    #     if isinstance(lmk.representation, GroupLineRepresentation):
                    #         xxs = [lmk.representation.line.start.x, lmk.representation.line.end.x]
                    #         yys = [lmk.representation.line.start.y, lmk.representation.line.end.y]
                    #         plt.fill(xxs,yys,facecolor='none',linewidth=2)
                    #     elif isinstance(lmk.representation, RectangleRepresentation):
                    #         rect = lmk.representation.rect
                    #         xxs = [rect.min_point.x,rect.min_point.x,rect.max_point.x,rect.max_point.x]
                    #         yys = [rect.min_point.y,rect.max_point.y,rect.max_point.y,rect.min_point.y]
                    #         plt.fill(xxs,yys,facecolor='none',linewidth=2)
                    #         plt.text(rect.min_point.x+0.01,rect.max_point.y+0.02,lmk.name)
                    # plt.axis('scaled')
                    # plt.axis([scene_bb.min_point.x, scene_bb.max_point.x, scene_bb.min_point.y, scene_bb.max_point.y])
                    # plt.show()

                    ps = [p for (x,y),p in zip(list(product(xs,ys)),combined_heatmap) if obj_lmk.representation.contains_point( Vec2(x,y) )]
                    # print ps, xs.shape, ys.shape, combined_heatmap.shape
                    lmk_probs.append( (sum(ps)/len(ps), obj_lmk) )

                lmk_probs = sorted(lmk_probs, reverse=True)
                top_p, top_lmk = lmk_probs[0]
                lprobs, lmkss = zip(*lmk_probs)

                answer, distribution = loi.index(trajector), [ (lprob, loi.index(lmk)) for lprob,lmk in lmk_probs ]
                logger( sorted(zip(np.array(lprobs)/sum(lprobs), [(l.name, l.color, l.object_class) for l in lmkss]), reverse=True) )
                logger( 'I bet %f you are talking about a %s %s %s' % (top_p/sum(lprobs), top_lmk.name, top_lmk.color, top_lmk.object_class) )
                # objects.append(top_lmk)
            except Exception as e:
                logger( 'Unable to get object from sentence. %s' % e, 'fail' )
                answer = None
                top_lmk = None
                distribution = [(0,False)]

            object_answers.append( answer )
            object_distributions.append( distribution )

            # Present top_lmk to teacher
            if top_lmk == trajector:
                # Give morphine
                pass
            else:
                updates, _ = zip(*sorted_meaning_lists[trajector][:30])
                howmany=5
                for sentence in sentences:
                    for _ in range(howmany):
                        meaning = categorical_sample( sorted_meanings, probs )[0]
                        update = updates[ sorted_meanings.index(meaning) ]
                        try:
                            accept_object_correction( meaning, sentence, update*scale, printing=printing)
                        except:
                            pass
                    for update, meaning in sorted_meaning_lists[trajector][-howmany:]:
                        try:
                            accept_object_correction( meaning, sentence, update*scale, printing=printing)
                        except:
                            pass

            for _ in range(0):
	            logger(('Iteration %d production' % iteration),'okblue')
	            rand_p = Vec2(random()*table.width+table.min_point.x, random()*table.height+table.min_point.y)
	            trajector = Landmark( 'point', PointRepresentation(rand_p), None, Landmark.POINT )

	            meaning, sentence = generate_sentence(rand_p, False, scene, speaker, usebest=True, printing=printing)
	            logger( 'Generated sentence: %s' % sentence)

	            landmark = meaning.args[0]
	            # if ambiguous_pointing:
	                # pointing_point = landmark.representation.middle + Vec2(random()*0.1-0.05,random()*0.1-0.05)
	            #_, bestsentence = generate_sentence(rand_p, False, scene, speaker, usebest=True, printing=printing)

	            try:
	                golden_posteriors = get_all_sentence_posteriors(sentence, meanings, golden=True, printing=printing)
	            except ParseError as e:
	                logger( e )
	                prob = 0
	                rank = len(meanings)-1
	                entropy = 0
	                ed = len(sentence)
	                golden_log_probs.append( prob )
	                golden_entropies.append( entropy )
	                golden_ranks.append( rank )
	                min_dists.append( ed )
	                continue
	            epsilon = 1e-15
	            ps = [[golden_posteriors[lmk]*golden_posteriors[rel],(lmk,rel)] for lmk, rel in meanings if (lmk == landmark)]

	            all_lmk_probs = speaker.all_landmark_probs(landmarks, Landmark(None, PointRepresentation(rand_p), None))
	            all_lmk_probs = dict(zip(landmarks, all_lmk_probs))
	            temp = None
	            for i,(p,(lmk,rel)) in enumerate(ps):
	                # lmk,rel = meanings[i]
	                # logger( '%f, %s' % (p, m2s(lmk,rel)))
	                head_on = speaker.get_head_on_viewpoint(lmk)
	                ps[i][0] *= speaker.get_probabilities_points( np.array([rand_p]), rel, head_on, lmk)[0]
	                if lmk == meaning.args[0] and rel == meaning.args[3]:
	                    temp = i

	            ps,_meanings = zip(*ps)
	            print ps
	            ps = np.array(ps)
	            ps += epsilon
	            ps = ps/ps.sum()
	            temp = ps[temp]

	            ps = sorted(zip(ps,_meanings),reverse=True)

	            logger( 'Attempted to say: %s' %  m2s(meaning.args[0],meaning.args[3]) )
	            logger( 'Interpreted as: %s' % m2s(ps[0][1][0],ps[0][1][1]) )
	            logger( 'Attempted: %f vs Interpreted: %f' % (temp, ps[0][0]))

	            # logger( 'Golden entropy: %f, Max entropy %f' % (golden_entropy, max_entropy))

	            landmark, relation = ps[0][1]
	            head_on = speaker.get_head_on_viewpoint(landmark)
	            all_descs = speaker.get_all_meaning_descriptions(trajector, scene, landmark, relation, head_on, 1)

	            distances = []
	            for desc in all_descs:
	                distances.append([edit_distance( sentence, desc ), desc])

	            distances.sort()
	            print distances

	            correction = distances[0][1]
	            # if correction == sentence:
	            #     correction = None
	            #     logger( 'No correction!!!!!!!!!!!!!!!!!!', 'okgreen' )
	            accept_correction( meaning, correction, update_scale=scale, eval_lmk=False, multiply=False, printing=printing )


            def probs_metric(inverse=False):
                """Measure how a sentence generated for a random table point is interpreted.

                A point is drawn uniformly inside ``table``; the best sentence
                for it is generated with ``golden=inverse`` and its posteriors
                are computed with ``golden=(not inverse)``.  Every meaning in
                ``meanings`` is then scored by posterior * landmark prior *
                relation probability at the point, and the scores are
                normalised.

                Returns the 9-tuple ``(lmk_prior, rel_prior, lmk_post,
                rel_post, prob, entropy, rank, min_edit_distance,
                relation_type)``.

                When a ``ParseError`` or ``RuntimeError`` is raised the first
                seven values fall back to ``0`` (``rank`` to
                ``len(meanings)-1``).  If sentence generation itself failed,
                ``min_edit_distance`` and ``relation_type`` are ``None``.
                ``min_edit_distance`` is also ``None`` when no description is
                available to compare against.
                """
                rand_p = Vec2(random()*table.width+table.min_point.x, random()*table.height+table.min_point.y)

                # Bound up front so that a failure inside generate_sentence
                # cannot surface later as an UnboundLocalError.
                bestsentence = sampled_landmark = sampled_relation = None
                try:
                    bestmeaning, bestsentence = generate_sentence(rand_p, False, scene, speaker, usebest=True, golden=inverse, printing=printing)
                    sampled_landmark, sampled_relation = bestmeaning.args[0], bestmeaning.args[3]
                    golden_posteriors = get_all_sentence_posteriors(bestsentence, meanings, golden=(not inverse), printing=printing)

                    all_lmk_probs = speaker.all_landmark_probs(landmarks, Landmark(None, PointRepresentation(rand_p), None))
                    all_lmk_probs = dict(zip(landmarks, all_lmk_probs))

                    lmk_prior = all_lmk_probs[sampled_landmark]
                    head_on = speaker.get_head_on_viewpoint(sampled_landmark)
                    rel_prior = speaker.get_probabilities_points( np.array([rand_p]), sampled_relation, head_on, sampled_landmark)
                    lmk_post = golden_posteriors[sampled_landmark]
                    rel_post = golden_posteriors[sampled_relation]

                    ps = np.array([golden_posteriors[lmk]*golden_posteriors[rel] for lmk, rel in meanings])
                    idx = None
                    for i, (lmk, rel) in enumerate(meanings):
                        head_on = speaker.get_head_on_viewpoint(lmk)
                        ps[i] *= all_lmk_probs[lmk]
                        ps[i] *= speaker.get_probabilities_points( np.array([rand_p]), rel, head_on, lmk)
                        if lmk == sampled_landmark and rel == sampled_relation:
                            idx = i

                    if idx is None:
                        # Previously this crashed with UnboundLocalError; route
                        # it through the handler below so the fallback values
                        # are reported instead.
                        raise RuntimeError('Sampled meaning is not among the candidate meanings')

                    ps += epsilon
                    ps = ps/ps.sum()
                    prob = ps[idx]
                    rank = sorted(ps, reverse=True).index(prob)
                    entropy = entropy_of_probs(ps)
                except (ParseError,RuntimeError) as e:
                    logger( e )
                    lmk_prior = 0
                    rel_prior = 0
                    lmk_post = 0
                    rel_post = 0
                    prob = 0
                    rank = len(meanings)-1
                    entropy = 0

                if bestsentence is None:
                    # No sentence was generated, so there is nothing to
                    # measure an edit distance against.
                    return lmk_prior,rel_prior,lmk_post,rel_post,\
                           prob,entropy,rank,None,None

                head_on = speaker.get_head_on_viewpoint(sampled_landmark)
                # NOTE(review): `trajector` comes from the enclosing scope, not
                # from rand_p sampled above -- confirm that this is intended.
                all_descs = speaker.get_all_meaning_descriptions(trajector, scene, sampled_landmark, sampled_relation, head_on, 1)
                distances = [edit_distance( bestsentence, desc ) for desc in all_descs]
                min_distance = min(distances) if distances else None
                return lmk_prior,rel_prior,lmk_post,rel_post,\
                       prob,entropy,rank,min_distance,type(sampled_relation)

            def db_mass():
                """Return ``CProduction.get_production_sum(None)`` plus ``CWord.get_word_sum(None)``."""
                production_mass = CProduction.get_production_sum(None)
                word_mass = CWord.get_word_sum(None)
                return production_mass + word_mass

            def choosing_object_metric():
                """Describe a random object of interest and rank all objects against that description.

                An object is picked at random from ``loi`` and described by
                ``speaker``.  Each object in ``loi`` is then scored by the mean
                value of its combined heatmap over the grid points
                (``product(xs, ys)``) that its representation contains.

                Returns ``(index of the picked object in loi, [(score, index
                in loi), ...])`` with the list sorted by descending score.
                Any exception while scoring is logged, its traceback printed,
                and the process exits.
                """
                target = choice(loi)

                sentence, sampled_relation, sampled_landmark = speaker.describe(target, scene, max_level=1)

                scored = []
                try:
                    # NOTE(review): this calls heatmaps_for_sentence (singular);
                    # confirm that helper is defined elsewhere in the file.
                    heatmaps = heatmaps_for_sentence(sentence, all_meanings, loi_infos, xs, ys, scene, speaker, step=step)

                    for heatmap, candidate in zip(heatmaps, loi):
                        inside = []
                        for (px, py), value in zip(list(product(xs,ys)), heatmap):
                            if candidate.representation.contains_point( Vec2(px,py) ):
                                inside.append(value)
                        scored.append( (sum(inside)/len(inside), candidate) )

                    scored.sort(reverse=True)
                    best_score, best_candidate = scored[0]
                    scores, candidates = zip(*scored)
                    score_total = sum(scores)

                    descriptions = [(c.name, c.color, c.object_class) for c in candidates]
                    logger( sorted(zip(np.array(scores)/score_total, descriptions), reverse=True) )
                    logger( 'I bet %f you are talking about a %s %s %s' % (best_score/score_total, best_candidate.name, best_candidate.color, best_candidate.object_class) )
                except Exception as e:
                    logger( 'Unable to get object from sentence. %s' % e, 'fail' )
                    print(traceback.format_exc())
                    exit()

                ranking = [ (score, loi.index(candidate)) for score, candidate in scored ]
                return loi.index(target), ranking

            if golden_metric:
                lmk_prior,rel_prior,lmk_post,rel_post,prob,entropy,rank,ed,rel_type = probs_metric()
            else:
                lmk_prior,rel_prior,lmk_post,rel_post,prob,entropy,rank,ed,rel_type = [None]*9

            lmk_priors.append( lmk_prior )
            rel_priors.append( rel_prior )
            lmk_posts.append( lmk_post )
            rel_posts.append( rel_post )
            golden_log_probs.append( prob )
            golden_entropies.append( entropy )
            golden_ranks.append( rank )
            min_dists.append( ed )
            rel_types.append( rel_type )

            if mass_metric:
                total_mass.append( db_mass() )
            else:
                total_mass.append( None )

            if student_metric:
                _,_,_,_,student_prob,student_entropy,student_rank,_,student_rel_type = probs_metric(inverse=True)
            else:
                _,_,_,_,student_prob,student_entropy,student_rank,_,student_rel_type = \
                None, None, None, None, None, None, None, None, None

            student_probs.append( student_prob )
            student_entropies.append( student_entropy )
            student_ranks.append( student_rank )
            student_rel_types.append( student_rel_type )

            # if choosing_metric:
            #     answer, distribution = choosing_object_metric()
            # else:
            #     answer, distribution = None, None
            # object_answers.append( answer )
            # object_distributions.append( distribution )

        return zip(lmk_priors, rel_priors, lmk_posts, rel_posts,
                   golden_log_probs, golden_entropies, golden_ranks,
                   min_dists, rel_types, total_mass, student_probs,
                   student_entropies, student_ranks, student_rel_types,
                   object_answers, object_distributions, object_sentences)