Ejemplo n.º 1
0
def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None):
    """Sample candidate meanings for a sentence and return normalized posteriors.

    Args:
        sentence: raw sentence string to be parsed.
        iterations: number of (landmark, relation) meanings to sample.
        extra_meaning: optional (lmk, rel) pair to score in addition to the
            sampled meanings.

    Returns:
        Items of a dict mapping meaning strings (from m2s) to probabilities
        normalized to sum to 1.
    """
    meaning_probs = {}
    # parse sentence with charniak and apply surgeries
    print('parsing ...')
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print('\n%s\n' % t.pprint())
    # number of landmark phrases above the innermost one
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in range(iterations):
        (lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors)
        meaning = m2s(lmk, rel)
        if meaning not in meaning_probs:
            # probability of the parse tree under this (lmk, rel) meaning
            ps = get_tree_probs(t, lmk, rel)[0]
            meaning_probs[meaning] = np.prod(ps)
        print('.')

    if extra_meaning:
        # BUG FIX: previously this branch scored the *last sampled* lmk/rel
        # from the loop above instead of extra_meaning itself (and raised
        # NameError when iterations == 0). Unpack extra_meaning explicitly.
        lmk, rel = extra_meaning
        meaning = m2s(lmk, rel)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            meaning_probs[meaning] = np.prod(ps)
        print('.')

    # normalize so the posteriors sum to 1
    summ = sum(meaning_probs.values())
    for key in meaning_probs:
        meaning_probs[key] /= summ
    return meaning_probs.items()
Ejemplo n.º 2
0
def get_sentence_meaning_likelihood(sentence, lmk, rel):
    """Return (likelihood, total entropy, lrpc, tps) for a sentence/meaning pair.

    The likelihood is the product of the per-node tree probabilities under the
    given landmark and relation; a zero product is logged as an error.
    """
    modparse = get_modparse(sentence)
    tree = ParentedTree.parse(modparse)
    print('\n%s\n' % tree.pprint())

    probs, entropies, lrpc, tps = get_tree_probs(tree, lmk, rel)
    likelihood = np.prod(probs)
    if likelihood == 0.0:
        logger('ERROR: Probability product is 0 for sentence: %s, lmk: %s, rel: %s, probs: %s' % (sentence, lmk, rel, str(probs)))
    return likelihood, sum(entropies), lrpc, tps
Ejemplo n.º 3
0
def get_all_sentence_posteriors(sentence, meanings, golden=False, printing=True):
    """Score every unique landmark and relation in `meanings` against the parse.

    Unlike the joint (lmk, rel) scorers elsewhere in this project, this version
    factors the tree: landmarks are scored against subtree t[1] and relations
    against subtree t[0].

    Args:
        sentence: raw sentence string to be parsed.
        meanings: iterable of (lmk, rel) pairs; lmks and rels are deduplicated
            and scored independently.
        golden, printing: forwarded to get_tree_probs.

    Returns:
        dict mapping each unique lmk and each unique rel to its probability
        (product of the per-node tree probabilities, or 0 for landmarks whose
        ancestor count disagrees with the parse).
    """
    print 'parsing ...'
    # NOTE: here get_modparse returns a (parse, modparse) pair, unlike the
    # single-value variants in the other examples of this file.
    _, modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    # logger(len(meanings))
    # split the (lmk, rel) pairs and deduplicate each side
    lmks, rels = zip(*meanings)
    lmks = set(lmks)
    rels = set(rels)
    # logger('num things ' + str(len(lmks))+' '+str(len(rels)))

    # console spinner animation frames; \b backspaces over the previous frame
    syms = ['\\', '|', '/', '-']
    sys.stdout.write('processing...\\')
    sys.stdout.flush()
    posteriors = {}
    for i,lmk in enumerate(lmks):
        # landmarks with the wrong ancestor count cannot match this parse
        if lmk.get_ancestor_count() != num_ancestors:
            p = 0
        else:
            # landmarks are scored against subtree t[1]
            ps = get_tree_probs(t[1], lmk, golden=golden, printing=printing)[0]
            p = np.prod(ps)
        posteriors[lmk] = p
        sys.stdout.write("\b%s" % syms[i % len(syms)])
        sys.stdout.flush()

    for i,rel in enumerate(rels):
        # relations are scored against subtree t[0]
        ps = get_tree_probs(t[0], rel=rel, golden=golden, printing=printing)[0]
        posteriors[rel] = np.prod(ps)
        sys.stdout.write("\b%s" % syms[i % len(syms)])
        sys.stdout.flush()

    # cosmetic: print 50 more spinner frames to finish the progress line.
    # NOTE(review): `i` is left over from the loops above — if both lmks and
    # rels were empty this would raise NameError; confirm meanings is always
    # non-empty at the call sites.
    for j in range(50):
        sys.stdout.write("\b.%s" % syms[(i+j) % len(syms)])
        sys.stdout.flush()
    print


    # for meaning in meanings:
    #     lmk,rel = meaning
    #     if lmk.get_ancestor_count() != num_ancestors:
    #         p = 0
    #     else:
    #         ps = get_tree_probs(t, lmk, rel, printing=False)[0]
    #         p = np.prod(ps)
    #     posteriors.append(p)
        # print p, lmk, lmk.ori_relations, rel, (rel.distance, rel.measurement.best_degree_class, rel.measurement.best_distance_class ) if hasattr(rel,'measurement') else 'No measurement'
    return posteriors
Ejemplo n.º 4
0
def get_all_sentence_posteriors(sentence, meanings):
    """Score each (lmk, rel) meaning against the parsed sentence.

    Returns a list of probabilities, one per meaning, in input order. A
    landmark whose ancestor count disagrees with the parse scores 0.
    """
    print('parsing ...')
    modparse = get_modparse(sentence)
    tree = ParentedTree.parse(modparse)
    print('\n%s\n' % tree.pprint())
    num_ancestors = count_lmk_phrases(tree) - 1

    def _score(lmk, rel):
        # A landmark with the wrong ancestor count cannot match this parse.
        if lmk.get_ancestor_count() != num_ancestors:
            return 0
        return np.prod(get_tree_probs(tree, lmk, rel)[0])

    return [_score(lmk, rel) for lmk, rel in meanings]
Ejemplo n.º 5
0
def get_sentence_posteriors(sentence, iterations=1):
    """Sample meanings for a sentence and return a normalized distribution.

    Samples `iterations` (lmk, rel) meanings, scores each against the parse
    tree, normalizes the scores, and collapses duplicate meaning strings via
    uniquify_distribution.
    """
    # parse sentence with charniak and apply surgeries
    print('parsing ...')
    modparse = get_modparse(sentence)
    tree = ParentedTree.parse(modparse)
    print('\n%s\n' % tree.pprint())
    num_ancestors = count_lmk_phrases(tree) - 1

    probs = []
    meanings = []
    for _ in range(iterations):
        lmk, rel = get_meaning(num_ancestors=num_ancestors)
        probs.append(get_tree_prob(tree, lmk, rel))
        meanings.append(m2s(lmk, rel))
        print('.')

    normalized = np.array(probs) / sum(probs)
    return uniquify_distribution(meanings, normalized)
Ejemplo n.º 6
0



if __name__ == '__main__':
    # CLI entry point: parse one sentence, show its tree entropy, then print
    # ranked meaning suggestions for each part of the built meaning.
    import argparse
    parser = argparse.ArgumentParser()
    # parser.add_argument('-n', '--num_iterations', type=int, default=1)
    # parser.add_argument('-l', '--location', type=Point)
    # parser.add_argument('--consistent', action='store_true')
    parser.add_argument('sentence')
    args = parser.parse_args()

    # scene/speaker set up module-level state used by the helpers below
    scene, speaker = construct_training_scene()

    print 'parsing ...'
    modparse = get_modparse(args.sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()

    print_tree_entropy(t)

    # pause so the user can inspect the entropy output before suggestions
    raw_input()

    # parts is presumably a list of (label, {meaning: score}) pairs — each is
    # printed as a ranked table; confirm against build_meaning's return value
    parts = build_meaning(t)
    for part in parts:
        print "Suggested for", part[0]
        items = sorted(part[1].items(), key=lambda x: x[1],reverse=True)
        for item in items:
            # NOTE(review): rjust is used as a free function here — presumably
            # star-imported (e.g. from the py2 string module); verify import
            print '    ',rjust(item[0],40),item[1]
        print
Ejemplo n.º 7
0
    # Filter each scene's location descriptions by re-parsing every sentence
    # chunk; builds a reduced copy of the scene. NOTE(review): this fragment is
    # truncated — the `try` below has no visible `except`, and the filtering
    # rules at the end are commented out; the remainder lies outside this view.
    for s in all_scenes:
        # chunks flagged for removal from this scene (unused in visible part)
        toremove = []
        # shallow copy of the scene with empty lmk/description/id lists to be
        # refilled with the chunks that survive filtering
        test_scene = {'scene':s['scene'],
                      'speaker':s['speaker'],
                      'lmks':[],
                      'loc_descs':[],
                      'ids':[]}
        for i,(lmk,sentence_chunks, eyedee) in enumerate(zip(s['lmks'], s['loc_descs'], s['ids'])):
            # keep the untouched original before chunks are removed in place
            original = list(sentence_chunks)
            # print i, lmk,
            # for l in sentence_chunks:
            #     print l,'--',
            # print
            # iterate over a copy because chunks may be removed from the list
            for chunk in list(sentence_chunks):
                try:
                    parsetree, modparsetree = get_modparse(chunk)
                    # print modparsetree
                    # raw_input()
#                    if ('(NP' in modparsetree or '(PP' in modparsetree):
#                        sentence_chunks.remove(chunk)
                   # if 'objects' in chunk:
                   #     sentence_chunks.remove(chunk)
#                    elif (' side' in chunk or
#                          'end' in chunk or
#                          'edge' in chunk or
#                          'corner' in chunk or 
#                          'middle' in chunk or 
#                          'center' in chunk or
#                          'centre' in chunk) and not ('table' in chunk):
#                        sentence_chunks.remove(chunk)
#                    elif 'viewer' in chunk or 'between' in chunk:
Ejemplo n.º 8
0
    # Training loop: generate a described sentence per iteration and parse it.
    # NOTE(review): this fragment is truncated — the loop body continues past
    # the visible end, and `unique_sentences`/`table` come from enclosing scope.
    for i in range(args.iterations):
        print 'sentence', i

        # rebuild the scene every 50 sentences for variety; `table` only gets
        # (re)assigned inside this branch, so it must already exist on entry
        if (i % 50) == 0:
            scene, speaker = construct_training_scene(True)
            utils.scene.set_scene(scene,speaker)
            table = scene.landmarks['table'].representation.rect
        t_min = table.min_point
        t_max = table.max_point
        t_w = table.width
        t_h = table.height

        # sample a uniformly random point on the table surface
        xloc,yloc = random()*t_w+t_min.x, random()*t_h+t_min.y
        trajector = Landmark( 'point', PointRepresentation(Vec2(xloc,yloc)), None, Landmark.POINT)
        # have the speaker describe the sampled point; cache the parses
        sentence, rel, lmk = speaker.describe(trajector, scene, False, 1)
        parsestring, modparsestring = get_modparse(sentence)
        unique_sentences[sentence] = (parsestring, modparsestring)

        # convert variables to the right types
        loc = (xloc, yloc)
        parse = ParentedTree.parse(parsestring)
        modparse = ParentedTree.parse(modparsestring)

        # how many ancestors should the sampled landmark have?
        num_ancestors = count_lmk_phrases(modparse) - 1

        # -1 means no landmark phrase was found: the parse is unusable
        if num_ancestors == -1:
            print 'Failed to parse %d [%s] [%s] [%s]' % (i, sentence, parse, modparse)
            continue

        assert(not isinstance(lmk, tuple))
Ejemplo n.º 9
0
def train(meaning, sentence, update=1, printing=False):
    """Run one training update for a (landmark, relation) meaning on a sentence.

    Parses the sentence into a tree and delegates the actual count updates to
    train_rec.
    """
    landmark, relation = meaning
    _, modparse = get_modparse(sentence)
    parse_tree = ParentedTree.parse(modparse)

    train_rec(tree=parse_tree, lmk=landmark, rel=relation,
              update=update, printing=printing)