def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None):
    meaning_probs = {}
    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        (lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors)
        meaning = m2s(lmk,rel)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            # print "Tree probs: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    if extra_meaning:
        meaning = m2s(*extra_meaning)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            # print "Tree prob: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    summ = sum(meaning_probs.values())
    for key in meaning_probs:
        meaning_probs[key] /= summ
    return meaning_probs.items()
Beispiel #2
0
def get_modparse(sentence):
    """returns the modified parse tree for a sentence"""
    sp_db = SentenceParse.get_sentence_parse(sentence)
    try:
        res = sp_db.all()[0]
        parsetree = res.original_parse
        modparsetree = res.modified_parse
    except:
        print "parse.py: 103: " + sentence
        parses = parse_sentences([sentence])
        if len(parses) == 0:
            raise ParseError(printcolors.WARNING + ('ParseError: a sentence was empty'))

        modparses = modify_parses(parses)
        for i,chunk in enumerate(modparses[:]):
            for j,modparse in enumerate(chunk):
                if 'LANDMARK-PHRASE' in modparse:
                    modparses[i] = modparse
                    parses[i] = parses[i][j]
                    break
            if isinstance(modparses[i],list):
                modparses[i] = modparses[i][0]
                parses[i] = parses[i][0]

        parsetree = parses[0]
        modparsetree = modparses[0]
        try:
            SentenceParse.add_sentence_parse(sentence, parsetree, modparsetree)
        except Exception as e:
            print e

    if count_lmk_phrases(ParentedTree.parse(modparsetree)) < 1:
        raise ParseError(printcolors.WARNING + ('ParseError: Parse contained no Landmark phrase.\nSentence: %s\nParse: %s\nModparse: %s' % (sentence,parsetree,modparsetree)))

    return parsetree, modparsetree
Beispiel #3
0
def get_all_sentence_posteriors(sentence, meanings, golden=False, printing=True):

    print 'parsing ...'
    _, modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    # logger(len(meanings))
    lmks, rels = zip(*meanings)
    lmks = set(lmks)
    rels = set(rels)
    # logger('num things ' + str(len(lmks))+' '+str(len(rels)))

    syms = ['\\', '|', '/', '-']
    sys.stdout.write('processing...\\')
    sys.stdout.flush()
    posteriors = {}
    for i,lmk in enumerate(lmks):
        if lmk.get_ancestor_count() != num_ancestors:
            p = 0
        else:
            ps = get_tree_probs(t[1], lmk, golden=golden, printing=printing)[0]
            p = np.prod(ps)
        posteriors[lmk] = p
        sys.stdout.write("\b%s" % syms[i % len(syms)])
        sys.stdout.flush()

    for i,rel in enumerate(rels):
        ps = get_tree_probs(t[0], rel=rel, golden=golden, printing=printing)[0]
        posteriors[rel] = np.prod(ps)
        sys.stdout.write("\b%s" % syms[i % len(syms)])
        sys.stdout.flush()

    for j in range(50):
        sys.stdout.write("\b.%s" % syms[(i+j) % len(syms)])
        sys.stdout.flush()
    print


    # for meaning in meanings:
    #     lmk,rel = meaning
    #     if lmk.get_ancestor_count() != num_ancestors:
    #         p = 0
    #     else:
    #         ps = get_tree_probs(t, lmk, rel, printing=False)[0]
    #         p = np.prod(ps)
    #     posteriors.append(p)
        # print p, lmk, lmk.ori_relations, rel, (rel.distance, rel.measurement.best_degree_class, rel.measurement.best_distance_class ) if hasattr(rel,'measurement') else 'No measurement'
    return posteriors
def get_all_sentence_posteriors(sentence, meanings):

    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    posteriors = []
    for meaning in meanings:
        lmk,rel = meaning
        if lmk.get_ancestor_count() != num_ancestors:
            p = 0
        else:
            ps = get_tree_probs(t, lmk, rel)[0]
            p = np.prod(ps)
        posteriors.append(p)
    return posteriors
Beispiel #5
0
def get_sentence_posteriors(sentence, iterations=1):
    probs = []
    meanings = []

    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        meaning = get_meaning(num_ancestors=num_ancestors)
        lmk, rel = meaning
        probs.append(get_tree_prob(t, *meaning))
        meanings.append(m2s(lmk,rel))
        print '.'

    probs = np.array(probs) / sum(probs)
    return uniquify_distribution(meanings,  probs)
Beispiel #6
0
    # Walk the rows of `reader`, numbering sentences from 1 for the progress
    # output, and sample meanings for each one.
    for i, row in enumerate(reader, start=1):
        print "sentence", i

        # unpack row (each row must hold exactly these five fields)
        xloc, yloc, sentence, parse, modparse = row

        # convert variables to the right types
        xloc = float(xloc)
        yloc = float(yloc)
        loc = (xloc, yloc)
        parse = ParentedTree.parse(parse)
        modparse = ParentedTree.parse(modparse)

        # how many ancestors should the sampled landmark have?
        # (one fewer than the number of landmark phrases in the modified parse)
        num_ancestors = count_lmk_phrases(modparse) - 1

        # sample `args.iterations` times for each sentence
        for _ in xrange(args.iterations):
            lmk, rel = get_meaning(loc, num_ancestors)

            # optional per-sample dump of the inputs and the sampled meaning
            if args.verbose:
                print "utterance:", repr(sentence)
                print "location: %s" % repr(loc)
                print "landmark: %s (%s)" % (lmk, lmk_id(lmk))
                print "relation: %s" % rel_type(rel)
                print "parse:"
                print parse.pprint()
                print "modparse:"
                print modparse.pprint()
                print "-" * 70
Beispiel #7
0
    for i,row in enumerate(reader, start=1):
        print 'sentence', i

        # unpack row
        xloc, yloc, sentence, parse, modparse = row
        unique_sentences[sentence] = (parse, modparse)

        # convert variables to the right types
        xloc = float(xloc)
        yloc = float(yloc)
        loc = (xloc, yloc)
        parse = ParentedTree.parse(parse)
        modparse = ParentedTree.parse(modparse)

        # how many ancestors should the sampled landmark have?
        num_ancestors = count_lmk_phrases(modparse) - 1

        if num_ancestors == -1:
            print 'Failed to parse %d [%s] [%s] [%s]' % (i, sentence, parse, modparse)
            continue

        # sample `args.iterations` times for each sentence
        for _ in xrange(args.iterations):
            lmk, rel = get_meaning(loc, num_ancestors)
            lmk, _, _ = lmk
            rel, _, _ = rel

            assert(not isinstance(lmk, tuple))
            assert(not isinstance(rel, tuple))

            if args.verbose: