Example #1
def parse(sentence, use_cache=True, parser='stanford'):

    cache_key = "parse_trees_{0}".format(parser)
    valid_lines = None

    if use_cache:
        cache_attempt = cache_get(cache_key, sentence)
        if cache_attempt:
            valid_lines = cache_attempt

    if valid_lines is None:
        if parser == "stanford":
            response = parse_stanford(sentence, use_cache=use_cache)
        elif parser == "malt":
            response = parse_malt(sentence, use_cache=use_cache)
        else:
            return []

        valid_lines = [line for line in response.split("\n") if len(line) > 2 and line[0] == "(" and line[-1] == ")"]

        if use_cache:
            cache_set(cache_key, sentence, valid_lines)

    # Throw away the garbage we don't want from the parser's response.
    # This could get us in trouble since it hides parser errors,
    # but we have deadlines...
    trees = [ParentedTree.parse(line) for line in valid_lines]

    return trees
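
The cache_get and cache_set helpers are not shown here. A minimal sketch of what they might look like, assuming a plain in-memory dictionary keyed by (cache_key, sentence); the real project presumably backs this with something more durable:

# Hypothetical in-memory stand-ins for the cache helpers used above.
_CACHE = {}

def cache_get(cache_key, sentence):
    # Return the cached value for this (cache_key, sentence) pair, or None on a miss.
    return _CACHE.get((cache_key, sentence))

def cache_set(cache_key, sentence, value):
    # Store the filtered parse lines for this (cache_key, sentence) pair.
    _CACHE[(cache_key, sentence)] = value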
Example #2
def gen(files):
    for f in files:
        with open(f) as fi:
            #set_trace()
            #leaves = ParentedTree.parse(fi.read()).leaves()
            pos = ParentedTree.parse(fi.read()).pos()
        yield makeRow(getLocalContext(pos), f)
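
A possible way to drive this generator, assuming makeRow and getLocalContext are defined in the same module and each file holds a bracketed parse string (the file names below are made up):

# Hypothetical usage: lazily build one row per parsed file.
for row in gen(['parse_001.txt', 'parse_002.txt']):
    print row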
Example #3
def get_sentence_posteriors(sentence, iterations=1, extra_meaning=None):
    meaning_probs = {}
    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        (lmk, _, _), (rel, _, _) = get_meaning(num_ancestors=num_ancestors)
        meaning = m2s(lmk,rel)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, lmk, rel)[0]
            # print "Tree probs: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    if extra_meaning:
        meaning = m2s(*extra_meaning)
        if meaning not in meaning_probs:
            ps = get_tree_probs(t, *extra_meaning)[0]  # score the extra meaning itself, not the last sampled lmk/rel
            # print "Tree prob: ", zip(ps,rls)
            meaning_probs[meaning] = np.prod(ps)
        print '.'

    summ = sum(meaning_probs.values())
    for key in meaning_probs:
        meaning_probs[key] /= summ
    return meaning_probs.items()
Example #4
def get_modparse(sentence):
    """Return the original and modified parse trees for a sentence."""
    sp_db = SentenceParse.get_sentence_parse(sentence)
    try:
        res = sp_db.all()[0]
        parsetree = res.original_parse
        modparsetree = res.modified_parse
    except:
        print "parse.py: 103: " + sentence
        parses = parse_sentences([sentence])
        if len(parses) == 0:
            raise ParseError(printcolors.WARNING + ('ParseError: a sentence was empty'))

        modparses = modify_parses(parses)
        for i,chunk in enumerate(modparses[:]):
            for j,modparse in enumerate(chunk):
                if 'LANDMARK-PHRASE' in modparse:
                    modparses[i] = modparse
                    parses[i] = parses[i][j]
                    break
            if isinstance(modparses[i],list):
                modparses[i] = modparses[i][0]
                parses[i] = parses[i][0]

        parsetree = parses[0]
        modparsetree = modparses[0]
        try:
            SentenceParse.add_sentence_parse(sentence, parsetree, modparsetree)
        except Exception as e:
            print e

    if count_lmk_phrases(ParentedTree.parse(modparsetree)) < 1:
        raise ParseError(printcolors.WARNING + ('ParseError: Parse contained no Landmark phrase.\nSentence: %s\nParse: %s\nModparse: %s' % (sentence,parsetree,modparsetree)))

    return parsetree, modparsetree
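
Since the function returns both trees, a call site should unpack the pair, as Example #6 does. A minimal sketch with a made-up sentence:

parsetree, modparsetree = get_modparse('the box on the table')
t = ParentedTree.parse(modparsetree)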
Example #5
def get_sentence_meaning_likelihood(sentence, lmk, rel):
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()

    probs, entropies, lrpc, tps = get_tree_probs(t, lmk, rel)
    if np.prod(probs) == 0.0:
        logger('ERROR: Probability product is 0 for sentence: %s, lmk: %s, rel: %s, probs: %s' % (sentence, lmk, rel, str(probs)))
    return np.prod(probs), sum(entropies), lrpc, tps
Example #6
def get_all_sentence_posteriors(sentence, meanings, golden=False, printing=True):

    print 'parsing ...'
    _, modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    # logger(len(meanings))
    lmks, rels = zip(*meanings)
    lmks = set(lmks)
    rels = set(rels)
    # logger('num things ' + str(len(lmks))+' '+str(len(rels)))

    syms = ['\\', '|', '/', '-']
    sys.stdout.write('processing...\\')
    sys.stdout.flush()
    posteriors = {}
    for i,lmk in enumerate(lmks):
        if lmk.get_ancestor_count() != num_ancestors:
            p = 0
        else:
            ps = get_tree_probs(t[1], lmk, golden=golden, printing=printing)[0]
            p = np.prod(ps)
        posteriors[lmk] = p
        sys.stdout.write("\b%s" % syms[i % len(syms)])
        sys.stdout.flush()

    for i,rel in enumerate(rels):
        ps = get_tree_probs(t[0], rel=rel, golden=golden, printing=printing)[0]
        posteriors[rel] = np.prod(ps)
        sys.stdout.write("\b%s" % syms[i % len(syms)])
        sys.stdout.flush()

    for j in range(50):
        sys.stdout.write("\b.%s" % syms[(i+j) % len(syms)])
        sys.stdout.flush()
    print


    # for meaning in meanings:
    #     lmk,rel = meaning
    #     if lmk.get_ancestor_count() != num_ancestors:
    #         p = 0
    #     else:
    #         ps = get_tree_probs(t, lmk, rel, printing=False)[0]
    #         p = np.prod(ps)
    #     posteriors.append(p)
        # print p, lmk, lmk.ori_relations, rel, (rel.distance, rel.measurement.best_degree_class, rel.measurement.best_distance_class ) if hasattr(rel,'measurement') else 'No measurement'
    return posteriors
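
The returned dictionary maps each landmark and each relation to its own posterior. A possible way to turn those into joint scores for the original (landmark, relation) pairs, under the assumption that the two factors can simply be multiplied (an assumption of this sketch, not something the code above asserts):

# Hypothetical post-processing: combine per-landmark and per-relation posteriors.
posteriors = get_all_sentence_posteriors(sentence, meanings)
joint = {}
for lmk, rel in meanings:
    joint[(lmk, rel)] = posteriors[lmk] * posteriors[rel]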
Example #7
def get_all_sentence_posteriors(sentence, meanings):

    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    posteriors = []
    for meaning in meanings:
        lmk,rel = meaning
        if lmk.get_ancestor_count() != num_ancestors:
            p = 0
        else:
            ps = get_tree_probs(t, lmk, rel)[0]
            p = np.prod(ps)
        posteriors.append(p)
    return posteriors
Example #8
def get_sentence_posteriors(sentence, iterations=1):
    probs = []
    meanings = []

    # parse sentence with charniak and apply surgeries
    print 'parsing ...'
    modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()
    num_ancestors = count_lmk_phrases(t) - 1

    for _ in xrange(iterations):
        meaning = get_meaning(num_ancestors=num_ancestors)
        lmk, rel = meaning
        probs.append(get_tree_prob(t, *meaning))
        meanings.append(m2s(lmk,rel))
        print '.'

    probs = np.array(probs) / sum(probs)
    return uniquify_distribution(meanings,  probs)
Example #9
    args = parser.parse_args()

    reader = csv.reader(args.csvfile, lineterminator="\n")
    next(reader)  # skip headers

    for i, row in enumerate(reader, start=1):
        print "sentence", i

        # unpack row
        xloc, yloc, sentence, parse, modparse = row

        # convert variables to the right types
        xloc = float(xloc)
        yloc = float(yloc)
        loc = (xloc, yloc)
        parse = ParentedTree.parse(parse)
        modparse = ParentedTree.parse(modparse)

        # how many ancestors should the sampled landmark have?
        num_ancestors = count_lmk_phrases(modparse) - 1

        # sample `args.iterations` times for each sentence
        for _ in xrange(args.iterations):
            lmk, rel = get_meaning(loc, num_ancestors)

            if args.verbose:
                print "utterance:", repr(sentence)
                print "location: %s" % repr(loc)
                print "landmark: %s (%s)" % (lmk, lmk_id(lmk))
                print "relation: %s" % rel_type(rel)
                print "parse:"
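
The argparse setup is not part of this excerpt. Given the attributes referenced above (args.csvfile, args.iterations, args.verbose), it plausibly looked something like the sketch below, followed by the args = parser.parse_args() call already shown at the top of the excerpt; the option names and defaults are guesses:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('csvfile', type=argparse.FileType('r'))
parser.add_argument('-i', '--iterations', type=int, default=1)
parser.add_argument('-v', '--verbose', action='store_true')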
Example #10
def __init__(self, parse_tree):
    try:
        self.tree = ParentedTree.parse(parse_tree)
    except:
        self.tree = None
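
Note that ParentedTree.parse is the NLTK 2.x API; NLTK 3.x renamed that class method to fromstring. Running any of these examples against a current NLTK would therefore look roughly like this (a sketch, not part of the original snippet):

from nltk.tree import ParentedTree

# NLTK 3.x renamed Tree.parse to Tree.fromstring; the call is otherwise equivalent.
tree = ParentedTree.fromstring('(S (NP (DT the) (NN box)) (VP (VBZ sits)))')
print tree.leaves()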
Example #11
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    # parser.add_argument('-n', '--num_iterations', type=int, default=1)
    # parser.add_argument('-l', '--location', type=Point)
    # parser.add_argument('--consistent', action='store_true')
    parser.add_argument('sentence')
    args = parser.parse_args()

    scene, speaker = construct_training_scene()

    print 'parsing ...'
    modparse = get_modparse(args.sentence)
    t = ParentedTree.parse(modparse)
    print '\n%s\n' % t.pprint()

    print_tree_entropy(t)

    raw_input()

    parts = build_meaning(t)
    for part in parts:
        print "Suggested for", part[0]
        items = sorted(part[1].items(), key=lambda x: x[1],reverse=True)
        for item in items:
            print '    ',rjust(item[0],40),item[1]
        print
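
A hypothetical invocation, assuming the block above lives in a file named parse_tool.py (the file name is not given in the excerpt):

python parse_tool.py 'the bottle on the table'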
Example #12
def stanford_tree_reader(nlp):
    all_sentences = []
    for s in nlp["sentences"]:
        all_sentences.append(ParentedTree.parse(s["parsetree"]))
    return all_sentences
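
The nlp argument has the shape of the dictionary returned by the old corenlp-python wrapper: a list of sentences, each carrying its parse as a bracketed string. A hand-built example of that shape (the parse strings are invented for illustration):

# Hypothetical input in the expected shape, plus a call to the reader above.
nlp = {
    "sentences": [
        {"parsetree": "(ROOT (S (NP (DT the) (NN box)) (VP (VBZ sits))))"},
        {"parsetree": "(ROOT (S (NP (PRP it)) (VP (VBZ falls))))"},
    ]
}
trees = stanford_tree_reader(nlp)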
Example #13
    next(reader)  # skip headers

    unique_sentences = {}

    for i,row in enumerate(reader, start=1):
        print 'sentence', i

        # unpack row
        xloc, yloc, sentence, parse, modparse = row
        unique_sentences[sentence] = (parse, modparse)

        # convert variables to the right types
        xloc = float(xloc)
        yloc = float(yloc)
        loc = (xloc, yloc)
        parse = ParentedTree.parse(parse)
        modparse = ParentedTree.parse(modparse)

        # how many ancestors should the sampled landmark have?
        num_ancestors = count_lmk_phrases(modparse) - 1

        if num_ancestors == -1:
            print 'Failed to parse %d [%s] [%s] [%s]' % (i, sentence, parse, modparse)
            continue

        # sample `args.iterations` times for each sentence
        for _ in xrange(args.iterations):
            lmk, rel = get_meaning(loc, num_ancestors)
            lmk, _, _ = lmk
            rel, _, _ = rel
Example #14
        utils.scene.set_scene(scene,speaker)
        table = scene.landmarks['table'].representation.rect
        t_min = table.min_point
        t_max = table.max_point
        t_w = table.width
        t_h = table.height

        xloc,yloc = random()*t_w+t_min.x, random()*t_h+t_min.y
        trajector = Landmark( 'point', PointRepresentation(Vec2(xloc,yloc)), None, Landmark.POINT)
        sentence, rel, lmk = speaker.describe(trajector, scene, False, 1)
        parsestring, modparsestring = get_modparse(sentence)
        unique_sentences[sentence] = (parsestring, modparsestring)

        # convert variables to the right types
        loc = (xloc, yloc)
        parse = ParentedTree.parse(parsestring)
        modparse = ParentedTree.parse(modparsestring)

        # how many ancestors should the sampled landmark have?
        num_ancestors = count_lmk_phrases(modparse) - 1

        if num_ancestors == -1:
            print 'Failed to parse %d [%s] [%s] [%s]' % (i, sentence, parse, modparse)
            continue

        assert(not isinstance(lmk, tuple))
        assert(not isinstance(rel, tuple))

        if args.verbose:
            print 'utterance:', repr(sentence)
            print 'location: %s' % repr(loc)
Example #15
def train( meaning, sentence, update=1, printing=False):
    lmk,rel = meaning
    _, modparse = get_modparse(sentence)
    t = ParentedTree.parse(modparse)

    train_rec( tree=t, lmk=lmk, rel=rel, update=update, printing=printing)
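
A hypothetical call, assuming get_meaning returns a plain (landmark, relation) pair as in Example #8 (Example #3 unpacks richer tuples, so adjust accordingly):

# Hypothetical usage: sample one meaning and train on it for a given sentence.
lmk, rel = get_meaning(num_ancestors=1)
train((lmk, rel), 'the bottle on the table', update=1, printing=True)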