Example #1
0
def get_modparse(sentence):
    """Return the original and modified parse trees for a sentence.

    The stored parse is looked up through ``SentenceParse`` first.  If that
    lookup fails (for example because no row exists), the sentence is parsed
    again, one modified parse is selected, and the pair is handed to
    ``SentenceParse.add_sentence_parse``.  A failure while storing is printed
    and otherwise ignored, so the freshly computed parses are still returned.

    Args:
        sentence: the sentence to look up or parse.

    Returns:
        A ``(parsetree, modparsetree)`` tuple.

    Raises:
        ParseError: if parsing produced no result, or if the modified parse
            contains no landmark phrase.
    """
    sp_db = SentenceParse.get_sentence_parse(sentence)
    try:
        res = sp_db.all()[0]
        parsetree = res.original_parse
        modparsetree = res.modified_parse
    except Exception:
        # Deliberately broad: any lookup failure (including the IndexError
        # raised above when the result list is empty) falls back to parsing
        # from scratch.  Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt and SystemExit propagate.
        print("parse.py: 103: " + sentence)
        parses = parse_sentences([sentence])
        if not parses:
            raise ParseError(printcolors.WARNING + ('ParseError: a sentence was empty'))

        modparses = modify_parses(parses)
        # Iterate over a snapshot because entries of ``modparses`` are
        # replaced in place while looping.
        # NOTE(review): each entry looks like it may be either a single parse
        # or a list of candidate parses -- confirm against modify_parses.
        for i, chunk in enumerate(modparses[:]):
            for j, modparse in enumerate(chunk):
                if 'LANDMARK-PHRASE' in modparse:
                    # Prefer the first candidate mentioning a landmark
                    # phrase, and keep the original parse at the same index.
                    modparses[i] = modparse
                    parses[i] = parses[i][j]
                    break
            if isinstance(modparses[i], list):
                # No candidate was selected above: fall back to the first.
                modparses[i] = modparses[i][0]
                parses[i] = parses[i][0]

        parsetree = parses[0]
        modparsetree = modparses[0]
        try:
            SentenceParse.add_sentence_parse(sentence, parsetree, modparsetree)
        except Exception as e:
            # Best effort: storing the parse must not prevent returning it.
            print(e)

    if count_lmk_phrases(ParentedTree.parse(modparsetree)) < 1:
        raise ParseError(printcolors.WARNING + ('ParseError: Parse contained no Landmark phrase.\nSentence: %s\nParse: %s\nModparse: %s' % (sentence,parsetree,modparsetree)))

    return parsetree, modparsetree
Example #2
0
def get_modparse(sentence):
    """Return the modified parse tree for ``sentence``.

    The stored parse is used when exactly one row is found.  When the lookup
    raises ``NoResultFound``, the sentence is parsed, its parse is modified,
    and both are passed to ``SentenceParse.add_sentence_parse`` before the
    modified parse is returned.
    """
    lookup = SentenceParse.get_sentence_parse(sentence)

    try:
        modified = lookup.one().modified_parse
    except NoResultFound:
        # Nothing stored yet: compute both parses and record them.
        original = parse_sentences([sentence])[0]
        modified = modify_parses([original])[0]
        SentenceParse.add_sentence_parse(sentence, original, modified)

    return modified
Example #3
0
def get_modparse(sentence):
    """Fetch the modified parse tree of a sentence, computing it if absent.

    A single stored row supplies the result directly.  If the query raises
    ``NoResultFound``, the sentence is parsed and the parse modified; the new
    pair is handed to ``SentenceParse.add_sentence_parse`` and the modified
    parse is returned.
    """
    stored_query = SentenceParse.get_sentence_parse(sentence)

    try:
        row = stored_query.one()
        mod_tree = row.modified_parse
    except NoResultFound:
        # No stored parse for this sentence: build one and save it.
        sentence_batch = [sentence]
        raw_tree = parse_sentences(sentence_batch)[0]
        mod_tree = modify_parses([raw_tree])[0]
        SentenceParse.add_sentence_parse(sentence, raw_tree, mod_tree)

    return mod_tree
Example #4
0
                print 'landmark: %s (%s)' % (lmk, lmk_id(lmk))
                print 'relation: %s' % rel_type(rel)
                print 'parse:'
                print parse.pprint()
                print 'modparse:'
                print modparse.pprint()
                print '-' * 70

            location = Location(x=xloc, y=yloc)
            save_tree(modparse, location, rel, lmk)
            Bigram.make_bigrams(location.words)
            Trigram.make_trigrams(location.words)

        if i % 200 == 0: session.commit()

    if SentenceParse.query().count() == 0:
        print 'BLIND ADDING!!!!!!!!!!!'
        for sentence,(parse,modparse) in unique_sentences.items():
            SentenceParse.add_sentence_parse_blind(sentence, parse, modparse)
    else:
        print 'NOT BLIND ADDING!!!!!!!!!!!!'
        for sentence,(parse,modparse) in unique_sentences.items():
            SentenceParse.add_sentence_parse(sentence, parse, modparse)

    session.commit()

    print 'counting ...'

    # count words
    w1 = aliased(Word)
    w2 = aliased(Word)
Example #5
0
    #     print value
    # exit()

    # print words
    # print 'Sum:',sum(words.values())
    # # print typos
    # print 'Typos:',len(typos)
    # print 'Total:',total
    # print 'Good:',len(no_bad_words)

    # import IPython
    # IPython.embed()
    # exit()


    sp_db = SentenceParse.get_sentence_parse(all_descs[0])
    try:
        res = sp_db.all()[0]
    except IndexError:

        parses = parse_sentences(all_descs,n=5,threads=8)

        # temp = tempfile.NamedTemporaryFile()
        # for p in parses:
        #     temp.write(p)
        # temp.flush()
        # proc = subprocess.Popen(['java -mx100m -cp stanford-tregex/stanford-tregex.jar \
        #                           edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon \
        #                           -s -treeFile %s surgery/*' % temp.name],
        #                         shell=True,
        #                         stdout=subprocess.PIPE,
Example #6
0
                print 'relation: %s' % rel_type(rel)
                print 'parse:'
                print parse.pprint()
                print 'modparse:'
                print modparse.pprint()
                print '-' * 70

            location = Location(x=xloc, y=yloc)
            save_tree(modparse, location, rel, lmk)
            Bigram.make_bigrams(location.words)
            Trigram.make_trigrams(location.words)

        if i % 200 == 0: session.commit()

    for sentence,(parse,modparse) in unique_sentences.items():
        SentenceParse.add_sentence_parse_blind(sentence, parse, modparse)

    session.commit()

    # count words
    parent = aliased(Production)
    qry = session.query(Word.word, Word.pos,
                        parent.landmark, parent.landmark_class, parent.landmark_orientation_relations, parent.landmark_color,
                        parent.relation, parent.relation_distance_class,
                        parent.relation_degree_class, func.count(Word.id)).\
                  join(parent, Word.parent).\
                  group_by(Word.word, Word.pos,
                           parent.landmark, parent.landmark_class, parent.landmark_orientation_relations, parent.landmark_color,
                           parent.relation, parent.relation_distance_class,
                           parent.relation_degree_class)
    for row in qry:
Example #7
0
                print 'relation: %s' % rel_type(rel)
                print 'parse:'
                print parse.pprint()
                print 'modparse:'
                print modparse.pprint()
                print '-' * 70

            location = Location(x=xloc, y=yloc)
            save_tree(modparse, location, rel, lmk)
            Bigram.make_bigrams(location.words)
            Trigram.make_trigrams(location.words)

        if i % 200 == 0: session.commit()

    for sentence,(parse,modparse) in unique_sentences.items():
        SentenceParse.add_sentence_parse_blind(sentence, parse, modparse)

    session.commit()

    print 'counting ...'

    # count words
    w1 = aliased(Word)
    w2 = aliased(Word)
    parent = aliased(Production)
    qry = session.query(w1.word, w2.word, w2.pos,
                        parent.lhs, parent.landmark, parent.landmark_class,
                        parent.landmark_orientation_relations, parent.landmark_color,
                        parent.relation, parent.relation_distance_class,
                        parent.relation_degree_class, func.count(w2.id)) \
                                .outerjoin(w1,Bigram.w1) \