def get_modparse(sentence):
    """Return the original and modified parse trees for a sentence.

    The stored parse is looked up through
    ``SentenceParse.get_sentence_parse``.  When no stored row can be
    read, the sentence is parsed with ``parse_sentences``, transformed
    with ``modify_parses`` and the result is stored with
    ``SentenceParse.add_sentence_parse`` (a failure to store is printed
    and otherwise ignored).

    When an entry holds several candidate parses, the first candidate
    whose modified parse contains ``'LANDMARK-PHRASE'`` is selected; if
    none does, the first candidate is used.

    Returns:
        A ``(parsetree, modparsetree)`` tuple.

    Raises:
        ParseError: if ``parse_sentences`` returns nothing, or if the
            modified parse contains no landmark phrase according to
            ``count_lmk_phrases``.
    """
    sp_db = SentenceParse.get_sentence_parse(sentence)
    try:
        res = sp_db.all()[0]
        parsetree = res.original_parse
        modparsetree = res.modified_parse
    except Exception:
        # Any ordinary failure to read a stored parse falls back to
        # parsing from scratch.  ``Exception`` (rather than a bare
        # ``except``) lets KeyboardInterrupt/SystemExit propagate instead
        # of silently starting a new parse.
        print("parse.py: 103: " + sentence)
        parses = parse_sentences([sentence])
        if not parses:
            raise ParseError(printcolors.WARNING +
                             ('ParseError: a sentence was empty'))

        modparses = modify_parses(parses)
        # Iterate over a copy: ``modparses`` is rewritten in place below.
        for i, chunk in enumerate(modparses[:]):
            for j, modparse in enumerate(chunk):
                if 'LANDMARK-PHRASE' in modparse:
                    modparses[i] = modparse
                    parses[i] = parses[i][j]
                    break
            # Still a list => no candidate was selected above; fall back
            # to the first candidate.
            if isinstance(modparses[i], list):
                modparses[i] = modparses[i][0]
                parses[i] = parses[i][0]

        parsetree = parses[0]
        modparsetree = modparses[0]
        try:
            SentenceParse.add_sentence_parse(sentence, parsetree,
                                             modparsetree)
        except Exception as e:
            # Storing the parse is best-effort; report and carry on.
            print(e)

    if count_lmk_phrases(ParentedTree.parse(modparsetree)) < 1:
        raise ParseError(printcolors.WARNING + ('ParseError: Parse contained no Landmark phrase.\nSentence: %s\nParse: %s\nModparse: %s' % (sentence,parsetree,modparsetree)))

    return parsetree, modparsetree
def get_modparse(sentence):
    """Return the modified parse tree for ``sentence``.

    The stored modified parse is returned when
    ``SentenceParse.get_sentence_parse`` yields a row.  When the lookup
    raises ``NoResultFound``, the sentence is parsed with
    ``parse_sentences``, transformed with ``modify_parses``, stored via
    ``SentenceParse.add_sentence_parse`` and the new modified parse is
    returned.
    """
    lookup = SentenceParse.get_sentence_parse(sentence)
    try:
        return lookup.one().modified_parse
    except NoResultFound:
        # Nothing stored yet: build both parses and store them.
        original = parse_sentences([sentence])[0]
        modified = modify_parses([original])[0]
        SentenceParse.add_sentence_parse(sentence, original, modified)
        return modified
print 'landmark: %s (%s)' % (lmk, lmk_id(lmk)) print 'relation: %s' % rel_type(rel) print 'parse:' print parse.pprint() print 'modparse:' print modparse.pprint() print '-' * 70 location = Location(x=xloc, y=yloc) save_tree(modparse, location, rel, lmk) Bigram.make_bigrams(location.words) Trigram.make_trigrams(location.words) if i % 200 == 0: session.commit() if SentenceParse.query().count() == 0: print 'BLIND ADDING!!!!!!!!!!!' for sentence,(parse,modparse) in unique_sentences.items(): SentenceParse.add_sentence_parse_blind(sentence, parse, modparse) else: print 'NOT BLIND ADDING!!!!!!!!!!!!' for sentence,(parse,modparse) in unique_sentences.items(): SentenceParse.add_sentence_parse(sentence, parse, modparse) session.commit() print 'counting ...' # count words w1 = aliased(Word) w2 = aliased(Word)
# print value # exit() # print words # print 'Sum:',sum(words.values()) # # print typos # print 'Typos:',len(typos) # print 'Total:',total # print 'Good:',len(no_bad_words) # import IPython # IPython.embed() # exit() sp_db = SentenceParse.get_sentence_parse(all_descs[0]) try: res = sp_db.all()[0] except IndexError: parses = parse_sentences(all_descs,n=5,threads=8) # temp = tempfile.NamedTemporaryFile() # for p in parses: # temp.write(p) # temp.flush() # proc = subprocess.Popen(['java -mx100m -cp stanford-tregex/stanford-tregex.jar \ # edu.stanford.nlp.trees.tregex.tsurgeon.Tsurgeon \ # -s -treeFile %s surgery/*' % temp.name], # shell=True, # stdout=subprocess.PIPE,
print 'relation: %s' % rel_type(rel) print 'parse:' print parse.pprint() print 'modparse:' print modparse.pprint() print '-' * 70 location = Location(x=xloc, y=yloc) save_tree(modparse, location, rel, lmk) Bigram.make_bigrams(location.words) Trigram.make_trigrams(location.words) if i % 200 == 0: session.commit() for sentence,(parse,modparse) in unique_sentences.items(): SentenceParse.add_sentence_parse_blind(sentence, parse, modparse) session.commit() # count words parent = aliased(Production) qry = session.query(Word.word, Word.pos, parent.landmark, parent.landmark_class, parent.landmark_orientation_relations, parent.landmark_color, parent.relation, parent.relation_distance_class, parent.relation_degree_class, func.count(Word.id)).\ join(parent, Word.parent).\ group_by(Word.word, Word.pos, parent.landmark, parent.landmark_class, parent.landmark_orientation_relations, parent.landmark_color, parent.relation, parent.relation_distance_class, parent.relation_degree_class) for row in qry:
print 'relation: %s' % rel_type(rel) print 'parse:' print parse.pprint() print 'modparse:' print modparse.pprint() print '-' * 70 location = Location(x=xloc, y=yloc) save_tree(modparse, location, rel, lmk) Bigram.make_bigrams(location.words) Trigram.make_trigrams(location.words) if i % 200 == 0: session.commit() for sentence,(parse,modparse) in unique_sentences.items(): SentenceParse.add_sentence_parse_blind(sentence, parse, modparse) session.commit() print 'counting ...' # count words w1 = aliased(Word) w2 = aliased(Word) parent = aliased(Production) qry = session.query(w1.word, w2.word, w2.pos, parent.lhs, parent.landmark, parent.landmark_class, parent.landmark_orientation_relations, parent.landmark_color, parent.relation, parent.relation_distance_class, parent.relation_degree_class, func.count(w2.id)) \ .outerjoin(w1,Bigram.w1) \