def ieer_headlines():
    """Print the first 20 headlines of the IEER corpus, each with its docno.

    Documents are read in the order given by ``ieer.files()`` and
    ``ieer.parsed_docs(...)``.  Each headline is printed after a blank line,
    preceded by the ``docno`` of the document it belongs to.
    """
    from itertools import islice
    from nltk.corpus import ieer

    print("IEER: First 20 Headlines")
    print("=" * 45)

    # Keep each headline paired with its own document number.  Reading
    # ``doc.docno`` after the collection loop has finished would always
    # report the last document parsed, not the headline's owner.
    headlines = (
        (doc.docno, doc.headline)
        for fileid in ieer.files()
        for doc in ieer.parsed_docs(fileid)
    )

    # islice stops pulling documents once 20 headlines have been shown.
    for docno, headline in islice(headlines, 20):
        print("")
        print("%s:\n%s" % (docno, headline))
def in_demo(trace=0):
    """Print the clause form of every in(ORG, LOC) relation found in IEER.

    For each document of each IEER file, ``relextract('ORG', 'LOC', ...)`` is
    called with a pattern built around the word "in", and every result is
    printed via ``show_clause(..., relsym='IN')``.

    :param trace: when true, print each document's ``docno`` and a short
        separator before that document's relations.
    """
    # NOTE(review): a second ``in_demo(trace=0, sql=True)`` is defined later
    # in this file; if both live in the same module, that one replaces this
    # definition at import time -- confirm which is intended.
    from nltk.corpus import ieer

    # "in" as a whole word, rejected when an "...ing" word follows later.
    in_pattern = re.compile(r'.*\bin\b(?!\b.+ing\b)')

    print("")
    print("in(ORG, LOC) -- just the clauses:")
    print("=" * 45)

    documents = (
        doc
        for fileid in ieer.files()
        for doc in ieer.parsed_docs(fileid)
    )
    for doc in documents:
        if trace:
            print(doc.docno)
            print("=" * 15)
        for relation in relextract('ORG', 'LOC', doc, pattern=in_pattern):
            print(show_clause(relation, relsym='IN'))
def roles_demo(trace=0):
    """Print the raw rtuples of has_role(PER, ORG) relations found in IEER.

    For each document of each IEER file, ``relextract('PER', 'ORG', ...)`` is
    called with a verbose regular expression listing role words, and every
    result is printed via ``show_raw_rtuple``.

    :param trace: when true, print each document's ``docno`` and a short
        separator, and pass ``lcon=True, rcon=True`` to ``show_raw_rtuple``;
        otherwise both flags are passed as ``False``.
    """
    from nltk.corpus import ieer

    # Compiled with re.VERBOSE, so whitespace and "#" comments inside the
    # pattern are ignored.  The line breaks matter: a "#" comment runs to the
    # end of its line.
    # NOTE(review): the "(.*( ... librarian).*)" group closes after
    # "librarian", so the alternatives from "manager" onwards are not wrapped
    # in ".*"; and "the?" makes only the final "e" optional.  Both look
    # unintentional -- confirm before changing the pattern.
    role_pattern = re.compile(r"""
    (.*(                   # assorted roles
    analyst|
    chair(wo)?man|
    commissioner|
    counsel|
    director|
    economist|
    editor|
    executive|
    foreman|
    governor|
    head|
    lawyer|
    leader|
    librarian).*)|
    manager|
    partner|
    president|
    producer|
    professor|
    researcher|
    spokes(wo)?man|
    writer|
    ,\sof\sthe?\s*  # "X, of (the) Y"
    """, re.VERBOSE)

    print("")
    print("has_role(PER, ORG) -- raw rtuples:")
    print("=" * 45)

    # Context flags follow the trace setting and are the same for every doc.
    with_context = bool(trace)

    for fileid in ieer.files():
        for doc in ieer.parsed_docs(fileid):
            if with_context:
                print(doc.docno)
                print("=" * 15)
            for relation in relextract('PER', 'ORG', doc, pattern=role_pattern):
                print(show_raw_rtuple(relation, lcon=with_context,
                                      rcon=with_context))
def in_demo(trace=0, sql=True):
    """Print in(ORG, LOC) clauses from IEER, optionally via an SQL table.

    For each document of each IEER file, ``extract_rels('ORG', 'LOC', ...)``
    is called with a pattern built around the word "in", and every result is
    printed via ``show_clause(..., relsym='IN')``.

    When ``sql`` is true, each relation's ``'subjtext'``, ``'objtext'`` and
    the document's ``docno`` are also inserted into an in-memory SQLite table
    ``Locations``.  After all documents are processed, the organisations whose
    location is ``'Atlanta'`` are selected from that table and printed.

    :param trace: when true, print each document's ``docno`` and a short
        separator before that document's relations.
    :param sql: when true, load the relations into the in-memory database and
        run the final query.
    """
    # NOTE(review): an earlier ``in_demo(trace=0)`` appears in this file; if
    # both live in the same module, this definition is the one that wins.
    from nltk.corpus import ieer

    connection = None
    cur = None
    if sql:
        import sqlite3
        connection = sqlite3.connect(":memory:")

    try:
        if connection is not None:
            connection.text_factory = sqlite3.OptimizedUnicode
            cur = connection.cursor()
            cur.execute("""create table Locations (OrgName text, LocationName text, DocID text)""")

        # "in" as a whole word, rejected when an "...ing" word follows later.
        IN = re.compile(r'.*\bin\b(?!\b.+ing\b)')

        print("")
        print("IEER: in(ORG, LOC) -- just the clauses:")
        print("=" * 45)

        for fileid in ieer.files():
            for doc in ieer.parsed_docs(fileid):
                if trace:
                    print(doc.docno)
                    print("=" * 15)
                for rel in extract_rels('ORG', 'LOC', doc, pattern=IN):
                    print(show_clause(rel, relsym='IN'))
                    if cur is not None:
                        rtuple = (rel['subjtext'], rel['objtext'], doc.docno)
                        cur.execute(
                            """insert into Locations values (?, ?, ?)""",
                            rtuple)

        if cur is not None:
            # One commit for the whole batch; the query below runs on the
            # same connection, so it sees every inserted row.
            connection.commit()
            cur.execute("""select OrgName from Locations where LocationName = 'Atlanta'""")
            print("")
            print("Extract data from SQL table: ORGs in Atlanta")
            print("-" * 15)
            for row in cur:
                print(row)
    finally:
        # Release the database even if corpus reading or extraction raised.
        if connection is not None:
            connection.close()