Example #1
0
def ieer_headlines():
    """Print the document number and headline of the first 20 IEER documents.

    Documents are visited in the order produced by ``ieer.files()`` and
    ``ieer.parsed_docs()``.  Each entry is printed as a blank line followed
    by ``<docno>:`` and the headline on the next line.
    """
    from itertools import islice

    from nltk.corpus import ieer

    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and as the Python 3 print function.
    print("IEER: First 20 Headlines")
    print("=" * 45)

    # Keep each headline together with the docno of the document it came
    # from.  Reading ``doc.docno`` after the comprehension (as before) only
    # ever saw the last document, so every headline got the same label.
    headlines = (
        (doc.docno, doc.headline)
        for fileid in ieer.files()
        for doc in ieer.parsed_docs(fileid)
    )
    # The generator is lazy, so iteration stops after 20 documents instead
    # of walking the whole corpus first.
    for docno, headline in islice(headlines, 20):
        print("")
        print("%s:\n%s" % (docno, headline))
Example #2
0
def in_demo(trace=0):
    """Print every in(ORG, LOC) clause extracted from the IEER corpus.

    For each document, ``relextract`` is asked for ORG/LOC relations
    filtered by a pattern built around the word "in"; each result is
    rendered with ``show_clause`` and printed.

    ``re``, ``relextract`` and ``show_clause`` are taken from the enclosing
    module's namespace.

    :param trace: when truthy, print each document's ``docno`` and a short
        rule before that document's clauses.
    """
    from nltk.corpus import ieer

    # The word "in", rejected by the negative lookahead when it is followed
    # by text ending in a word that ends with "ing".
    in_pattern = re.compile(r'.*\bin\b(?!\b.+ing\b)')

    print("")
    print("in(ORG, LOC) -- just the clauses:")
    print("=" * 45)

    # Lazily flatten files -> documents so the work below is a single loop.
    documents = (
        document
        for fileid in ieer.files()
        for document in ieer.parsed_docs(fileid)
    )
    for document in documents:
        if trace:
            print(document.docno)
            print("=" * 15)
        relations = relextract('ORG', 'LOC', document, pattern=in_pattern)
        for relation in relations:
            print(show_clause(relation, relsym='IN'))
Example #3
0
def roles_demo(trace=0):
    """Print raw has_role(PER, ORG) relation tuples found in the IEER corpus.

    For each document, ``relextract`` is asked for PER/ORG relations
    filtered by a pattern listing role words (analyst, chairman, ...,
    writer); each result is rendered with ``show_raw_rtuple`` and printed.

    ``re``, ``relextract`` and ``show_raw_rtuple`` are taken from the
    enclosing module's namespace.

    :param trace: when truthy, print each document's ``docno`` and a short
        rule before its tuples, and pass ``lcon=True, rcon=True`` to
        ``show_raw_rtuple``; otherwise both are passed as ``False``.
    """
    from nltk.corpus import ieer

    # Raw string so the regex escapes (``\s``) reach ``re`` untouched and
    # do not trigger invalid-escape warnings on current Python versions.
    #
    # The role group now closes after the LAST role.  Previously it closed
    # after "librarian", which left "manager" ... "writer" outside the
    # surrounding ``.*( ... ).*`` as bare top-level alternatives.
    #
    # NOTE(review): ``the?`` makes only the final "e" optional, while the
    # inline comment suggests the whole word was meant to be optional.
    # Left unchanged here -- confirm the intent before altering it.
    roles = r"""
    (.*(                   # assorted roles
    analyst|
    chair(wo)?man|
    commissioner|
    counsel|
    director|
    economist|
    editor|
    executive|
    foreman|
    governor|
    head|
    lawyer|
    leader|
    librarian|
    manager|
    partner|
    president|
    producer|
    professor|
    researcher|
    spokes(wo)?man|
    writer).*)|
    ,\sof\sthe?\s*  # "X, of (the) Y"
    """
    ROLES = re.compile(roles, re.VERBOSE)

    print("")
    print("has_role(PER, ORG) -- raw rtuples:")
    print("=" * 45)

    # Both context flags depend only on ``trace``, so compute them once.
    lcon = rcon = bool(trace)

    for fileid in ieer.files():
        for doc in ieer.parsed_docs(fileid):
            if trace:
                print(doc.docno)
                print("=" * 15)
            for rel in relextract('PER', 'ORG', doc, pattern=ROLES):
                print(show_raw_rtuple(rel, lcon=lcon, rcon=rcon))
Example #4
0
def in_demo(trace=0, sql=True):
    """Print in(ORG, LOC) clauses from IEER, optionally storing them in SQLite.

    For each document, ``extract_rels`` is asked for ORG/LOC relations
    filtered by a pattern built around the word "in"; each result is
    rendered with ``show_clause`` and printed.

    ``re``, ``extract_rels`` and ``show_clause`` are taken from the
    enclosing module's namespace.

    :param trace: when truthy, print each document's ``docno`` and a short
        rule before that document's clauses.
    :param sql: when truthy, also insert every relation's ``subjtext``,
        ``objtext`` and the document's ``docno`` into an in-memory
        ``Locations`` table, then print the ``OrgName`` of every row whose
        ``LocationName`` is ``'Atlanta'``.
    """
    from nltk.corpus import ieer

    connection = None
    cur = None
    try:
        if sql:
            import sqlite3
            connection = sqlite3.connect(":memory:")
            # ``OptimizedUnicode`` no longer exists on Python 3.12+; only
            # install it where the running interpreter still provides it.
            text_factory = getattr(sqlite3, "OptimizedUnicode", None)
            if text_factory is not None:
                connection.text_factory = text_factory
            cur = connection.cursor()
            cur.execute("""create table Locations
            (OrgName text, LocationName text, DocID text)""")

        # The word "in", rejected by the negative lookahead when it is
        # followed by text ending in a word that ends with "ing".
        IN = re.compile(r'.*\bin\b(?!\b.+ing\b)')

        # Single-argument print(...) behaves the same as the Python 2 print
        # statement and as the Python 3 print function.
        print("")
        print("IEER: in(ORG, LOC) -- just the clauses:")
        print("=" * 45)

        for fileid in ieer.files():
            for doc in ieer.parsed_docs(fileid):
                if trace:
                    print(doc.docno)
                    print("=" * 15)
                for rel in extract_rels('ORG', 'LOC', doc, pattern=IN):
                    print(show_clause(rel, relsym='IN'))
                    if sql:
                        rtuple = (rel['subjtext'], rel['objtext'], doc.docno)
                        cur.execute(
                            """insert into Locations
                                    values (?, ?, ?)""", rtuple)

        if sql:
            # One commit for the whole batch: the rows are only read back
            # through this same connection, so per-row commits add nothing.
            connection.commit()
            cur.execute("""select OrgName from Locations
                        where LocationName = 'Atlanta'""")
            print("")
            print("Extract data from SQL table: ORGs in Atlanta")
            print("-" * 15)
            for row in cur:
                print(row)
    finally:
        # Release the database even if extraction or printing fails.
        if connection is not None:
            connection.close()