def article_worker(article, pub_time, source, allkeywords, DiffName):
    """Thread worker: parse one article while holding the ``sema`` slot.

    All five arguments are forwarded unchanged to
    ``article_NERT_parser.main`` together with the fixed tag ``"PERSON"``.
    The return value of the parser is discarded; this function returns
    ``None``.

    ``sema`` is a module-level object defined outside this function; it is
    acquired before the parse and released afterwards (presumably a
    semaphore bounding the number of concurrent parses -- confirm at its
    definition).

    Any exception raised by the parser propagates to the caller.
    """
    sema.acquire()
    try:
        article_NERT_parser.main(
            article, pub_time, source, allkeywords, DiffName, "PERSON")
    finally:
        # Release even when the parser raises; otherwise the slot is never
        # returned and later workers can block forever on acquire().
        sema.release()
# Example no. 2
# 0
def _index_by_trailing_name(full_names):
    """Index names by everything after their first word.

    Each name is split on whitespace; the key is the remaining words joined
    with single spaces and the value is the first word, e.g.
    ``u'Sharon Tomiko Santos'`` becomes ``{u'Tomiko Santos': u'Sharon'}``.
    When two names share the same trailing part, the later one wins.
    """
    index = {}
    for full_name in full_names:
        words = full_name.split()
        index[" ".join(words[1:])] = words[0]
    return index


def main():
    """Run the PERSON parser on one hard-coded article with WA politicians.

    Builds lookup dicts (trailing name -> first name) for Washington state
    house representatives, senators, the governor and the US House
    delegation, bundles them into a ``names`` dict and passes it to
    ``article_NERT_parser.main`` along with a fixed URL, a fixed publication
    timestamp and a fixed source label. Returns ``None``.
    """
    house_reps = [
        u'Sherry Appleton', u'Andrew Barkis', u'Steve Bergquist',
        u'Brian Blake', u'Vincent Buys', u'Michelle Caldier',
        u'Bruce Chandler', u'Mike Chapman', u'Frank Chopp', u'Judy Clibborn',
        u'Eileen Cody', u'Cary Condotta', u'Richard DeBolt', u'Tom Dent',
        u'Beth Doglio', u'Laurie Dolan', u'Mary Dye', u'Carolyn Eslick',
        u'Jake Fey', u'Joe Fitzgibbon', u'Noel Frame', u'Roger Goodman',
        u'Paul Graves', u'Mia Gregerson', u'Dan Griffey', u'Larry Haler',
        u'Drew Hansen', u'Mark Hargrove', u'Mark Harmsworth', u'Paul Harris',
        u'Dave Hayes', u'Jeff Holy', u'Zack Hudgins', u'Morgan Irwin',
        u'Bill Jenkin', u'Laurie Jinkins', u'Norm Johnson', u'Ruth Kagi',
        u'Christine Kilduff', u'Steve Kirby', u'Brad Klippert',
        u'Shelley Kloba', u'Vicki Kraft', u'Joel Kretz', u'Dan Kristiansen',
        u'John Lovick', u'Kristine Lytton', u'Drew MacEwen', u'Nicole Macri',
        u'Matt Manweller', u'Jacquelin Maycumber', u'Joan McBride',
        u'Gina McCabe', u'Bob McCaslin', u'Joyce McDonald', u'Jeff Morris',
        u'Dick Muri', u'Terry Nealey', u'Ed Orcutt', u'Timm Ormsby',
        u'Lillian Ortiz-Self', u'Tina Orwall', u'Mike Pellicciotti',
        u'Strom Peterson', u'Eric Pettigrew', u'Liz Pike', u'Gerry Pollet',
        u'Kristine Reeves', u'Marcus Riccelli', u'June Robinson', u'Jay Rodne',
        u'Cindy Ryu', u'Sharon Tomiko Santos', u'David Sawyer', u'Joe Schmick',
        u'Mike Sells', u'Tana Senn', u'Matt Shea', u'Vandana Slatter',
        u'Norma Smith', u'Larry Springer', u'Melanie Stambaugh',
        u'Derek Stanford', u'Mike Steele', u'Drew Stokesbary',
        u'Monica Jurado Stonier', u'Pat Sullivan', u'Gael Tarleton',
        u'David Taylor', u'Steve Tharinger', u'Javier Valdez',
        u'Luanne Van Werven', u'Brandon Vick', u'Mike Volz', u'Jim Walsh',
        u'J.T. Wilcox', u'Sharon Wylie', u'Jesse Young'
    ]

    # The n-with-tilde in "Saldana" is written as the code point U+00F1.
    # The previous literal carried a truncated UTF-8 byte sequence, which
    # produced a different, garbled name at runtime.
    state_senators = [
        u'Jan Angel', u'Barbara Bailey', u'Michael Baumgartner',
        u'Randi Becker', u'Andy Billig', u'John Braun', u'Sharon Brown',
        u'Reuven Carlyle', u'Maralyn Chase', u'Annette Cleveland',
        u'Steve Conway', u'Jeannie Darneille', u'Manka Dhingra',
        u'Doug Ericksen', u'Joe Fain', u'Phil Fortunato', u'David Frockt',
        u'Bob Hasegawa', u'Brad Hawkins', u'Steve Hobbs', u'Jim Honeyford',
        u'Sam Hunt', u'Karen Keiser', u'Curtis King', u'Patty Kuderer',
        u'Marko Liias', u'John McCoy', u'Mark Miloscia', u'Mark Mullet',
        u'Sharon Nelson', u"Steve O'Ban", u'Mike Padden', u'Guy Palumbo',
        u'Kirk Pearson', u'Jamie Pedersen', u'Kevin Ranker', u'Ann Rivers',
        u'Christine Rolfes', u'Rebecca Salda\xf1a', u'Mark Schoesler',
        u'Tim Sheldon', u'Shelly Short', u'Dean Takko', u'Kevin Van De Wege',
        u'Maureen Walsh', u'Judy Warnick', u'Lisa Wellman', u'Lynda Wilson',
        u'Hans Zeiger'
    ]

    governors = [u'Jay Inslee']

    # Alternate display name passed to the parser as its DiffName argument.
    DiffName = {'Jay Inslee': 'Governor Inslee'}

    us_senators = [u'Patty Murray', u'Maria Cantwell']

    us_reps = [
        u'Adam Smith', u'Cathy McMorris Rodgers', u'Dan Newhouse',
        u'Dave Reichert', u'Denny Heck', u'Derek Kilmer',
        u'Jaime Herrera Beutler', u'Pramila Jayapal', u'Rick Larsen',
        u'Suzan DelBene'
    ]

    # Keys are consumed by article_NERT_parser and are kept exactly as they
    # were, including the spelling of "represenative".
    # NOTE(review): the US senators are indexed together with the state
    # senators under "senator", and "washington_senators_us" is passed
    # empty. This mirrors the existing behaviour -- confirm it is intended.
    names = {
        "represenative": _index_by_trailing_name(house_reps),
        "senator": _index_by_trailing_name(state_senators + us_senators),
        "governor": _index_by_trailing_name(governors),
        "washington_senators_us": {},
        "washington_reps_us": _index_by_trailing_name(us_reps)
    }

    # Fixed publication time: [date parts, time parts] as lists of strings.
    article_NERT_parser.main(
        'http://www.theolympian.com/entertainment/celebrities/article192108104.html#navlink=SecList',
        [['12', '14', '17'], ['21', '17', '46']],
        "Washington Education Association", names, DiffName, "PERSON")