def process_sopr_filing(sopr_xml_file):
    """Run the filings parsed from ``sopr_xml_file`` through the import recipe.

    The records yielded by ``lobbyists.parse_filings`` are handed to
    ``saucebrush.run_recipe`` together with the filters listed below, in
    this exact order.  The 'issues' and 'lobbyists' lists are routed
    through their own sub-recipes by the Splitter and are then removed
    from the record before the final 'filing' emitter.
    """
    from sunlightapi import live_settings as DJ_SETTINGS
    DJ_APPLABEL = 'lobbyists'

    def drop_fraction(stamp):
        # Keep only the text before the first '.', so the value fits the
        # '%Y-%m-%dT%H:%M:%S' format given to DateCleaner below.
        return stamp.split('.')[0]

    records = lobbyists.parse_filings(sopr_xml_file)

    recipe = [
        # flatten non-list dictionaries & clean up some fields
        DictFlattener(['filing', 'client', 'registrant']),
        FieldRemover([
            'govt_entities',
            'affiliated_orgs',
            'foreign_entities',
            'client_state_or_local_gov',
            'client_status',
            'filing_affiliated_orgs_url',
        ]),
        FieldRenamer({'filing_date': 'filing_filing_date'}),

        # process names & dates
        FieldAdder('client_contact_name', ''),
        NameCleaner('client_contact_name',
                    prefix='client_contact_',
                    nomatch_name='client_raw_contact_name'),
        FieldModifier('filing_date', drop_fraction),
        DateCleaner('filing_date',
                    from_format='%Y-%m-%dT%H:%M:%S',
                    to_format='%Y-%m-%d'),

        # flatten lists
        Flattener(['issues', 'lobbyists']),
        FieldCopier({
            'issues.filing_id': 'filing_id',
            'lobbyists.filing_id': 'filing_id',
        }),

        # handle lists
        saucebrush.filters.Splitter({
            'issues': [
                DjangoModelEmitter(DJ_SETTINGS, DJ_APPLABEL, 'issue'),
            ],
            'lobbyists': [
                FieldRemover(['indicator', 'status']),
                NameCleaner('name', nomatch_name='raw_name'),
                Unique(),  # remove some duplicate lobbyists on a form
                DjangoModelEmitter(DJ_SETTINGS, DJ_APPLABEL, 'lobbyist'),
            ],
        }),
        FieldRemover(['issues', 'lobbyists']),
        DjangoModelEmitter(DJ_SETTINGS, DJ_APPLABEL, 'filing'),
    ]

    saucebrush.run_recipe(records, *recipe)
def test_ownership_percentage(self):
    """Parse foreign entity ownership percentage"""
    filings = list(lobbyists.parse_filings(
        util.testpath('foreign_entity_ownership_percentage.xml')))

    # (filing id, ownership percentages) in the order pop() yields them,
    # i.e. from the end of each parsed list.
    expected = [
        ('4CAC3894-FA4C-4CEC-99C7-1141544CA49B', [None, 100]),
        ('E4382341-0E5D-4A31-8A7D-3CCB71E8EF6E', [34]),
        ('EEDBB5F5-8BB4-4E0D-9F10-CD8FDD2A0D70', [0]),
    ]
    for filing_id, percentages in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        entities = parsed['foreign_entities']
        for percentage in percentages:
            entity = entities.pop()['foreign_entity']
            self.assertEqual(entity['ownership_percentage'], percentage)
        # Every entity of this filing must have been consumed.
        self.assertEqual(len(entities), 0)

    self.assertEqual(len(filings), 0)
def test_state_or_local_gov(self):
    """Parse client state or local government status"""
    filings = list(lobbyists.parse_filings(
        util.testpath('client_state_or_local_gov.xml')))

    # (filing id, expected flag) in the order pop() yields the filings.
    expected = [
        ('31733890-2D8D-414E-8EF1-08701CBC5871', 'y'),
        ('045E8E33-BBEA-437C-844D-D1D6057AA2A0', 'unspecified'),
        ('8949C958-ABF0-49B8-8ACF-0026E92C2B13', 'n'),
    ]
    for filing_id, flag in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['client']['state_or_local_gov'], flag)

    self.assertEqual(len(filings), 0)
def test_description(self):
    """Parse client description"""
    filings = list(lobbyists.parse_filings(
        util.testpath('client_description.xml')))

    # (filing id, expected client description) in pop() order.
    expected = [
        ('CCB41994-81FB-4C32-A155-082164564403', 'unspecified'),
        ('BD57AEBC-D1D7-4867-880E-0711F5AABD17', 'unspecified'),
        ('530238AB-4144-484F-8297-00D4A969779C', 'DISABILITY RESEARCH'),
        ('1402A175-987F-4896-B0F5-00107EF69834',
         'Distributor of tactical clothing and gear for the military and law enforcement.'),
    ]
    for filing_id, description in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['client']['description'], description)

    self.assertEqual(len(filings), 0)
def test_official_position(self):
    """Parse lobbyist 'official position'"""
    filings = list(lobbyists.parse_filings(
        util.testpath('lobbyist_official_position.xml')))

    # (filing id, official positions) in the order pop() yields them,
    # i.e. from the end of each parsed list.
    expected = [
        ('AB94AB3D-F5D6-4EE8-A462-0925A6D9A499',
         ['N/A',
          'MGR. AIR TRAFFIC DIV. WEST PAC, REG, FAA']),
        ('BD894C51-AA23-46AE-9802-006B8C91702B',
         ['ExecFlrAsst, H. Maj. Whip; ExecDir, H.DemCauc.',
          'StaffAsst, DemPolicyComm; FlrAsst, MinoritySec',
          'Chief of Staff, President Reagan',
          'AsstEditor/Ed./Res.Dir, Sen.Rep.PolicyComm;']),
        ('DE038A45-9F6B-4764-B678-8004E7903BC4',
         ['unspecified']),
        ('2164D6BB-EBBA-40D2-9C18-16A2D670030A',
         ['N/A', 'N/A', 'N/A', 'N/A', 'N/A']),
    ]
    for filing_id, positions in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        lobbiers = parsed['lobbyists']
        for position in positions:
            lobbyist = lobbiers.pop()['lobbyist']
            self.assertEqual(lobbyist['official_position'], position)
        # Every lobbyist of this filing must have been consumed.
        self.assertEqual(len(lobbiers), 0)

    self.assertEqual(len(filings), 0)
def test_import_registrants(self):
    """Import registrants."""
    filings = list(lobbyists.parse_filings(util.testpath('registrants.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    # Switch to sqlite3.Row and take a fresh cursor so the columns can be
    # read by name below.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("SELECT filing_registrant.filing AS filing_id, "
                "filing_registrant.address AS address, "
                "filing_registrant.description AS description, "
                "registrant.country AS country, "
                "registrant.senate_id AS senate_id, "
                "registrant.name AS name, "
                "registrant.ppb_country AS ppb_country "
                "FROM filing_registrant INNER JOIN registrant ON "
                "registrant.id=filing_registrant.registrant")
    rows = sorted(cur, key=lambda row: row['filing_id'])

    # Only filings that carry a registrant are expected in the join; both
    # sides are ordered by filing id so they can be compared pairwise.
    registrants = sorted((x for x in filings if 'registrant' in x),
                         key=lambda x: x['filing']['id'])
    self.assertEqual(len(rows), len(registrants))

    columns = ('address', 'description', 'country', 'senate_id', 'name',
               'ppb_country')
    for row, filing in zip(rows, registrants):
        self.assertEqual(row['filing_id'], filing['filing']['id'])
        reg = filing['registrant']
        for column in columns:
            self.assertEqual(row[column], reg[column])
def test_name(self):
    """Parse affiliated org name"""
    filings = list(lobbyists.parse_filings(
        util.testpath('affiliated_org_name.xml')))

    # (filing id, org names in pop() order, whether to assert afterwards
    # that the org list is empty).
    expected = [
        ('E8A4D9C9-2D0B-4F0A-966D-A076858D2751',
         ['N/A'],
         True),
        ('C8293344-9A8D-4D6F-AAA5-25925E60BED9',
         ['CARITAS CHRISTI',
          'BOSTON MEDICAL CENTER',
          'PARTNERS HEALTHCARE SYSTEM',
          'DANA FARBER CANCER INSTITUTE'],
         True),
        # NOTE(review): the original test never asserted that this last
        # org list is drained; kept that way -- confirm against the
        # fixture before tightening.
        ('6D4AFEE6-E886-4993-B153-14A887FD325A',
         ["Land O'Lakes, Inc."],
         False),
    ]
    for filing_id, names, check_drained in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        orgs = parsed['affiliated_orgs']
        for name in names:
            org = orgs.pop()['org']
            self.assertEqual(org['name'], name)
        if check_drained:
            self.assertEqual(len(orgs), 0)

    self.assertEqual(len(filings), 0)
def test_description(self):
    """Parse client description"""
    filings = list(lobbyists.parse_filings(
        util.testpath('client_description.xml')))

    # (filing id, expected client description) in pop() order.
    expected = [
        ('CCB41994-81FB-4C32-A155-082164564403', 'unspecified'),
        ('BD57AEBC-D1D7-4867-880E-0711F5AABD17', 'unspecified'),
        ('530238AB-4144-484F-8297-00D4A969779C', 'DISABILITY RESEARCH'),
        ('1402A175-987F-4896-B0F5-00107EF69834',
         'Distributor of tactical clothing and gear for the military and law enforcement.'),
    ]
    for filing_id, description in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['client']['description'], description)

    self.assertEqual(len(filings), 0)
def test_amount(self):
    """Parse filing amount"""
    filings = list(lobbyists.parse_filings(util.testpath('amounts.xml')))

    # (filing id, expected amount) in pop() order.
    # None means unspecified amount.
    expected = [
        ('BAA88635-8674-4DF8-8825-2B0B3D8B4554', 108000),
        ('9648F901-BA48-4EE5-BE8B-01D5551BFDA1', 20000),
        ('8F21CC08-E136-4A42-A51D-25FE3B6CC303', 0),
        ('DE669D92-0620-4257-8B0C-01922EA0A226', None),
        ('5DA4C8F8-4E2D-4EE1-895C-00369A8222FB', None),
        ('DB4CCA2C-1E51-46A7-8800-00201697E905', None),
    ]
    for filing_id, amount in expected:
        filing = filings.pop()['filing']
        self.assertEqual(filing['id'], filing_id)
        self.assertEqual(filing['amount'], amount)

    self.assertEqual(len(filings), 0)
def test_import_affiliated_orgs_country(self):
    """Importing affiliated orgs should fill the 'country' table."""
    filings = list(lobbyists.parse_filings(
        util.testpath('affiliated_orgs.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    # Switch to sqlite3.Row and take a fresh cursor so 'name' can be read
    # by column name.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    # NOTE(review): there is no ORDER BY, so the expected order below
    # relies on the order in which SQLite happens to return the rows.
    cur.execute("SELECT * FROM country")
    rows = list(cur)

    # Country names in the order pop() yields them (last row first).
    expected_names = (
        'PUERTO RICO',
        'unspecified',
        'UNITED KINGDOM',
        'UNDETERMINED',
        '<SELECT ONE>',
        'USA',
    )
    for name in expected_names:
        row = rows.pop()
        self.assertEqual(row['name'], name)

    self.assertEqual(len(rows), 0)
def test_import_affiliated_orgs_country(self):
    """Importing affiliated orgs should fill the 'country' table."""
    filings = list(lobbyists.parse_filings(
        util.testpath('affiliated_orgs.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    # Switch to sqlite3.Row and take a fresh cursor so 'name' can be read
    # by column name.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    # NOTE(review): there is no ORDER BY, so the expected order below
    # relies on the order in which SQLite happens to return the rows.
    cur.execute("SELECT * FROM country")
    rows = list(cur)

    # Country names in the order pop() yields them (last row first).
    expected_names = (
        'PUERTO RICO',
        'unspecified',
        'UNITED KINGDOM',
        'UNDETERMINED',
        '<SELECT ONE>',
        'USA',
    )
    for name in expected_names:
        row = rows.pop()
        self.assertEqual(row['name'], name)

    self.assertEqual(len(rows), 0)
def test_status(self):
    """Parse lobbyist status"""
    filings = list(lobbyists.parse_filings(
        util.testpath('lobbyist_status.xml')))

    # (filing id, lobbyist statuses) in the order pop() yields them,
    # i.e. from the end of each parsed list.
    expected = [
        ('02DDA99B-725A-4DBA-8397-34892A6918D7', ['terminated']),
        ('AB94AB3D-F5D6-4EE8-A462-0925A6D9A499', ['active', 'terminated']),
        ('04926911-8A12-4A0E-9DA4-510869446EAC', ['undetermined']),
    ]
    for filing_id, statuses in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        lobbiers = parsed['lobbyists']
        for status in statuses:
            lobbyist = lobbiers.pop()['lobbyist']
            self.assertEqual(lobbyist['status'], status)
        # Every lobbyist of this filing must have been consumed.
        self.assertEqual(len(lobbiers), 0)

    self.assertEqual(len(filings), 0)
def test_description(self):
    """Parse registrant description"""
    filings = list(lobbyists.parse_filings(
        util.testpath('registrant_descriptions.xml')))

    # (filing id, expected registrant description) in pop() order.
    expected = [
        ('C35AF239-B3BF-45A8-A5F0-11B73F8C7D64',
         u'Government Relations & Strategic Consulting'),
        ('D0325DF2-82F6-4FF1-8C72-08B9CC3E99D7',
         'defense/energy/interior consulting'),
        ('443C63BC-F0DB-41F7-B912-002CABBF0CAD',
         'unspecified'),
    ]
    for filing_id, description in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['registrant']['description'], description)

    self.assertEqual(len(filings), 0)
def test_import_foreign_entities_country(self):
    """Importing foreign entities should fill the 'country' table."""
    filings = list(lobbyists.parse_filings(
        util.testpath('foreign_entities.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    # Switch to sqlite3.Row and take a fresh cursor so 'name' can be read
    # by column name.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    # NOTE(review): there is no ORDER BY, so the expected order below
    # relies on the order in which SQLite happens to return the rows.
    cur.execute("SELECT * FROM country")
    rows = list(cur)

    # Country names in the order pop() yields them (last row first).
    expected_names = (
        'AUSTRALIA',
        'UNDETERMINED',
        'SEYCHELLES',
        'UNITED KINGDOM',
        '<SELECT ONE>',
        'unspecified',
        'USA',
        'GERMANY',
        'JAPAN',
    )
    for name in expected_names:
        row = rows.pop()
        self.assertEqual(row['name'], name)

    self.assertEqual(len(rows), 0)
def test_description(self):
    """Parse registrant description"""
    filings = list(lobbyists.parse_filings(
        util.testpath('registrant_descriptions.xml')))

    # (filing id, expected registrant description) in pop() order.
    expected = [
        ('C35AF239-B3BF-45A8-A5F0-11B73F8C7D64',
         u'Government Relations & Strategic Consulting'),
        ('D0325DF2-82F6-4FF1-8C72-08B9CC3E99D7',
         'defense/energy/interior consulting'),
        ('443C63BC-F0DB-41F7-B912-002CABBF0CAD',
         'unspecified'),
    ]
    for filing_id, description in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['registrant']['description'], description)

    self.assertEqual(len(filings), 0)
def test_name(self):
    """Parse affiliated org name"""
    filings = list(lobbyists.parse_filings(
        util.testpath('affiliated_org_name.xml')))

    # (filing id, org names in pop() order, whether to assert afterwards
    # that the org list is empty).
    expected = [
        ('E8A4D9C9-2D0B-4F0A-966D-A076858D2751',
         ['N/A'],
         True),
        ('C8293344-9A8D-4D6F-AAA5-25925E60BED9',
         ['CARITAS CHRISTI',
          'BOSTON MEDICAL CENTER',
          'PARTNERS HEALTHCARE SYSTEM',
          'DANA FARBER CANCER INSTITUTE'],
         True),
        # NOTE(review): the original test never asserted that this last
        # org list is drained; kept that way -- confirm against the
        # fixture before tightening.
        ('6D4AFEE6-E886-4993-B153-14A887FD325A',
         ["Land O'Lakes, Inc."],
         False),
    ]
    for filing_id, names, check_drained in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        orgs = parsed['affiliated_orgs']
        for name in names:
            org = orgs.pop()['org']
            self.assertEqual(org['name'], name)
        if check_drained:
            self.assertEqual(len(orgs), 0)

    self.assertEqual(len(filings), 0)
def test_address(self):
    """Parse registrant address"""
    filings = list(lobbyists.parse_filings(
        util.testpath('registrant_addrs.xml')))

    # (filing id, expected registrant address) in pop() order.  The
    # multi-line addresses are expected with '\r\n' line breaks.
    expected = [
        ('D3EEF6D2-FE0B-4A03-A633-AAA16C50BE89',
         'Waterside P. O. Box 365\r\nHarmondworth, West Drayto\r\nBE\r\nBELGIUM'),
        ('D4AFC576-0B22-4CE7-B595-141BE8ABC8DC',
         'unspecified'),
        ('CD6A4955-8D7B-44C4-A3E4-00603FAC03A3',
         '101 Constitution Avenue, NW\r\nSuite 600 West\r\nWashington, DC 20001'),
        ('D97EF71E-9062-42A9-9510-00048B943421',
         '8 HERBERT STREET\r\nALEXANDRIA, VA 22305'),
    ]
    for filing_id, address in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['registrant']['address'], address)

    self.assertEqual(len(filings), 0)
def test_import_foreign_entities_country(self):
    """Importing foreign entities should fill the 'country' table."""
    filings = list(lobbyists.parse_filings(
        util.testpath('foreign_entities.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    # Switch to sqlite3.Row and take a fresh cursor so 'name' can be read
    # by column name.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    # NOTE(review): there is no ORDER BY, so the expected order below
    # relies on the order in which SQLite happens to return the rows.
    cur.execute("SELECT * FROM country")
    rows = list(cur)

    # Country names in the order pop() yields them (last row first).
    expected_names = (
        'AUSTRALIA',
        'UNDETERMINED',
        'SEYCHELLES',
        'UNITED KINGDOM',
        '<SELECT ONE>',
        'unspecified',
        'USA',
        'GERMANY',
        'JAPAN',
    )
    for name in expected_names:
        row = rows.pop()
        self.assertEqual(row['name'], name)

    self.assertEqual(len(rows), 0)
def test_name(self):
    """Parse client name"""
    filings = list(lobbyists.parse_filings(util.testpath('client_name.xml')))

    # (filing id, expected client name) in pop() order.
    expected = [
        ('5A4F7B14-D143-4B57-A345-34296865C20D',
         'Microsoft'),
        ('357D6040-8761-42CB-94C9-0A27C088091E',
         'COMPUTER & COMMUNICATIONS INDUSTRY ASSN'),
        ('240EDC03-05F3-4F04-9F4B-0018BF4651F1',
         'AUTOMATIC DATA PROCESSING, INC.'),
        ('583E1BE3-1B7E-4357-A6C8-00125C7DE22D',
         'EMPRESA BRASILEIRA DE AERONAUTICA SA (EMBRAER)'),
    ]
    for filing_id, name in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['client']['name'], name)

    self.assertEqual(len(filings), 0)
def test_status(self):
    """Parse lobbyist status"""
    filings = list(lobbyists.parse_filings(
        util.testpath('lobbyist_status.xml')))

    # (filing id, lobbyist statuses) in the order pop() yields them,
    # i.e. from the end of each parsed list.
    expected = [
        ('02DDA99B-725A-4DBA-8397-34892A6918D7', ['terminated']),
        ('AB94AB3D-F5D6-4EE8-A462-0925A6D9A499', ['active', 'terminated']),
        ('04926911-8A12-4A0E-9DA4-510869446EAC', ['undetermined']),
    ]
    for filing_id, statuses in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        lobbiers = parsed['lobbyists']
        for status in statuses:
            lobbyist = lobbiers.pop()['lobbyist']
            self.assertEqual(lobbyist['status'], status)
        # Every lobbyist of this filing must have been consumed.
        self.assertEqual(len(lobbiers), 0)

    self.assertEqual(len(filings), 0)
def test_status(self):
    """Parse foreign entity status"""
    filings = list(lobbyists.parse_filings(
        util.testpath('foreign_entity_status.xml')))

    # (filing id, foreign entity statuses) in the order pop() yields
    # them, i.e. from the end of each parsed list.
    expected = [
        ('07361BD2-5007-42D6-8794-A7597AECC1B9', ['undetermined']),
        ('493C9C11-17ED-4875-88D2-FAC96FF06849', ['terminated', 'terminated']),
        ('BBF87FC8-73FC-4050-B6F8-850C79EC72E2', ['active']),
    ]
    for filing_id, statuses in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        entities = parsed['foreign_entities']
        for status in statuses:
            entity = entities.pop()['foreign_entity']
            self.assertEqual(entity['status'], status)
        # Every entity of this filing must have been consumed.
        self.assertEqual(len(entities), 0)

    self.assertEqual(len(filings), 0)
def test_name(self):
    """Parse client name"""
    filings = list(lobbyists.parse_filings(util.testpath('client_name.xml')))

    # (filing id, expected client name) in pop() order.
    expected = [
        ('5A4F7B14-D143-4B57-A345-34296865C20D',
         'Microsoft'),
        ('357D6040-8761-42CB-94C9-0A27C088091E',
         'COMPUTER & COMMUNICATIONS INDUSTRY ASSN'),
        ('240EDC03-05F3-4F04-9F4B-0018BF4651F1',
         'AUTOMATIC DATA PROCESSING, INC.'),
        ('583E1BE3-1B7E-4357-A6C8-00125C7DE22D',
         'EMPRESA BRASILEIRA DE AERONAUTICA SA (EMBRAER)'),
    ]
    for filing_id, name in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['client']['name'], name)

    self.assertEqual(len(filings), 0)
def test_official_position(self):
    """Parse lobbyist 'official position'"""
    filings = list(lobbyists.parse_filings(
        util.testpath('lobbyist_official_position.xml')))

    # (filing id, official positions) in the order pop() yields them,
    # i.e. from the end of each parsed list.
    expected = [
        ('AB94AB3D-F5D6-4EE8-A462-0925A6D9A499',
         ['N/A',
          'MGR. AIR TRAFFIC DIV. WEST PAC, REG, FAA']),
        ('BD894C51-AA23-46AE-9802-006B8C91702B',
         ['ExecFlrAsst, H. Maj. Whip; ExecDir, H.DemCauc.',
          'StaffAsst, DemPolicyComm; FlrAsst, MinoritySec',
          'Chief of Staff, President Reagan',
          'AsstEditor/Ed./Res.Dir, Sen.Rep.PolicyComm;']),
        ('DE038A45-9F6B-4764-B678-8004E7903BC4',
         ['unspecified']),
        ('2164D6BB-EBBA-40D2-9C18-16A2D670030A',
         ['N/A', 'N/A', 'N/A', 'N/A', 'N/A']),
    ]
    for filing_id, positions in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        lobbiers = parsed['lobbyists']
        for position in positions:
            lobbyist = lobbiers.pop()['lobbyist']
            self.assertEqual(lobbyist['official_position'], position)
        # Every lobbyist of this filing must have been consumed.
        self.assertEqual(len(lobbiers), 0)

    self.assertEqual(len(filings), 0)
def test_import_similar_registrants(self):
    """Slightly different registrants are inserted into different rows."""
    filings = list(lobbyists.parse_filings(
        util.testpath('registrants_slightly_different.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    cur = con.cursor()
    cur.execute('SELECT * FROM registrant')
    # One registrant row is expected per parsed filing.
    self.assertEqual(len(cur.fetchall()), len(filings))
def test_import_similar_lobbyists(self):
    """Slightly different lobbyists are inserted into different rows."""
    filings = list(lobbyists.parse_filings(
        util.testpath('lobbyists_slightly_different.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    cur = con.cursor()
    cur.execute('SELECT id FROM lobbyist')
    # Gather the lobbyists of every filing that has any; the table is
    # expected to hold one row for each of them.
    lobbyers = util.flatten(
        [x['lobbyists'] for x in filings if 'lobbyists' in x])
    self.assertEqual(len(cur.fetchall()), len(lobbyers))
def test_id(self):
    """Parse client ID"""
    filings = list(lobbyists.parse_filings(util.testpath('client_id.xml')))

    parsed = filings.pop()
    self.assertEqual(parsed['filing']['id'],
                     'C3226D4B-2F22-4516-BDF2-9E1F918D140E')
    self.assertEqual(parsed['client']['senate_id'], 48)

    # The fixture is expected to hold exactly one filing.
    self.assertEqual(len(filings), 0)
def test_import_identical_clients(self):
    """Identical clients shouldn't be duplicated in the database."""
    filings = list(lobbyists.parse_filings(util.testpath('clients_dup.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    cur = con.cursor()
    cur.execute('SELECT client FROM filing_client')
    # Exactly two filing_client rows are expected (the unpacking raises
    # otherwise), and both must refer to the same client.
    first, second = cur.fetchall()
    self.assertEqual(first[0], second[0])
def test_senate_id(self):
    """Parse registrant Senate ID"""
    filings = list(lobbyists.parse_filings(
        util.testpath('registrant_senate_id.xml')))

    parsed = filings.pop()
    self.assertEqual(parsed['filing']['id'],
                     '9CF0D039-7655-4C7E-99E9-00166359FD5B')
    self.assertEqual(parsed['registrant']['senate_id'], 287656)

    # The fixture is expected to hold exactly one filing.
    self.assertEqual(len(filings), 0)
def test_id(self):
    """Parse filing id"""
    filings = list(lobbyists.parse_filings(util.testpath('ids.xml')))

    # Filing ids in the order pop() yields them (last parsed first).
    expected_ids = (
        '5F787E27-BBF1-45A5-8392-FFF93CCA2746',
        'D48A20C9-211C-43B1-BBD1-001B075854BA',
    )
    for filing_id in expected_ids:
        filing = filings.pop()['filing']
        self.assertEqual(filing['id'], filing_id)

    self.assertEqual(len(filings), 0)
def test_import_foreign_entity_different_ownership(self):
    """Foreign entities with different percentage ownership but otherwise identical should occupy same row"""
    filings = list(lobbyists.parse_filings(
        util.testpath('foreign_entities_different_ownership.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("SELECT * FROM foreign_entity")
    rows = list(cur)
    # All imported entities must have collapsed into a single row.
    self.assertEqual(len(rows), 1)
def test_senate_id(self):
    """Parse registrant Senate ID"""
    filings = list(lobbyists.parse_filings(
        util.testpath('registrant_senate_id.xml')))

    parsed = filings.pop()
    self.assertEqual(parsed['filing']['id'],
                     '9CF0D039-7655-4C7E-99E9-00166359FD5B')
    self.assertEqual(parsed['registrant']['senate_id'], 287656)

    # The fixture is expected to hold exactly one filing.
    self.assertEqual(len(filings), 0)
def test_import_client_different_description(self):
    """Clients with different description but otherwise identical should occupy same row."""
    filings = list(lobbyists.parse_filings(
        util.testpath('clients_different_description.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("SELECT * FROM client")
    rows = list(cur)
    # All imported clients must have collapsed into a single row.
    self.assertEqual(len(rows), 1)
def test_import_foreign_entity_different_ownership(self):
    """Foreign entities with different percentage ownership but otherwise identical should occupy same row"""
    filings = list(lobbyists.parse_filings(
        util.testpath('foreign_entities_different_ownership.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("SELECT * FROM foreign_entity")
    rows = list(cur)
    # All imported entities must have collapsed into a single row.
    self.assertEqual(len(rows), 1)
def test_import_similar_affiliated_orgs(self):
    """Slightly different affiliated orgs are inserted into different rows."""
    filings = list(lobbyists.parse_filings(
        util.testpath('affiliated_orgs_slightly_different.xml')))
    con = sqlite3.connect(':memory:')
    # Keep using whatever connection create_db hands back.
    con = lobbyists.create_db(con)
    cur = con.cursor()
    self.assertTrue(lobbyists.import_filings(cur, filings))

    cur = con.cursor()
    cur.execute('SELECT id FROM affiliated_org')
    # Gather the affiliated orgs of every filing that has any; the table
    # is expected to hold one row for each of them.
    orgs = util.flatten(
        [x['affiliated_orgs'] for x in filings if 'affiliated_orgs' in x])
    self.assertEqual(len(cur.fetchall()), len(orgs))
def test_country(self):
    """Parse foreign entity country"""
    filings = list(lobbyists.parse_filings(
        util.testpath('foreign_entity_country.xml')))

    # (filing id, foreign entity countries) in the order pop() yields
    # them, i.e. from the end of each parsed list.
    expected = [
        ('63AC4585-74BC-478D-A356-FCCFD98FDE64', ['<SELECT ONE>']),
        ('498EAE8A-DE7A-4FFF-A813-062D92FDA271', ['unspecified']),
        ('ACD87507-EB78-4607-95E8-43871D9D1EF2', ['GERMANY']),
        ('BECA0C94-4308-47E5-BF23-887D3954E254', ['USA', 'USA', 'UNDETERMINED']),
        ('4CAC3894-FA4C-4CEC-99C7-1141544CA49B', ['SEYCHELLES', 'UNITED KINGDOM']),
    ]
    for filing_id, countries in expected:
        parsed = filings.pop()
        self.assertEqual(parsed['filing']['id'], filing_id)
        entities = parsed['foreign_entities']
        for country in countries:
            entity = entities.pop()['foreign_entity']
            self.assertEqual(entity['country'], country)
        # Every entity of this filing must have been consumed.
        self.assertEqual(len(entities), 0)

    self.assertEqual(len(filings), 0)
def test_import_client_state_or_local_gov(self):
    """After importing clients, state_or_local_gov table should be unchanged (it's pre-loaded)."""
    filings = list(lobbyists.parse_filings(util.testpath('clients.xml')))
    con = sqlite3.connect(':memory:')
    con = lobbyists.create_db(con)
    cur = con.cursor()
    # assertTrue/assertEqual replace the failUnless* aliases, which were
    # removed from unittest in Python 3.12.
    self.assertTrue(lobbyists.import_filings(cur, filings))

    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("SELECT val FROM state_or_local_gov")
    vals = set(row[0] for row in cur)
    # The table must hold exactly these three distinct values after the
    # import; one set comparison covers both the size and the members.
    self.assertEqual(vals, set(['unspecified', 'y', 'n']))
def test_import_identical_lobbyists2(self):
    """Identical lobbyists shouldn't be duplicated in the database (case 2)."""
    # This test file contains a single filing with two
    # lobbyists. The two lobbyists are exactly the same, except
    # with different statuses. This should result in only a single
    # row in the lobbyist table, which is what the query below counts.
    # NOTE(review): this comment previously named the filing_lobbyists
    # table; confirm whether that table should be checked as well.
    filings = list(
        lobbyists.parse_filings(util.testpath('lobbyists_dup2.xml')))
    con = sqlite3.connect(':memory:')
    con = lobbyists.create_db(con)
    cur = con.cursor()
    # assertTrue/assertEqual replace the failUnless* aliases, which were
    # removed from unittest in Python 3.12.
    self.assertTrue(lobbyists.import_filings(cur, filings))

    cur = con.cursor()
    cur.execute('SELECT * FROM lobbyist')
    rows = cur.fetchall()
    self.assertEqual(len(rows), 1)
def test_import_client_client_status(self):
    """After importing clients, client_status table should be unchanged (it's pre-loaded)."""
    filings = list(lobbyists.parse_filings(util.testpath('clients.xml')))
    con = sqlite3.connect(':memory:')
    con = lobbyists.create_db(con)
    cur = con.cursor()
    # assertTrue/assertEqual replace the failUnless* aliases, which were
    # removed from unittest in Python 3.12.
    self.assertTrue(lobbyists.import_filings(cur, filings))

    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("SELECT status FROM client_status")
    statuses = set(row[0] for row in cur)
    # The table must hold exactly these three distinct statuses after the
    # import; one set comparison covers both the size and the members.
    self.assertEqual(
        statuses,
        set(['active', 'terminated', 'administratively terminated']))
def test_year(self):
    """Parse filing year"""
    filings = list(lobbyists.parse_filings(util.testpath('years.xml')))

    # Expected (filing id, year) pairs, last parsed filing first: the
    # checks walk the parsed list from its end.
    expected = [
        ('BEE00319-9EC2-4ECF-89F7-75A6436433F1', 2008),
        ('12B45653-7326-4803-8B86-7538D7CA65AA', 1999),
    ]

    # Checking the count up front turns a missing or extra filing into a
    # plain assertion failure.
    self.assertEqual(len(filings), len(expected))
    for parsed, (filing_id, year) in zip(reversed(filings), expected):
        f = parsed['filing']
        self.assertEqual(f['id'], filing_id)
        self.assertEqual(f['year'], year)
def test_filing_date(self):
    """Parse filing filing date"""
    filings = list(
        lobbyists.parse_filings(util.testpath('filing_dates.xml')))

    # Expected (filing id, filing date) pairs, last parsed filing first:
    # the checks walk the parsed list from its end.
    expected = [
        ('355A164F-AC36-47EC-AD84-0D4DC2CD579E', '2007-02-13T16:07:28'),
        ('028C0F4E-0B08-465F-BD86-0197F149A77E', '1999-02-08T00:00:00'),
    ]

    # Checking the count up front turns a missing or extra filing into a
    # plain assertion failure.
    self.assertEqual(len(filings), len(expected))
    for parsed, (filing_id, filing_date) in zip(reversed(filings), expected):
        f = parsed['filing']
        self.assertEqual(f['id'], filing_id)
        self.assertEqual(f['filing_date'], filing_date)
def test_import_identical_lobbyists2(self):
    """Identical lobbyists shouldn't be duplicated in the database (case 2)."""
    # This test file contains a single filing with two
    # lobbyists. The two lobbyists are exactly the same, except
    # with different statuses. This should result in only a single
    # row in the lobbyist table, which is what the query below counts.
    # NOTE(review): this comment previously named the filing_lobbyists
    # table; confirm whether that table should be checked as well.
    filings = list(
        lobbyists.parse_filings(util.testpath('lobbyists_dup2.xml')))
    con = sqlite3.connect(':memory:')
    con = lobbyists.create_db(con)
    cur = con.cursor()
    # assertTrue/assertEqual replace the failUnless* aliases, which were
    # removed from unittest in Python 3.12.
    self.assertTrue(lobbyists.import_filings(cur, filings))

    cur = con.cursor()
    cur.execute('SELECT * FROM lobbyist')
    rows = cur.fetchall()
    self.assertEqual(len(rows), 1)
def test_filing_date(self):
    """Parse filing filing date"""
    filings = list(
        lobbyists.parse_filings(util.testpath('filing_dates.xml')))

    # Expected (filing id, filing date) pairs, last parsed filing first:
    # the checks walk the parsed list from its end.
    expected = [
        ('355A164F-AC36-47EC-AD84-0D4DC2CD579E', '2007-02-13T16:07:28'),
        ('028C0F4E-0B08-465F-BD86-0197F149A77E', '1999-02-08T00:00:00'),
    ]

    # Checking the count up front turns a missing or extra filing into a
    # plain assertion failure.
    self.assertEqual(len(filings), len(expected))
    for parsed, (filing_id, filing_date) in zip(reversed(filings), expected):
        f = parsed['filing']
        self.assertEqual(f['id'], filing_id)
        self.assertEqual(f['filing_date'], filing_date)
def test_import_client_orgs(self):
    """Importing clients should fill the 'org' table."""
    filings = list(lobbyists.parse_filings(util.testpath('clients.xml')))
    con = sqlite3.connect(':memory:')
    con = lobbyists.create_db(con)
    cur = con.cursor()
    # assertTrue/assertEqual replace the failUnless* aliases, which were
    # removed from unittest in Python 3.12.
    self.assertTrue(lobbyists.import_filings(cur, filings))

    # sqlite3.Row allows the name column to be read by key below.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    cur.execute("SELECT * FROM org")
    rows = [row['name'] for row in cur]

    # Distinct client names across all filings that carry a client.
    orgs = set(x['client']['name'] for x in filings if 'client' in x)

    # Same count plus every distinct name present means the table holds
    # each client name exactly once and nothing else.
    self.assertEqual(len(rows), len(orgs))
    for org in orgs:
        self.assertTrue(org in rows)
def test_import_govt_entities(self):
    """Government entity importing."""
    filings = list(
        lobbyists.parse_filings(util.testpath('govt_entities.xml')))
    con = sqlite3.connect(':memory:')
    con = lobbyists.create_db(con)
    cur = con.cursor()
    # assertTrue/assertEqual replace the failUnless* aliases, which were
    # removed from unittest in Python 3.12.
    self.assertTrue(lobbyists.import_filings(cur, filings))

    # sqlite3.Row allows the name column to be read by key below.
    con.row_factory = sqlite3.Row
    cur = con.cursor()
    # NOTE(review): the query has no ORDER BY, so the comparison below
    # depends on the order in which SQLite happens to return the rows.
    cur.execute("SELECT * FROM govt_entity")
    names = [row['name'] for row in cur]

    # Expected names, listed from the last returned row to the first.
    expected = [
        'UNDETERMINED',
        'Federal Communications Commission (FCC)',
        'Environmental Protection Agency (EPA)',
        'Energy, Dept of',
        'Federal Energy Regulatory Commission (FERC)',
        'Health & Human Services, Dept of (HHS)',
        'SENATE',
        'HOUSE OF REPRESENTATIVES',
        'NONE',
    ]
    # A single list comparison checks names, order and row count at once.
    self.assertEqual(list(reversed(names)), expected)
def test_name(self):
    """Parse registrant name"""
    filings = list(
        lobbyists.parse_filings(util.testpath('registrant_name.xml')))

    # Expected (filing id, registrant name) pairs, last parsed filing
    # first: the checks walk the parsed list from its end.
    expected = [
        ('FB4267F6-F8CF-43EB-BF47-01EAACD4FAC0', 'CHAFE, BONNIE L.'),
        ('03F5D9EE-5EDD-4ACA-8766-0503753C3C1D', u'Crowell & Moring LLP'),
    ]

    # Checking the count up front turns a missing or extra filing into a
    # plain assertion failure.
    self.assertEqual(len(filings), len(expected))
    for parsed, (filing_id, name) in zip(reversed(filings), expected):
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['registrant']['name'], name)
def test_name(self):
    """Parse registrant name"""
    filings = list(
        lobbyists.parse_filings(util.testpath('registrant_name.xml')))

    # Expected (filing id, registrant name) pairs, last parsed filing
    # first: the checks walk the parsed list from its end.
    expected = [
        ('FB4267F6-F8CF-43EB-BF47-01EAACD4FAC0', 'CHAFE, BONNIE L.'),
        ('03F5D9EE-5EDD-4ACA-8766-0503753C3C1D', u'Crowell & Moring LLP'),
    ]

    # Checking the count up front turns a missing or extra filing into a
    # plain assertion failure.
    self.assertEqual(len(filings), len(expected))
    for parsed, (filing_id, name) in zip(reversed(filings), expected):
        self.assertEqual(parsed['filing']['id'], filing_id)
        self.assertEqual(parsed['registrant']['name'], name)