def load():
    """Reload the earmark tables and the per-politician earmark totals.

    Everything below runs inside one ``db.transaction()`` block:

    1. ``earmark_sponsor`` and ``earmark`` are emptied.
    2. Each record from ``earmarks.parse_file(earmarks.EARMARK_FILE)`` is
       inserted into ``earmark``. Records with a falsy or already-seen id,
       and records whose ``house_request`` is a string, are skipped.
    3. The file is parsed a second time. For every member name that is a key
       of ``lastname2rep`` an ``earmark_sponsor`` row is inserted (only for
       earmarks inserted in step 2) and requested/received counts and amounts
       are accumulated per politician.
    4. The accumulated totals are written to ``politician``.

    Member names that are not in ``lastname2rep`` are collected and printed.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source; confirm that steps 3 and 4 belong inside the transaction and that
    the totals are only accumulated for names found in ``lastname2rep``.
    """
    outdb = {}
    done = set()
    # Per-member columns are removed from the row before it is inserted.
    member_columns = [
        'house_member', 'house_state', 'house_party',
        'senate_member', 'senate_state', 'senate_party', 'district',
    ]
    with db.transaction():
        db.delete('earmark_sponsor', '1=1')
        db.delete('earmark', '1=1')
        for e in earmarks.parse_file(earmarks.EARMARK_FILE):
            de = dict(e)
            de['id'] = web.intget(de['id'])
            if not de['id'] or de['id'] in done:
                continue  # missing the ID? come on!
            if isinstance(de['house_request'], basestring):
                continue  # CLASSIFIED
            for k in de:
                de[k] = cleanrow(de[k])
            for x in member_columns:
                de.pop(x)
            de['recipient_stem'] = tools.stemcorpname(de['intended_recipient'])
            try:
                db.insert('earmark', seqname=False, **de)
            except:
                # Dump the offending row for debugging, then propagate.
                pprint(de)
                raise
            done.add(de['id'])

        reps_not_found = set()
        for e in earmarks.parse_file(earmarks.EARMARK_FILE):
            requests = [e.house_request, e.senate_request]
            members = [e.house_member, e.senate_member]
            for rawRequest, chamber in zip(requests, members):
                for rep in chamber:
                    if rep.lower() not in lastname2rep:
                        #@@ should work on improving quality
                        reps_not_found.add(rep)
                        continue
                    rep = lastname2rep[rep.lower()]
                    if e.id in done:
                        try:
                            db.insert('earmark_sponsor', seqname=False,
                                      earmark_id=e.id, politician_id=rep)
                        except Exception:
                            # Best effort: report and carry on, but let
                            # KeyboardInterrupt/SystemExit propagate.
                            print("Couldn't add %s as sponsor to earmark %d" % (rep, e.id))
                    outdb.setdefault(rep, {
                        'amt_earmark_requested': 0,
                        'n_earmark_requested': 0,
                        'n_earmark_received': 0,
                        'amt_earmark_received': 0,
                    })
                    outdb[rep]['n_earmark_requested'] += 1
                    # Fall back to the final amount when the request is
                    # missing or is not a float.
                    requested = rawRequest or e.final_amt
                    if not isinstance(requested, float):
                        requested = e.final_amt
                    if requested:
                        outdb[rep]['amt_earmark_requested'] += requested
                    if isinstance(e.final_amt, float) and e.final_amt:
                        outdb[rep]['n_earmark_received'] += 1
                        outdb[rep]['amt_earmark_received'] += e.final_amt

        print("Did not find %d reps: %s" % (len(reps_not_found), pformat(reps_not_found)))
        for rep, d in outdb.iteritems():
            # Pass only the variable the WHERE clause references.
            db.update('politician', where='id=$rep', vars={'rep': rep}, **d)
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    """Insert contribution and expenditure rows parsed from FEC e-filings.

    The files matching ``filepattern`` are expanded with ``glob.glob`` and
    handed to ``fec_crude_csv.parse_efilings``, which yields
    ``(filing, schedules)`` pairs. Each schedule whose ``type`` is
    ``'contribution'`` or ``'expenditure'`` becomes a row in the table of the
    same name; any other schedule is reported and ignored.
    """
    paths = glob.glob(filepattern)
    for f, schedules in fec_crude_csv.parse_efilings(paths):
        for s in schedules:
            kind = s.get('type')
            if kind == 'contribution':
                # XXX all this code for politician_id is currently
                # dead, does nothing useful
                # NOTE(review): which `if` the `elif` below pairs with was
                # reconstructed from a whitespace-collapsed source; confirm.
                politician_id = None
                if f.get('candidate_fec_id'):
                    # `fec_id` must keep this name: the query reads it
                    # through vars=locals().
                    fec_id = f['candidate_fec_id']
                    matches = list(db.select('politician_fec_ids',
                                             where='fec_id=$fec_id',
                                             vars=locals()))
                    if len(matches) == 1:
                        politician_id = matches[0].politician_id
                elif not politician_id and f.get('candidate'):
                    # First and last whitespace-separated tokens of the name;
                    # `fn`/`ln` are read through vars=locals().
                    names = f['candidate'].split(' ')
                    fn = names[0]
                    ln = names[-1]
                    found = list(db.select('politician',
                                           where='lastname=$ln and firstname=$fn',
                                           vars=locals()))
                    if len(found) == 1:
                        politician_id = found[0].id
                db.insert(
                    'contribution',
                    committee=f['committee'],
                    contrib_date=s['date'],
                    contributor_org=s.get('contributor_org'),
                    contributor=s['contributor'],
                    occupation=s['occupation'],
                    employer=s['employer'],
                    employer_stem=tools.stemcorpname(s['employer']),
                    candidate_name=f.get('candidate'),
                    filer_id=f['filer_id'],
                    report_id=f['report_id'],
                    amount=s['amount'],
                )
            elif kind == 'expenditure':
                db.insert(
                    'expenditure',
                    candidate_name=f.get('candidate'),
                    committee=f['committee'],
                    expenditure_date=s['date'],
                    recipient=s['recipient'],
                    filer_id=f['filer_id'],
                    report_id=f['report_id'],
                    amount=s['amount'],
                )
            else:
                form_type = s['original_data'].get('form_type')
                print("ignoring record of type %s" % form_type)
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    """Load FEC e-filing schedules into ``contribution`` and ``expenditure``.

    ``fec_crude_csv.parse_efilings`` is given the files matched by
    ``glob.glob(filepattern)`` and yields ``(filing, schedules)`` pairs.
    Schedules are dispatched on their ``type`` key: contributions and
    expenditures are inserted, anything else is printed as ignored.
    """
    for f, schedules in fec_crude_csv.parse_efilings(glob.glob(filepattern)):
        for s in schedules:
            record_type = s.get('type')

            if record_type == 'expenditure':
                db.insert('expenditure',
                          candidate_name=f.get('candidate'),
                          committee=f['committee'],
                          expenditure_date=s['date'],
                          recipient=s['recipient'],
                          filer_id=f['filer_id'],
                          report_id=f['report_id'],
                          amount=s['amount'])
                continue

            if record_type != 'contribution':
                print("ignoring record of type %s" %
                      s['original_data'].get('form_type'))
                continue

            # XXX all this code for politician_id is currently
            # dead, does nothing useful
            # NOTE(review): the pairing of the `elif` was reconstructed from a
            # whitespace-collapsed source; confirm against history.
            politician_id = None
            if f.get('candidate_fec_id'):
                # The local names below are referenced by the `$name`
                # placeholders through vars=locals(); do not rename them.
                fec_id = f['candidate_fec_id']
                by_fec_id = list(db.select('politician_fec_ids',
                                           where='fec_id=$fec_id',
                                           vars=locals()))
                if len(by_fec_id) == 1:
                    politician_id = by_fec_id[0].politician_id
            elif not politician_id and f.get('candidate'):
                name_parts = f['candidate'].split(' ')
                fn, ln = name_parts[0], name_parts[-1]
                by_name = list(db.select('politician',
                                         where='lastname=$ln and firstname=$fn',
                                         vars=locals()))
                if len(by_name) == 1:
                    politician_id = by_name[0].id

            employer = s['employer']
            db.insert('contribution',
                      committee=f['committee'],
                      contrib_date=s['date'],
                      contributor_org=s.get('contributor_org'),
                      contributor=s['contributor'],
                      occupation=s['occupation'],
                      employer=employer,
                      employer_stem=tools.stemcorpname(employer),
                      candidate_name=f.get('candidate'),
                      filer_id=f['filer_id'],
                      report_id=f['report_id'],
                      amount=s['amount'])
def load_fec_contributions():
    """Reload the ``contribution`` table from ``fec_cobol.parse_contributions()``.

    The table is emptied, then one row is inserted per parsed record. Work is
    committed in batches: after every 10000 inserts the current transaction
    is committed, a new one is opened and the running count is printed.
    Once all rows are committed, ``schema.Contribution.create_indexes()`` is
    called.

    Per record:
      * ``'N/A'`` is stripped from ``occupation``. If a ``'/'`` remains, the
        text before the first ``'/'`` is stored as the employer and the rest
        as the occupation; otherwise the employer is empty.
      * ``date`` is stored as NULL unless it splits on ``'-'`` into integers
        that ``datetime.date`` accepts.

    If anything fails, the open (uncommitted) transaction is rolled back and
    the exception is re-raised. Batches that were already committed stay
    committed.
    """
    t = db.transaction()
    n = 0
    try:
        db.delete('contribution', '1=1')
        for f in fec_cobol.parse_contributions():
            f = web.storage(f)
            f.occupation = f.occupation.replace('N/A', '')
            if '/' in f.occupation:
                employer, occupation = f.occupation.split('/', 1)
            else:
                employer = ''
                occupation = f.occupation
            try:
                # Only validating: the original string is what gets stored.
                datetime.date(*[int(x) for x in f.date.split('-')])
            except (ValueError, TypeError):
                # ValueError: non-numeric or out-of-range part.
                # TypeError: wrong number of parts, e.g. '2008-01'.
                f.date = None
            db.insert('contribution',
                      fec_record_id=f.get('fec_record_id'),
                      microfilm_loc=f.microfilm_loc,
                      recipient_id=f.filer_id,
                      name=f.name,
                      street=f.get('street'),
                      city=f.city,
                      state=f.state,
                      zip=f.zip,
                      occupation=occupation,
                      employer=employer,
                      employer_stem=tools.stemcorpname(employer),
                      committee=f.from_id or None,
                      sent=f.date,
                      amount=f.amount)
            n += 1
            if n % 10000 == 0:
                t.commit()
                t = db.transaction()
                print(n)
        t.commit()
    except BaseException:
        # Do not leave a half-filled batch dangling in an open transaction.
        t.rollback()
        raise
    print("Creating indexes on table `contribution`...")
    schema.Contribution.create_indexes()
    print("done.")
def load_fec_contributions():
    """Reload the ``contribution`` table from ``fec_cobol.parse_contributions()``.

    The table is emptied, then one row is inserted per parsed record. Work is
    committed in batches: after every 10000 inserts the current transaction
    is committed, a new one is opened and the running count is printed. The
    final partial batch is committed at the end.

    Per record:
      * ``'N/A'`` is stripped from ``occupation``. If a ``'/'`` remains, the
        text before the first ``'/'`` is stored as the employer and the rest
        as the occupation; otherwise the employer is empty.
      * ``date`` is stored as NULL unless it splits on ``'-'`` into integers
        that ``datetime.date`` accepts.

    If anything fails, the open (uncommitted) transaction is rolled back and
    the exception is re-raised. Batches that were already committed stay
    committed.
    """
    t = db.transaction()
    n = 0
    try:
        db.delete('contribution', '1=1')
        for f in fec_cobol.parse_contributions():
            f = web.storage(f)
            f.occupation = f.occupation.replace('N/A', '')
            if '/' in f.occupation:
                employer, occupation = f.occupation.split('/', 1)
            else:
                employer = ''
                occupation = f.occupation
            try:
                # Only validating: the original string is what gets stored.
                datetime.date(*[int(x) for x in f.date.split('-')])
            except (ValueError, TypeError):
                # ValueError: non-numeric or out-of-range part.
                # TypeError: wrong number of parts, e.g. '2008-01'.
                f.date = None
            db.insert('contribution',
                      fec_record_id=f.get('fec_record_id'),
                      microfilm_loc=f.microfilm_loc,
                      recipient_id=f.filer_id,
                      name=f.name,
                      street=f.get('street'),
                      city=f.city,
                      state=f.state,
                      zip=f.zip,
                      occupation=occupation,
                      employer=employer,
                      employer_stem=tools.stemcorpname(employer),
                      committee=f.from_id or None,
                      sent=f.date,
                      amount=f.amount)
            n += 1
            if n % 10000 == 0:
                t.commit()
                t = db.transaction()
                print(n)
        t.commit()
    except BaseException:
        # Do not leave a half-filled batch dangling in an open transaction.
        t.rollback()
        raise
def load_fec_efilings():
    """Insert rows for every schedule of every filing from ``fec_csv``.

    Iterates ``fec_csv.parse_efilings()`` and each filing's ``'schedules'``
    list. A schedule whose ``type`` is ``'contribution'`` goes into the
    ``contribution`` table; every other schedule is inserted into
    ``expenditure``.
    """
    for f in fec_csv.parse_efilings():
        for s in f['schedules']:
            if s['type'] != 'contribution':
                db.insert('expenditure',
                          candidate_name=f['candidate'],
                          committee=f['committee'],
                          expenditure_date=s['date'],
                          recipient=s['recipient'],
                          filer_id=f['filer_id'],
                          report_id=f['report_id'],
                          amount=s['amount'])
                continue

            # politician_id is computed but not used by the insert below.
            # NOTE(review): the pairing of the `elif` was reconstructed from a
            # whitespace-collapsed source; confirm against history.
            politician_id = None
            if f['candidate_fec_id']:
                # Local names here are read by the `$name` placeholders via
                # vars=locals(); keep them as they are.
                fec_id = f['candidate_fec_id']
                id_rows = list(db.select('politician_fec_ids',
                                         where='fec_id=$fec_id',
                                         vars=locals()))
                if len(id_rows) == 1:
                    politician_id = id_rows[0].politician_id
            elif not politician_id and f['candidate']:
                tokens = f['candidate'].split(' ')
                fn = tokens[0]
                ln = tokens[-1]
                name_rows = list(db.select('politician',
                                           where='lastname=$ln and firstname=$fn',
                                           vars=locals()))
                if len(name_rows) == 1:
                    politician_id = name_rows[0].id

            db.insert('contribution',
                      committee=f['committee'],
                      contrib_date=s['date'],
                      contributor_org=s['contributor_org'],
                      contributor=s['contributor'],
                      occupation=s['occupation'],
                      employer=s['employer'],
                      employer_stem=tools.stemcorpname(s['employer']),
                      candidate_name=f['candidate'],
                      filer_id=f['filer_id'],
                      report_id=f['report_id'],
                      amount=s['amount'])
def load():
    """Reload the earmark tables and the per-politician earmark totals.

    Everything below runs inside one ``db.transaction()`` block:

    1. ``earmark_sponsor`` and ``earmark`` are emptied.
    2. Each record from ``earmarks.parse_file(earmarks.EARMARK_FILE)`` is
       inserted into ``earmark``. Records with a falsy or already-seen id,
       and records whose ``house_request`` is a string, are skipped.
    3. The file is parsed a second time. For every member name that is a key
       of ``lastname2rep`` an ``earmark_sponsor`` row is inserted (only for
       earmarks inserted in step 2) and requested/received counts and amounts
       are accumulated per politician.
    4. The accumulated totals are written to ``politician``.

    Member names that are not in ``lastname2rep`` are collected and printed.

    NOTE(review): the nesting was reconstructed from a whitespace-collapsed
    source; confirm that steps 3 and 4 belong inside the transaction and that
    the totals are only accumulated for names found in ``lastname2rep``.
    """
    outdb = {}
    done = set()
    # Per-member columns are removed from the row before it is inserted.
    member_columns = [
        'house_member', 'house_state', 'house_party',
        'senate_member', 'senate_state', 'senate_party', 'district',
    ]
    with db.transaction():
        db.delete('earmark_sponsor', '1=1')
        db.delete('earmark', '1=1')
        for e in earmarks.parse_file(earmarks.EARMARK_FILE):
            de = dict(e)
            de['id'] = web.intget(de['id'])
            if not de['id'] or de['id'] in done:
                continue  # missing the ID? come on!
            if isinstance(de['house_request'], basestring):
                continue  # CLASSIFIED
            for k in de:
                de[k] = cleanrow(de[k])
            for x in member_columns:
                de.pop(x)
            de['recipient_stem'] = tools.stemcorpname(de['intended_recipient'])
            try:
                db.insert('earmark', seqname=False, **de)
            except:
                # Dump the offending row for debugging, then propagate.
                pprint(de)
                raise
            done.add(de['id'])

        reps_not_found = set()
        for e in earmarks.parse_file(earmarks.EARMARK_FILE):
            requests = [e.house_request, e.senate_request]
            members = [e.house_member, e.senate_member]
            for rawRequest, chamber in zip(requests, members):
                for rep in chamber:
                    if rep.lower() not in lastname2rep:
                        #@@ should work on improving quality
                        reps_not_found.add(rep)
                        continue
                    rep = lastname2rep[rep.lower()]
                    if e.id in done:
                        try:
                            db.insert('earmark_sponsor', seqname=False,
                                      earmark_id=e.id, politician_id=rep)
                        except Exception:
                            # Best effort: report and carry on, but let
                            # KeyboardInterrupt/SystemExit propagate.
                            print("Couldn't add %s as sponsor to earmark %d" % (rep, e.id))
                    outdb.setdefault(rep, {
                        'amt_earmark_requested': 0,
                        'n_earmark_requested': 0,
                        'n_earmark_received': 0,
                        'amt_earmark_received': 0,
                    })
                    outdb[rep]['n_earmark_requested'] += 1
                    # Fall back to the final amount when the request is
                    # missing or is not a float.
                    requested = rawRequest or e.final_amt
                    if not isinstance(requested, float):
                        requested = e.final_amt
                    if requested:
                        outdb[rep]['amt_earmark_requested'] += requested
                    if isinstance(e.final_amt, float) and e.final_amt:
                        outdb[rep]['n_earmark_received'] += 1
                        outdb[rep]['amt_earmark_received'] += e.final_amt

        print("Did not find %d reps: %s" % (len(reps_not_found), pformat(reps_not_found)))
        for rep, d in outdb.iteritems():
            # Pass only the variable the WHERE clause references.
            db.update('politician', where='id=$rep', vars={'rep': rep}, **d)