def load_data():
    """Reload interest-group bill positions from the MapLight CSV export.

    Empties ``interest_group_bill_support`` and refills it inside one
    ``db.transaction()``. For each data row:

    * the interest group is looked up by ``longname`` and inserted when
      no row exists;
    * the bill ``us/<session>/<measure>`` is loaded from its GovTrack XML
      file via ``bills.loadbill`` when absent, otherwise only its
      ``maplightid`` is updated;
    * one support row (1 = support, -1 = oppose) is inserted.

    Blank rows, rows whose first field starts with ``#`` and rows whose
    position is "not known" are skipped.

    Raises:
        KeyError: if a row carries a support code other than 0, 1 or 2.
        ValueError: if a data row does not have exactly six fields.
        Exception: any error from inserting the support row is re-raised
            after the offending row has been printed.
    """
    # MapLight codes (from its README): 0 = oppose, 1 = support,
    # 2 = not known.
    supportdict = {'0': -1, '1': 1, '2': 0}
    path = '../data/crawl/maplight/uniq_map_export_bill_research.csv'

    # Context manager so the CSV handle is closed even when a row fails.
    with open(path) as csvfile:
        with db.transaction():
            db.delete('interest_group_bill_support', '1=1')
            for line in csv.reader(csvfile):
                # csv.reader yields [] for a blank line; skip it together
                # with comment rows.
                if not line or line[0].startswith('#'):
                    continue

                (category_id, longname, maplightid,
                 session, measure, support) = line
                support = supportdict[support]
                if support == 0:
                    continue  # position not known: nothing to record

                typenumber = measure.lower().replace(' ', '')

                r = db.select('interest_group', what="id",
                              where="longname=$longname",
                              vars={'longname': longname})
                if r:
                    groupid = r[0].id
                else:
                    groupid = db.insert('interest_group',
                                        longname=longname,
                                        category_id=category_id)

                bill_id = 'us/%s/%s' % (session, typenumber)
                r = db.select('bill', where="id=$bill_id",
                              vars={'bill_id': bill_id})
                if not r:
                    filename = "../data/crawl/govtrack/us/%s/bills/%s.xml" % (
                        session, typenumber)
                    bills.loadbill(filename, maplightid=maplightid)
                else:
                    db.update('bill', maplightid=maplightid,
                              where="id=$bill_id",
                              vars={'bill_id': bill_id})

                try:
                    db.insert('interest_group_bill_support', seqname=False,
                              bill_id=bill_id, group_id=groupid,
                              support=support)
                except Exception:
                    # Report which row failed, then let the error propagate.
                    print('\n Duplicate row with billid %s groupid %s support %s longname %s' % (
                        bill_id, groupid, support, longname))
                    raise
def load_data():
    """Reload interest-group bill positions from the MapLight CSV export.

    Empties ``interest_group_bill_support`` and refills it inside one
    ``db.transaction()``. For each data row:

    * the interest group is looked up by ``longname`` and inserted when
      no row exists;
    * the bill ``us/<session>/<measure>`` is loaded from its GovTrack XML
      file via ``bills.loadbill`` when absent, otherwise only its
      ``maplightid`` is updated;
    * one support row (1 = support, -1 = oppose) is inserted.

    Blank rows, rows whose first field starts with ``#`` and rows whose
    position is "not known" are skipped.

    Raises:
        KeyError: if a row carries a support code other than 0, 1 or 2.
        ValueError: if a data row does not have exactly six fields.
        Exception: any error from inserting the support row is re-raised
            after the offending row has been printed.
    """
    # MapLight codes (from its README): 0 = oppose, 1 = support,
    # 2 = not known.
    supportdict = {'0': -1, '1': 1, '2': 0}
    path = '../data/crawl/maplight/uniq_map_export_bill_research.csv'

    # Context manager so the CSV handle is closed even when a row fails.
    with open(path) as csvfile:
        with db.transaction():
            db.delete('interest_group_bill_support', '1=1')
            for line in csv.reader(csvfile):
                # csv.reader yields [] for a blank line; skip it together
                # with comment rows.
                if not line or line[0].startswith('#'):
                    continue

                (category_id, longname, maplightid,
                 session, measure, support) = line
                support = supportdict[support]
                if support == 0:
                    continue  # position not known: nothing to record

                typenumber = measure.lower().replace(' ', '')

                r = db.select('interest_group', what="id",
                              where="longname=$longname",
                              vars={'longname': longname})
                if r:
                    groupid = r[0].id
                else:
                    groupid = db.insert('interest_group',
                                        longname=longname,
                                        category_id=category_id)

                bill_id = 'us/%s/%s' % (session, typenumber)
                r = db.select('bill', where="id=$bill_id",
                              vars={'bill_id': bill_id})
                if not r:
                    filename = "../data/crawl/govtrack/us/%s/bills/%s.xml" % (
                        session, typenumber)
                    bills.loadbill(filename, maplightid=maplightid)
                else:
                    db.update('bill', maplightid=maplightid,
                              where="id=$bill_id",
                              vars={'bill_id': bill_id})

                try:
                    db.insert('interest_group_bill_support', seqname=False,
                              bill_id=bill_id, group_id=groupid,
                              support=support)
                except Exception:
                    # Report which row failed, then let the error propagate.
                    print('\n Duplicate row with billid %s groupid %s support %s longname %s' % (
                        bill_id, groupid, support, longname))
                    raise
def get_dist(zip5):
    """Return ``{district_id: fraction}`` for the given 5-digit zip.

    When ``zip5`` matches a row in ``state`` (by ``code``) the result is
    ``{zip5: 1.0}``. Otherwise the ``zip4`` rows for that zip are counted
    per ``district_id`` and each district receives its count divided by
    the total. A zip with no ``zip4`` rows yields an empty dict.
    """
    params = {'zip5': zip5}

    # State codes are accepted in place of a zip and map wholly to
    # themselves.
    if db.select('state', where='code=$zip5', vars=params):
        return {zip5: 1.0}

    rows = db.select(
        'zip4',
        what='COUNT(plus4), district_id',
        where='zip=$zip5',
        group='district_id',
        vars=params,
    ).list()

    total = float(sum(row.count for row in rows))
    return dict(
        (row.district_id, float(row.count) / total) for row in rows
    )
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    """Insert parsed FEC e-filing schedules into the database.

    The files matching ``filepattern`` are passed to
    ``fec_crude_csv.parse_efilings``, which yields ``(filing, schedules)``
    pairs. Each schedule goes into ``contribution`` or ``expenditure``
    according to its ``type``; any other type is reported on stdout and
    skipped.
    """
    paths = glob.glob(filepattern)
    for filing, schedules in fec_crude_csv.parse_efilings(paths):
        for sched in schedules:
            if sched.get('type') == 'contribution':
                # XXX: politician_id is resolved below but never stored,
                # so these lookups have no effect on the inserted row.
                politician_id = None
                if filing.get('candidate_fec_id'):
                    fec_id = filing['candidate_fec_id']
                    id_rows = list(db.select(
                        'politician_fec_ids',
                        where='fec_id=$fec_id',
                        vars={'fec_id': fec_id}))
                    if len(id_rows) == 1:
                        politician_id = id_rows[0].politician_id
                # NOTE: as an ``elif`` this name lookup only runs when the
                # filing has no FEC id; it is not a fallback for a failed
                # id lookup.
                elif filing.get('candidate'):
                    name_parts = filing['candidate'].split(' ')
                    first, last = name_parts[0], name_parts[-1]
                    name_rows = list(db.select(
                        'politician',
                        where='lastname=$ln and firstname=$fn',
                        vars={'ln': last, 'fn': first}))
                    if len(name_rows) == 1:
                        politician_id = name_rows[0].id

                db.insert(
                    'contribution',
                    committee=filing['committee'],
                    contrib_date=sched['date'],
                    contributor_org=sched.get('contributor_org'),
                    contributor=sched['contributor'],
                    occupation=sched['occupation'],
                    employer=sched['employer'],
                    employer_stem=tools.stemcorpname(sched['employer']),
                    candidate_name=filing.get('candidate'),
                    filer_id=filing['filer_id'],
                    report_id=filing['report_id'],
                    amount=sched['amount'],
                )
            elif sched.get('type') == 'expenditure':
                db.insert(
                    'expenditure',
                    candidate_name=filing.get('candidate'),
                    committee=filing['committee'],
                    expenditure_date=sched['date'],
                    recipient=sched['recipient'],
                    filer_id=filing['filer_id'],
                    report_id=filing['report_id'],
                    amount=sched['amount'],
                )
            else:
                form_type = sched['original_data'].get('form_type')
                print("ignoring record of type %s" % form_type)
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    """Insert parsed FEC e-filing schedules into the database.

    The files matching ``filepattern`` are passed to
    ``fec_crude_csv.parse_efilings``, which yields ``(filing, schedules)``
    pairs. Each schedule goes into ``contribution`` or ``expenditure``
    according to its ``type``; any other type is reported on stdout and
    skipped.
    """
    paths = glob.glob(filepattern)
    for filing, schedules in fec_crude_csv.parse_efilings(paths):
        for sched in schedules:
            if sched.get('type') == 'contribution':
                # XXX: politician_id is resolved below but never stored,
                # so these lookups have no effect on the inserted row.
                politician_id = None
                if filing.get('candidate_fec_id'):
                    fec_id = filing['candidate_fec_id']
                    id_rows = list(db.select(
                        'politician_fec_ids',
                        where='fec_id=$fec_id',
                        vars={'fec_id': fec_id}))
                    if len(id_rows) == 1:
                        politician_id = id_rows[0].politician_id
                # NOTE: as an ``elif`` this name lookup only runs when the
                # filing has no FEC id; it is not a fallback for a failed
                # id lookup.
                elif filing.get('candidate'):
                    name_parts = filing['candidate'].split(' ')
                    first, last = name_parts[0], name_parts[-1]
                    name_rows = list(db.select(
                        'politician',
                        where='lastname=$ln and firstname=$fn',
                        vars={'ln': last, 'fn': first}))
                    if len(name_rows) == 1:
                        politician_id = name_rows[0].id

                db.insert(
                    'contribution',
                    committee=filing['committee'],
                    contrib_date=sched['date'],
                    contributor_org=sched.get('contributor_org'),
                    contributor=sched['contributor'],
                    occupation=sched['occupation'],
                    employer=sched['employer'],
                    employer_stem=tools.stemcorpname(sched['employer']),
                    candidate_name=filing.get('candidate'),
                    filer_id=filing['filer_id'],
                    report_id=filing['report_id'],
                    amount=sched['amount'],
                )
            elif sched.get('type') == 'expenditure':
                db.insert(
                    'expenditure',
                    candidate_name=filing.get('candidate'),
                    committee=filing['committee'],
                    expenditure_date=sched['date'],
                    recipient=sched['recipient'],
                    filer_id=filing['filer_id'],
                    report_id=filing['report_id'],
                    amount=sched['amount'],
                )
            else:
                form_type = sched['original_data'].get('form_type')
                print("ignoring record of type %s" % form_type)
def main():
    """Rebuild the bill tables from the crawled GovTrack bill XML files.

    Inside one ``db.transaction()``: deletes every ``vote`` row, deletes
    the ``interest_group_bill_support`` rows that reference an existing
    bill, deletes every ``bill`` row, then calls ``loadbill`` for each
    file matching ``../data/crawl/govtrack/us/*/bills/*.xml``.
    """
    with db.transaction():
        db.delete("vote", "1=1")
        # Pick the dependent rows with a subquery. Joining the ids into a
        # single string and binding it to one placeholder compares
        # bill_id against that whole string, which matches nothing as
        # soon as more than one bill exists.
        db.delete("interest_group_bill_support",
                  where="bill_id IN (SELECT id FROM bill)")
        db.delete("bill", "1=1")
        for fn in glob.glob("../data/crawl/govtrack/us/*/bills/*.xml"):
            loadbill(fn)
def check_user():
    """Interactively assign a new level to every user whose level is 0.

    Each matching user is printed, a level is read from stdin, and the
    value is written to the row identified by the user's ``wechat`` field.

    Raises:
        ValueError: if the entered text is not an integer, or the integer
            is outside the range 0-100.
    """
    users = db.select('user', limitation={'level': 0})
    for user in users:
        print(user)
        n_level = int(input('new level: '))
        # Explicit check instead of ``assert``: assertions are stripped
        # under ``python -O`` and would let out-of-range levels through.
        if not 0 <= n_level <= 100:
            raise ValueError(
                'level must be between 0 and 100, got %d' % n_level)
        db.upd_row('user',
                   limitation={'wechat': user['wechat']},
                   data={'level': n_level})
def load_fec_efilings():
    """Insert the schedules of every parsed FEC e-filing into the database.

    Iterates the filings yielded by ``fec_csv.parse_efilings()``. A
    schedule whose ``type`` is ``'contribution'`` is inserted into
    ``contribution``; every other schedule is inserted into
    ``expenditure``.
    """
    for filing in fec_csv.parse_efilings():
        for sched in filing['schedules']:
            if sched['type'] != 'contribution':
                db.insert(
                    'expenditure',
                    candidate_name=filing['candidate'],
                    committee=filing['committee'],
                    expenditure_date=sched['date'],
                    recipient=sched['recipient'],
                    filer_id=filing['filer_id'],
                    report_id=filing['report_id'],
                    amount=sched['amount'],
                )
                continue

            # politician_id is resolved here but not passed to the insert
            # below, so the lookups do not affect the stored row.
            politician_id = None
            if filing['candidate_fec_id']:
                fec_id = filing['candidate_fec_id']
                id_rows = list(db.select(
                    'politician_fec_ids',
                    where='fec_id=$fec_id',
                    vars={'fec_id': fec_id}))
                if len(id_rows) == 1:
                    politician_id = id_rows[0].politician_id
            # NOTE: as an ``elif`` this name lookup only runs when the
            # filing has no FEC id.
            elif filing['candidate']:
                name_parts = filing['candidate'].split(' ')
                first, last = name_parts[0], name_parts[-1]
                name_rows = list(db.select(
                    'politician',
                    where='lastname=$ln and firstname=$fn',
                    vars={'ln': last, 'fn': first}))
                if len(name_rows) == 1:
                    politician_id = name_rows[0].id

            db.insert(
                'contribution',
                committee=filing['committee'],
                contrib_date=sched['date'],
                contributor_org=sched['contributor_org'],
                contributor=sched['contributor'],
                occupation=sched['occupation'],
                employer=sched['employer'],
                employer_stem=tools.stemcorpname(sched['employer']),
                candidate_name=filing['candidate'],
                filer_id=filing['filer_id'],
                report_id=filing['report_id'],
                amount=sched['amount'],
            )
def loadbill(fn, maplightid=None):
    """Load one GovTrack bill XML file and its roll-call votes into the db.

    The bill is parsed with ``xmltramp`` and converted by ``bill2dict``,
    then inserted into ``bill``. For each vote action that has a roll
    number (only the first per ``where`` value), the matching roll file
    is loaded, each voter's position is inserted into or updated in
    ``vote``, and the yea/nay totals are written back to the bill row.

    Args:
        fn: path of the bill XML file.
        maplightid: optional MapLight id; stored on the bill row when
            truthy.
    """
    bill = xmltramp.load(fn)
    d = bill2dict(bill)
    if maplightid:
        d["maplightid"] = maplightid
    db.insert("bill", seqname=False, **d)

    # Progress indicator: carriage return, no newline, flushed at once.
    sys.stdout.write("\r %-25s" % d["id"])
    sys.stdout.flush()

    done = []
    for action in bill.actions["vote":]:
        if not action().get("roll"):
            continue
        where = action("where")
        if where in done:
            continue  # don't count veto overrides
        done.append(where)

        votedoc = "%s/rolls/%s%s-%s.xml" % (
            d["session"], where, action("datetime")[:4], action("roll"))
        # Separate name for the roll document so the loop variable is not
        # rebound half-way through the iteration.
        roll = xmltramp.load("../data/crawl/govtrack/us/" + votedoc)

        yeas = 0
        neas = 0
        for voter in roll["voter":]:
            # Normalise the position once per voter and reuse it.
            position = fixvote(voter("vote"))
            if position == 1:
                yeas += 1
            elif position == -1:
                neas += 1

            rep = govtrackp(voter("id"))
            if not rep:
                continue

            # UGLY HACK: if a politician (bob_menendez for instance) voted
            # for the same bill in both chambers of congress the insert
            # fails, so an existing row is updated instead.
            query_vars = {"d": d, "rep": rep}
            if not db.select("vote",
                             where="bill_id=$d['id'] AND politician_id=$rep",
                             vars=query_vars):
                db.insert("vote", seqname=False, politician_id=rep,
                          bill_id=d["id"], vote=position)
            else:
                print("")
                print("Updating: %s %s %s %s" % (
                    votedoc, rep, d["id"], position))
                db.update(
                    "vote",
                    where="bill_id=$d['id'] AND politician_id=$rep",
                    vote=position,
                    vars=query_vars,
                )

        # NOTE(review): totals are written once per roll, so a later
        # roll's totals replace an earlier one's - confirm this is
        # intended.
        db.update("bill", where="id = $d['id']", yeas=yeas, neas=neas,
                  vars={"d": d})