Exemple #1
0
def loadbill(fn, maplightid=None):
    """Load one bill document into the `bill` and `position` tables.

    `fn` is handed to `xmltramp.load` and the parsed document is turned into
    a row by `bill2dict`; `maplightid` is stored on that row as given (None
    included).  The row is inserted, or, when the insert raises
    IntegrityError, used to update the existing row with the same id.

    Every `vote` action that carries a `roll` attribute has its roll
    document loaded from under GOVTRACK_CRAWL, and each voter's vote is
    recorded per politician (a later roll overwrites an earlier one for the
    same politician).  The bill's `position` rows are then replaced inside a
    single transaction.
    """
    doc = xmltramp.load(fn)
    row = bill2dict(doc)
    row.maplightid = maplightid

    bill_id = row.id
    try:
        db.insert('bill', seqname=False, **row)
    except IntegrityError:
        # The bill already exists: update every column except the key.
        row.pop('id')
        db.update('bill', where="id=" + web.sqlquote(bill_id), **row)

    votes_by_politician = {}
    for action in doc.actions['vote':]:
        if not action().get('roll'):
            continue

        roll_path = '/us/%s/rolls/%s%s-%s.xml' % (
            row.session, action('where'), action('datetime')[:4],
            action('roll'))
        roll_doc = xmltramp.load(GOVTRACK_CRAWL + roll_path)
        for voter in roll_doc['voter':]:
            choice = fixvote(voter('vote'))
            votes_by_politician[govtrackp(voter('id'))] = choice

    # A None key (presumably a voter govtrackp could not map -- confirm)
    # must not reach the table.
    votes_by_politician.pop(None, None)

    with db.transaction():
        db.delete('position', where='bill_id=$bill_id',
                  vars={'bill_id': bill_id})
        for politician_id, choice in votes_by_politician.items():
            db.insert('position', seqname=False,
                      bill_id=bill_id, politician_id=politician_id,
                      vote=choice)
Exemple #2
0
def load_data():
    """Rebuild `interest_group_bill_support` from the MapLight research CSV.

    Inside one transaction the table is emptied and every CSV row is
    processed.  Empty rows, rows whose first field starts with '#', and rows
    whose support code maps to 0 ("not known") are skipped.  For each
    remaining row the interest group is looked up by `longname` (and
    inserted when missing), the bill is loaded through `bills.loadbill` when
    it is not in `bill` yet (otherwise only its `maplightid` is updated),
    and one support row is inserted.

    Raises:
        KeyError: if a row has a support code other than '0', '1' or '2'.
        ValueError: if a data row does not have exactly six fields.
        Exception: whatever the support insert raises is re-raised after
            the offending row has been printed.
    """
    # 0: oppose; 1: support; 2: not known (from README)
    supportdict = {'0': -1, '1': 1, '2': 0}
    csv_path = '../data/crawl/maplight/uniq_map_export_bill_research.csv'

    # `with` closes the CSV handle even when a row fails.
    with open(csv_path) as fh:
        with db.transaction():
            db.delete('interest_group_bill_support', '1=1')
            for line in csv.reader(fh):
                # csv.reader yields [] for blank lines; line[0] would fail.
                if not line or line[0].startswith('#'):
                    continue
                category_id, longname, maplightid, session, measure, support = line
                support = supportdict[support]
                if support == 0:
                    continue
                typenumber = measure.lower().replace(' ', '')

                r = db.select('interest_group', what="id",
                              where="longname=$longname",
                              vars={'longname': longname})
                if r:
                    groupid = r[0].id
                else:
                    groupid = db.insert('interest_group', longname=longname,
                                        category_id=category_id)

                bill_id = 'us/%s/%s' % (session, typenumber)
                bill_vars = {'bill_id': bill_id}
                if not db.select('bill', where="id=$bill_id", vars=bill_vars):
                    filename = "../data/crawl/govtrack/us/%s/bills/%s.xml" % (session, typenumber)
                    bills.loadbill(filename, maplightid=maplightid)
                else:
                    db.update('bill', maplightid=maplightid,
                              where="id=$bill_id", vars=bill_vars)

                try:
                    db.insert('interest_group_bill_support', seqname=False,
                              bill_id=bill_id, group_id=groupid,
                              support=support)
                except Exception:
                    # Report which row failed, then let the error abort the
                    # transaction.
                    print('\n Duplicate row with billid %s groupid %s support %s longname %s' % (bill_id, groupid, support, longname))
                    raise
Exemple #3
0
def generate_similarities():
    """
    Recompute the `group_politician_similarity` table.

    Joins interest_group_bill_support with position on bill_id and, for each
    (group_id, politician_id) pair, counts the joined rows (`total`) and the
    rows where the group's non-zero support equals the politician's vote
    (`agreed`).  Only pairs with at least one agreement are written; the
    table is emptied and refilled inside one transaction.
    """
    rows = db.query(
        'select igbp.group_id, position.politician_id, igbp.support, position.vote'
        ' from interest_group_bill_support igbp, position'
        ' where igbp.bill_id = position.bill_id')
    agreements = {}
    totals = {}

    for row in rows:
        pair = (row.group_id, row.politician_id)
        totals[pair] = totals.get(pair, 0) + 1
        if row.support != 0 and row.support == row.vote:
            agreements[pair] = agreements.get(pair, 0) + 1

    with db.transaction():
        db.delete('group_politician_similarity', '1=1')
        for (group_id, politician_id), agreed in agreements.items():
            db.insert('group_politician_similarity',
                      seqname=False,
                      group_id=group_id,
                      politician_id=politician_id,
                      agreed=agreed,
                      total=totals[(group_id, politician_id)])
Exemple #4
0
def load_soi():
    """Aggregate SOI records per district and insert them into `soi`.

    Each record from `soi.parse_soi()` is mapped through `get_dist(z.loc)`,
    whose result is used here as a mapping of district -> multiplier
    (presumably the share of the location that lies in that district --
    TODO confirm).  Per district, one accumulator dict is kept per bracket
    position and updated key by key:

    * keys starting with `n_` / `tot_`, and the key `agi`: weighted running
      sum;
    * keys starting with `pct_` / `avg_`: running average weighted by filer
      count;
    * any other key: overwritten with the most recent value.

    Finally one `soi` row is inserted per (district, bracket).
    """
    # TODO: not sure how to handle agi and gini values.
    districts = {}
    data = {}  # filled below but never read inside this function
    for z in soi.parse_soi():
        dists_for_data = get_dist(z.loc)
        if dists_for_data:
            data[z.loc] = z
            for d in dists_for_data.keys():   # for each district associated with loc
                if d not in districts:
                    # One accumulator per bracket, sized from the first
                    # record seen for this district.
                    districts[d] = { 'brackets': [{'n_filers':0} for x in range(len(z.brackets))] }
                # zip pairs brackets by position and stops at the shorter list.
                for new_data,cur_data in zip(z.brackets, districts[d]['brackets']):
                    #print new_data.n_filers, new_data.n_prepared, new_data.agi, new_data.bracket_low
                    # Capture the filer count before the key loop below
                    # updates cur_data['n_filers'].
                    n_filers_old = cur_data['n_filers']
                    # Normalises a falsy filer count to 0 (mutates the parsed record).
                    if not new_data.n_filers: new_data['n_filers'] = 0
                    n_filers_new = n_filers_old + new_data.n_filers * dists_for_data[d]
                    for k in new_data.keys():
                        if k not in cur_data:
                            cur_data[k] = 0
                        if k.startswith('n_') or k.startswith('tot_') or k == 'agi':
                            # Additive quantities: add this record's weighted value.
                            if new_data[k]:
                                cur_data[k] += new_data[k] * dists_for_data[d]
                        elif k.startswith('pct_') or k.startswith('avg_'):
                            # Ratios: filer-weighted mean of old and new values.
                            # NOTE(review): n_filers_new is 0 when neither side has
                            # filers, which would raise ZeroDivisionError here; under
                            # Python 2 this is also integer division if every operand
                            # is an int -- confirm the value types.
                            if new_data[k]:
                                cur_data[k] = (cur_data[k] * n_filers_old + dists_for_data[d] * new_data[k] * new_data.n_filers) / n_filers_new
                        else:
                            # Anything else (e.g. bracket_low): the last record wins.
                            #if k in cur_data and cur_data[k] != new_data[k]: print k, cur_data[k], new_data[k]
                            cur_data[k] = new_data[k]
    for d in districts.keys():
        for b in districts[d]['brackets']:
            # d is rebound inside the loop; the bracket list was already
            # fetched under the original key, so iteration is unaffected.
            if d == 'DC': d = 'DC-00' # HACK: Data has DC which should be DC-00.
            # We can't use None because bracket_low is part of the primary key.
            if isinstance(b['bracket_low'], NoneType): b['bracket_low'] = -1
            db.insert('soi', seqname=False, district_id=d, **b)
Exemple #5
0
def loadroll(fn):
    roll = web.storage()
    roll.id = fn.split('/')[-1].split('.')[0]
    vote = xmltramp.load(fn)
    if vote['bill':]:
        b = vote.bill
        roll.bill_id = 'us/%s/%s%s' % (b('session'), b('type'), b('number'))
    else:
        roll.bill_id = None
    roll.type = str(vote.type)
    roll.question = str(vote.question)
    roll.required = str(vote.required)
    roll.result = str(vote.result)
    
    try:
        db.insert('roll', seqname=False, **roll)
    except IntegrityError:
        if not db.update('roll', where="id=" + web.sqlquote(roll.id), bill_id=roll.bill_id):
            print "\nMissing bill:", roll.bill_id
            raise NotDone
    
    with db.transaction():
        db.delete('vote', where="roll_id=$roll.id", vars=locals())
        for voter in vote['voter':]:
            rep = govtrackp(voter('id'))
            if rep:
                db.insert('vote', seqname=False, 
                  politician_id=rep, roll_id=roll.id, vote=fixvote(voter('vote')))
            else:
                pass #@@!--check again after load_everyone
Exemple #6
0
def load_categories():
    """Replace the contents of the `category` table from CRP_Categories.csv.

    The table is emptied and refilled inside one transaction.  Empty rows
    and rows whose first field starts with '#' are skipped; every other row
    must have exactly five fields, of which the last is ignored.

    Raises:
        ValueError: if a data row does not have exactly five fields.
    """
    # `with` closes the CSV handle even when an insert fails.
    with open('../data/crawl/maplight/CRP_Categories.csv') as fh:
        with db.transaction():
            db.delete('category', '1=1')
            for line in csv.reader(fh):
                # csv.reader yields [] for blank lines; line[0] would fail.
                if not line or line[0].startswith('#'):
                    continue
                cid, cname, industry, sector, _ignored = line
                db.insert('category', seqname=False, id=cid, name=cname,
                          industry=industry, sector=sector)
Exemple #7
0
def load_data():
    """Rebuild `interest_group_bill_support` from the MapLight research CSV.

    Inside one transaction the table is emptied and every CSV row is
    processed.  Empty rows, rows whose first field starts with '#', and rows
    whose support code maps to 0 ("not known") are skipped.  For each
    remaining row the interest group is looked up by `longname` (and
    inserted when missing), the bill is loaded through `bills.loadbill` when
    it is not in `bill` yet (otherwise only its `maplightid` is updated),
    and one support row is inserted.

    Raises:
        KeyError: if a row has a support code other than '0', '1' or '2'.
        ValueError: if a data row does not have exactly six fields.
        Exception: whatever the support insert raises is re-raised after
            the offending row has been printed.
    """
    supportdict = {
        '0': -1,
        '1': 1,
        '2': 0
    }  #0: oppose ; 1: support; 2: not known (from README)

    # `with` closes the CSV handle even when a row fails.
    with open('../data/crawl/maplight/uniq_map_export_bill_research.csv') as fh:
        with db.transaction():
            db.delete('interest_group_bill_support', '1=1')
            for line in csv.reader(fh):
                # csv.reader yields [] for blank lines; line[0] would fail.
                if not line or line[0].startswith('#'):
                    continue
                category_id, longname, maplightid, session, measure, support = line
                support = supportdict[support]
                if support == 0:
                    continue
                typenumber = measure.lower().replace(' ', '')

                r = db.select('interest_group',
                              what="id",
                              where="longname=$longname",
                              vars={'longname': longname})
                if r:
                    groupid = r[0].id
                else:
                    groupid = db.insert('interest_group',
                                        longname=longname,
                                        category_id=category_id)

                bill_id = 'us/%s/%s' % (session, typenumber)
                bill_vars = {'bill_id': bill_id}
                if not db.select('bill', where="id=$bill_id", vars=bill_vars):
                    filename = "../data/crawl/govtrack/us/%s/bills/%s.xml" % (
                        session, typenumber)
                    bills.loadbill(filename, maplightid=maplightid)
                else:
                    db.update('bill',
                              maplightid=maplightid,
                              where="id=$bill_id",
                              vars=bill_vars)

                try:
                    db.insert('interest_group_bill_support',
                              seqname=False,
                              bill_id=bill_id,
                              group_id=groupid,
                              support=support)
                except Exception:
                    # Report which row failed, then let the error abort the
                    # transaction.
                    print('\n Duplicate row with billid %s groupid %s support %s longname %s' % (
                        bill_id, groupid, support, longname))
                    raise
Exemple #8
0
def load_categories():
    """Replace the contents of the `category` table from CRP_Categories.csv.

    The table is emptied and refilled inside one transaction.  Empty rows
    and rows whose first field starts with '#' are skipped; every other row
    must have exactly five fields, of which the last is ignored.

    Raises:
        ValueError: if a data row does not have exactly five fields.
    """
    # `with` closes the CSV handle even when an insert fails.
    with open('../data/crawl/maplight/CRP_Categories.csv') as fh:
        with db.transaction():
            db.delete('category', '1=1')
            for line in csv.reader(fh):
                # csv.reader yields [] for blank lines; line[0] would fail.
                if not line or line[0].startswith('#'):
                    continue
                cid, cname, industry, sector, _ignored = line
                db.insert('category',
                          seqname=False,
                          id=cid,
                          name=cname,
                          industry=industry,
                          sector=sector)
Exemple #9
0
def load_fec_ids():
    """Rebuild `politician_fec_ids` and fill `fec2pol` from FEC_CRP_ID.tsv.

    The first line of the file is treated as a header and skipped.  Every
    other line is split on whitespace into an FEC id and a CRP id; when
    `tools.opensecretsp(crp_id)` returns a truthy politician id, the pair is
    recorded in the `fec2pol` mapping (defined outside this function) and
    inserted into the table.  The delete and all inserts share one
    transaction.

    Raises:
        StopIteration: if the file is empty (there is no header line).
        ValueError: if a line does not split into exactly two fields.
    """
    with db.transaction():
        db.delete('politician_fec_ids', '1=1')
        # `with` closes the handle even when a line fails to parse.
        with open('../data/crawl/opensecrets/FEC_CRP_ID.tsv') as fh:
            next(fh)  # skip the header line
            for line in fh:
                fec_id, crp_id = line.split()
                # Resolve the politician once instead of three times.
                politician_id = tools.opensecretsp(crp_id)
                if politician_id:
                    fec2pol[fec_id] = politician_id
                    db.insert('politician_fec_ids',
                      seqname=False,
                      politician_id=politician_id,
                      fec_id=fec_id)
Exemple #10
0
def load_fec_ids():
    """Rebuild `politician_fec_ids` and fill `fec2pol` from FEC_CRP_ID.tsv.

    The first line of the file is treated as a header and skipped.  Every
    other line is split on whitespace into an FEC id and a CRP id; when
    `tools.opensecretsp(crp_id)` returns a truthy politician id, the pair is
    recorded in the `fec2pol` mapping (defined outside this function) and
    inserted into the table.  The delete and all inserts share one
    transaction.

    Raises:
        StopIteration: if the file is empty (there is no header line).
        ValueError: if a line does not split into exactly two fields.
    """
    with db.transaction():
        db.delete('politician_fec_ids', '1=1')
        # `with` closes the handle even when a line fails to parse.
        with open('../data/crawl/opensecrets/FEC_CRP_ID.tsv') as fh:
            next(fh)  # skip the header line
            for line in fh:
                fec_id, crp_id = line.split()
                # Resolve the politician once instead of three times.
                politician_id = tools.opensecretsp(crp_id)
                if politician_id:
                    fec2pol[fec_id] = politician_id
                    db.insert('politician_fec_ids',
                              seqname=False,
                              politician_id=politician_id,
                              fec_id=fec_id)
Exemple #11
0
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    """Insert `contribution` and `expenditure` rows from FEC e-filings.

    Every file matching `filepattern` is handed to
    `fec_crude_csv.parse_efilings`, which yields (filing, schedules) pairs.
    A schedule of type 'contribution' or 'expenditure' becomes one row in
    the table of the same name; any other schedule is reported on stdout
    and skipped.
    """
    matching_files = glob.glob(filepattern)
    for f, schedules in fec_crude_csv.parse_efilings(matching_files):
        for s in schedules:
            kind = s.get('type')
            if kind == 'contribution':
                # XXX the politician_id lookup below is currently dead code:
                # the value it computes is never written anywhere.
                politician_id = None
                if f.get('candidate_fec_id'):
                    by_fec_id = list(
                        db.select('politician_fec_ids',
                                  where='fec_id=$fec_id',
                                  vars={'fec_id': f['candidate_fec_id']}))
                    if len(by_fec_id) == 1:
                        politician_id = by_fec_id[0].politician_id
                elif not politician_id and f.get('candidate'):
                    name_parts = f['candidate'].split(' ')
                    by_name = list(
                        db.select('politician',
                                  where='lastname=$ln and firstname=$fn',
                                  vars={'fn': name_parts[0],
                                        'ln': name_parts[-1]}))
                    if len(by_name) == 1:
                        politician_id = by_name[0].id

                contribution = dict(
                    committee=f['committee'],
                    contrib_date=s['date'],
                    contributor_org=s.get('contributor_org'),
                    contributor=s['contributor'],
                    occupation=s['occupation'],
                    employer=s['employer'],
                    employer_stem=tools.stemcorpname(s['employer']),
                    candidate_name=f.get('candidate'),
                    filer_id=f['filer_id'],
                    report_id=f['report_id'],
                    amount=s['amount'])
                db.insert('contribution', **contribution)
            elif kind == 'expenditure':
                expenditure = dict(
                    candidate_name=f.get('candidate'),
                    committee=f['committee'],
                    expenditure_date=s['date'],
                    recipient=s['recipient'],
                    filer_id=f['filer_id'],
                    report_id=f['report_id'],
                    amount=s['amount'])
                db.insert('expenditure', **expenditure)
            else:
                form_type = s['original_data'].get('form_type')
                print("ignoring record of type %s" % form_type)
Exemple #12
0
def load_fec_efilings(filepattern=fec_crude_csv.DEFAULT_EFILINGS_FILEPATTERN):
    """Load FEC e-filing schedules into `contribution` and `expenditure`.

    `glob.glob(filepattern)` selects the input files and
    `fec_crude_csv.parse_efilings` yields one (filing, schedules) pair per
    filing.  Contribution and expenditure schedules are inserted into their
    respective tables; schedules of any other type are only reported on
    stdout.
    """
    for filing, schedules in fec_crude_csv.parse_efilings(glob.glob(filepattern)):
        for sched in schedules:
            sched_type = sched.get('type')

            if sched_type == 'expenditure':
                db.insert('expenditure',
                          candidate_name=filing.get('candidate'),
                          committee=filing['committee'],
                          expenditure_date=sched['date'],
                          recipient=sched['recipient'],
                          filer_id=filing['filer_id'],
                          report_id=filing['report_id'],
                          amount=sched['amount'])
                continue

            if sched_type != 'contribution':
                print("ignoring record of type %s" %
                      sched['original_data'].get('form_type'))
                continue

            # XXX all this code for politician_id is currently dead: the
            # result is never stored.  The queries are still issued.
            politician_id = None
            if filing.get('candidate_fec_id'):
                lookup = {'fec_id': filing['candidate_fec_id']}
                matches = list(db.select('politician_fec_ids',
                                         where='fec_id=$fec_id',
                                         vars=lookup))
                if len(matches) == 1:
                    politician_id = matches[0].politician_id
            elif not politician_id and filing.get('candidate'):
                words = filing['candidate'].split(' ')
                lookup = {'fn': words[0], 'ln': words[-1]}
                matches = list(db.select('politician',
                                         where='lastname=$ln and firstname=$fn',
                                         vars=lookup))
                if len(matches) == 1:
                    politician_id = matches[0].id

            db.insert('contribution',
                      committee=filing['committee'],
                      contrib_date=sched['date'],
                      contributor_org=sched.get('contributor_org'),
                      contributor=sched['contributor'],
                      occupation=sched['occupation'],
                      employer=sched['employer'],
                      employer_stem=tools.stemcorpname(sched['employer']),
                      candidate_name=filing.get('candidate'),
                      filer_id=filing['filer_id'],
                      report_id=filing['report_id'],
                      amount=sched['amount'])
Exemple #13
0
def loadbill(fn, maplightid=None):
    bill = xmltramp.load(fn)
    d = bill2dict(bill)
    if maplightid:
        d["maplightid"] = maplightid
    db.insert("bill", seqname=False, **d)
    print "\r  %-25s" % d["id"],
    sys.stdout.flush()

    done = []
    for vote in bill.actions["vote":]:
        if not vote().get("roll"):
            continue
        if vote("where") in done:
            continue  # don't count veto overrides
        done.append(vote("where"))

        votedoc = "%s/rolls/%s%s-%s.xml" % (d["session"], vote("where"), vote("datetime")[:4], vote("roll"))
        vote = xmltramp.load("../data/crawl/govtrack/us/" + votedoc)
        yeas = 0
        neas = 0
        for voter in vote["voter":]:
            if fixvote(voter("vote")) == 1:
                yeas += 1
            elif fixvote(voter("vote")) == -1:
                neas += 1
            rep = govtrackp(voter("id"))
            if rep:
                # UGLY HACK: if a politician (bob_menendez for instance) voted
                # for the same bill in both chambers of congress the insert
                # fails.
                if not db.select("vote", where="bill_id=$d['id'] AND politician_id=$rep", vars=locals()):
                    db.insert("vote", seqname=False, politician_id=rep, bill_id=d["id"], vote=fixvote(voter("vote")))
                else:
                    print
                    print "Updating:", votedoc, rep, d["id"], fixvote(voter("vote"))
                    db.update(
                        "vote",
                        where="bill_id=$d['id'] AND politician_id=$rep",
                        vote=fixvote(voter("vote")),
                        vars=locals(),
                    )
        db.update("bill", where="id = $d['id']", yeas=yeas, neas=neas, vars=locals())
Exemple #14
0
def load_fec_committees():
    """Reload the `committee` table from the FEC committee records.

    Empties `contribution` and then `committee`, and inserts one row for
    every record yielded by `fec_cobol.parse_committees(reverse=True)`.
    An insert that raises `psycopg2.IntegrityError` is skipped on the
    assumption that the committee was already imported.
    """
    db.delete('contribution', '1=1')
    db.delete('committee', '1=1')
    for raw in fec_cobol.parse_committees(reverse=True):
        rec = web.storage(raw)
        row = dict(
            id=rec.committee_id,
            name=rec.committee_name,
            treasurer=rec.treasurer_name,
            street1=rec.street_one,
            street2=rec.street_two,
            city=rec.city,
            state=rec.state,
            zip=rec.zip,
            connected_org_name=rec.connected_org_name,
            candidate_id=rec.candidate_id,
            type=rec.committee_type)
        try:
            db.insert('committee', seqname=False, **row)
        except psycopg2.IntegrityError:
            continue # already imported
Exemple #15
0
def load_fec_committees():
    """Rebuild the `committee` table from `fec_cobol.parse_committees`.

    `contribution` and `committee` are emptied first.  Each parsed record
    (requested with `reverse=True`) is mapped onto the table's columns and
    inserted; a `psycopg2.IntegrityError` on insert is ignored because the
    committee is taken to be imported already.
    """
    # (table column, record attribute) pairs, in insertion order.
    column_sources = (
        ('id', 'committee_id'),
        ('name', 'committee_name'),
        ('treasurer', 'treasurer_name'),
        ('street1', 'street_one'),
        ('street2', 'street_two'),
        ('city', 'city'),
        ('state', 'state'),
        ('zip', 'zip'),
        ('connected_org_name', 'connected_org_name'),
        ('candidate_id', 'candidate_id'),
        ('type', 'committee_type'),
    )

    db.delete('contribution', '1=1')
    db.delete('committee', '1=1')
    for parsed in fec_cobol.parse_committees(reverse=True):
        record = web.storage(parsed)
        values = dict((column, getattr(record, source))
                      for column, source in column_sources)
        try:
            db.insert('committee', seqname=False, **values)
        except psycopg2.IntegrityError:
            pass  # already imported
Exemple #16
0
def load_fec_contributions():
    """Reload the `contribution` table from `fec_cobol.parse_contributions`.

    The table is emptied and every parsed record is inserted.  Work is
    committed in batches: after every 10000 inserts the current transaction
    is committed, a new one is opened and the running count is printed.  If
    anything fails, the open transaction is rolled back before the
    exception propagates (batches committed earlier stay committed).

    The raw `occupation` field has 'N/A' removed and, when it contains '/',
    is split into employer (before the first '/') and occupation (after).
    A `date` that cannot be turned into a `datetime.date` is stored as None.
    """
    t = db.transaction()
    n = 0
    try:
        db.delete('contribution', '1=1')
        for f in fec_cobol.parse_contributions():
            f = web.storage(f)
            f.occupation = f.occupation.replace('N/A', '')
            if '/' in f.occupation:
                employer, occupation = f.occupation.split('/', 1)
            else:
                employer = ''
                occupation = f.occupation

            try:
                datetime.date(*[int(x) for x in f.date.split('-')])
            except (ValueError, TypeError):
                # ValueError: non-numeric or out-of-range component;
                # TypeError: wrong number of components (e.g. "2008-01").
                f.date = None

            db.insert('contribution',
                      fec_record_id=f.get('fec_record_id'),
                      microfilm_loc=f.microfilm_loc,
                      recipient_id=f.filer_id,
                      name=f.name,
                      street=f.get('street'),
                      city=f.city,
                      state=f.state,
                      zip=f.zip,
                      occupation=occupation,
                      employer=employer,
                      employer_stem=tools.stemcorpname(employer),
                      committee=f.from_id or None,
                      sent=f.date,
                      amount=f.amount)
            n += 1
            if n % 10000 == 0:
                t.commit()
                t = db.transaction()
                print(n)
    except:
        # Deliberately bare: the open transaction must be rolled back on
        # any failure, including KeyboardInterrupt, before re-raising.
        t.rollback()
        raise
    else:
        t.commit()
Exemple #17
0
def load_fec_contributions():
    """Reload the `contribution` table and then build its indexes.

    The table is emptied and every record from
    `fec_cobol.parse_contributions()` is inserted.  Work is committed in
    batches: after every 10000 inserts the current transaction is
    committed, a new one is opened and the running count is printed.  If
    anything fails, the open transaction is rolled back before the
    exception propagates (batches committed earlier stay committed) and the
    indexes are not created.

    The raw `occupation` field has 'N/A' removed and, when it contains '/',
    is split into employer (before the first '/') and occupation (after).
    A `date` that cannot be turned into a `datetime.date` is stored as None.
    """
    t = db.transaction()
    n = 0
    try:
        db.delete('contribution', '1=1')
        for f in fec_cobol.parse_contributions():
            f = web.storage(f)
            f.occupation = f.occupation.replace('N/A', '')
            if '/' in f.occupation:
                employer, occupation = f.occupation.split('/', 1)
            else:
                employer = ''
                occupation = f.occupation

            try:
                datetime.date(*[int(x) for x in f.date.split('-')])
            except (ValueError, TypeError):
                # ValueError: non-numeric or out-of-range component;
                # TypeError: wrong number of components (e.g. "2008-01").
                f.date = None

            db.insert('contribution',
              fec_record_id = f.get('fec_record_id'),
              microfilm_loc = f.microfilm_loc,
              recipient_id = f.filer_id,
              name = f.name,
              street = f.get('street'),
              city = f.city,
              state = f.state,
              zip = f.zip,
              occupation = occupation,
              employer = employer,
              employer_stem = tools.stemcorpname(employer),
              committee = f.from_id or None,
              sent = f.date,
              amount = f.amount
            )
            n += 1
            if n % 10000 == 0:
                t.commit()
                t = db.transaction()
                print(n)
    except:
        # Deliberately bare: the open transaction must be rolled back on
        # any failure, including KeyboardInterrupt, before re-raising.
        t.rollback()
        raise
    else:
        t.commit()

    print("Creating indexes on table `contribution`...")
    schema.Contribution.create_indexes()
    print("done.")
Exemple #18
0
def generate_similarities():
    """
    Rebuild `group_politician_similarity` from bill support and positions.

    For every (interest group, politician) pair that shares at least one
    bill, count how many joined rows exist (`total`) and in how many of them
    the group's non-zero support matches the politician's vote (`agreed`).
    Pairs that never agreed are not stored.  The table is cleared and
    refilled within a single transaction.
    """
    joined = db.query('select igbp.group_id, position.politician_id, igbp.support, position.vote'
                    ' from interest_group_bill_support igbp, position'
                    ' where igbp.bill_id = position.bill_id')
    agreed_counts = {}
    row_counts = {}

    for rec in joined:
        key = (rec.group_id, rec.politician_id)
        row_counts[key] = row_counts.get(key, 0) + 1
        if rec.support != 0 and rec.support == rec.vote:
            agreed_counts[key] = agreed_counts.get(key, 0) + 1

    with db.transaction():
        db.delete('group_politician_similarity', '1=1')
        for key, agreed in agreed_counts.items():
            db.insert('group_politician_similarity', seqname=False,
                      group_id=key[0], politician_id=key[1],
                      agreed=agreed, total=row_counts[key])
Exemple #19
0
def load_fec_efilings():
    """Insert `contribution` and `expenditure` rows from parsed e-filings.

    Each filing yielded by `fec_csv.parse_efilings()` carries its schedules
    under the 'schedules' key.  A schedule of type 'contribution' becomes a
    `contribution` row; a schedule of any other type is inserted as an
    `expenditure` row.
    """
    for filing in fec_csv.parse_efilings():
        for sched in filing['schedules']:
            if s_is_contribution(sched) if False else sched['type'] != 'contribution':
                db.insert('expenditure',
                          candidate_name=filing['candidate'],
                          committee=filing['committee'],
                          expenditure_date=sched['date'],
                          recipient=sched['recipient'],
                          filer_id=filing['filer_id'],
                          report_id=filing['report_id'],
                          amount=sched['amount'])
                continue

            # NOTE: politician_id is computed but never stored; the lookups
            # are kept so the same queries are issued.
            politician_id = None
            if filing['candidate_fec_id']:
                lookup = {'fec_id': filing['candidate_fec_id']}
                matches = list(db.select('politician_fec_ids',
                                         where='fec_id=$fec_id',
                                         vars=lookup))
                if len(matches) == 1:
                    politician_id = matches[0].politician_id
            elif not politician_id and filing['candidate']:
                words = filing['candidate'].split(' ')
                lookup = {'fn': words[0], 'ln': words[-1]}
                matches = list(db.select('politician',
                                         where='lastname=$ln and firstname=$fn',
                                         vars=lookup))
                if len(matches) == 1:
                    politician_id = matches[0].id

            db.insert('contribution',
                      committee=filing['committee'],
                      contrib_date=sched['date'],
                      contributor_org=sched['contributor_org'],
                      contributor=sched['contributor'],
                      occupation=sched['occupation'],
                      employer=sched['employer'],
                      employer_stem=tools.stemcorpname(sched['employer']),
                      candidate_name=filing['candidate'],
                      filer_id=filing['filer_id'],
                      report_id=filing['report_id'],
                      amount=sched['amount'])