def generateCopyTemplate(file_destination):
    """Write ``copysheet.xlsx`` into *file_destination*.

    The workbook gets two sheets:

    * ``Bills`` - one row per Texas bill of the current session that has a
      scraped subject containing ``Education--Higher``.
    * ``Legislators`` - one row per active Texas legislator, upper chamber
      first, then lower chamber.

    *file_destination* is created when it is non-empty and does not exist.
    """
    wanted_bill_fields = "id,bill_id,title,scraped_subjects"

    # Page through the bill search until an empty page comes back.
    all_bills = []
    page_number = 1
    while True:
        page = openstates.bills(state="tx", search_window="session",
                                fields=wanted_bill_fields, page=page_number)
        if not page:
            break
        all_bills.extend(page)
        page_number += 1

    # A bill is kept once, no matter how many of its subjects match.
    higher_ed_bills = [
        bill for bill in all_bills
        if 'scraped_subjects' in bill
        and any('Education--Higher' in subject
                for subject in bill['scraped_subjects'])
    ]

    if file_destination != '' and not os.path.exists(file_destination):
        os.makedirs(file_destination)

    workbook = xlsxwriter.Workbook(
        os.path.join(file_destination, 'copysheet.xlsx'))

    bills_sheet = workbook.add_worksheet('Bills')
    bills_sheet.write_row(0, 0, ['key', 'Bill Summary', 'Bill ID', 'title'])
    # Row 0 is the header, so data rows start at 1.
    for row_number, bill in enumerate(higher_ed_bills, 1):
        bills_sheet.write_row(
            row_number, 0, [bill['id'], '', bill['bill_id'], bill['title']])

    leg_fields = 'full_name,leg_id'
    senators = openstates.legislators(state="tx", active=True,
                                      chamber='upper', fields=leg_fields)
    representatives = openstates.legislators(state="tx", active=True,
                                             chamber='lower',
                                             fields=leg_fields)
    everyone = list(senators) + list(representatives)

    leg_sheet = workbook.add_worksheet('Legislators')
    leg_sheet.write_row(0, 0, ['key', 'Higher Education', 'Name'])
    for row_number, member in enumerate(everyone, 1):
        leg_sheet.write_row(
            row_number, 0, [member['leg_id'], '', member['full_name']])

    workbook.close()
def handle(self, *args, **options):
    """Create an ``Officials`` record for every active legislator of a state.

    The state abbreviation is the single positional argument. Each
    legislator returned by ``openstates.legislators`` is copied into a new
    ``Officials`` instance, handed to ``self.add_specific_details`` and
    saved.

    Raises:
        CommandError: if the command did not receive exactly one argument.
    """
    if len(args) != 1:
        raise CommandError('Expected two digit state prefix')
    state = args[0]

    legs = openstates.legislators(state=state, active=True)
    self.stdout.write('Updating %s records\n' % len(legs))

    for leg in legs:
        o = Officials()
        o.legid = leg['leg_id']
        o.firstname = leg['first_name']
        o.lastname = leg['last_name']
        o.middlename = ""
        # Join only the non-empty parts: the middle name is always blank
        # here, and joining it verbatim stored "First  Last" (two spaces).
        o.fullname = " ".join(
            part for part in (o.firstname, o.middlename, o.lastname) if part)
        o.active = "True"
        o.state = state
        o.chamber = leg['chamber']
        o.district = leg['district']
        o.party = leg['party']
        # These two keys are not present on every legislator record, so
        # only copy them when the API supplied them.
        if 'transparencydata_id' in leg:
            o.transparencydataid = leg['transparencydata_id']
        if 'photo_url' in leg:
            o.photourl = leg['photo_url']
        o.createdat = leg['created_at']
        o.updatedat = leg['updated_at']
        # Fields with no API source start out empty.
        o.twitter = ""
        o.facebook = ""
        o.personal_homepage = ""
        o.press_release_url = ""
        o.notes = ""
        o.xpath = ""
        o.press_release_url_dl = ""
        self.add_specific_details(o)
        o.save()
def handle(self, *args, **options):
    """Create an ``Officials`` record for every active legislator of a state.

    The state abbreviation is read from ``settings.STATE_FILTER``. Each
    legislator returned by ``openstates.legislators`` is copied into a new
    ``Officials`` instance, handed to ``self.add_specific_details`` and
    saved.

    Raises:
        CommandError: if ``settings.STATE_FILTER`` is the empty string.
    """
    if settings.STATE_FILTER == '':
        raise CommandError('Expected two digit state prefix')
    state = settings.STATE_FILTER

    legs = openstates.legislators(state=state, active=True)
    self.stdout.write('Updating %s records\n' % len(legs))

    for leg in legs:
        o = Officials()
        o.legid = leg['leg_id']
        o.firstname = leg['first_name']
        o.lastname = leg['last_name']
        o.middlename = ""
        # Join only the non-empty parts: the middle name is always blank
        # here, and joining it verbatim stored "First  Last" (two spaces).
        o.fullname = " ".join(
            part for part in (o.firstname, o.middlename, o.lastname) if part)
        o.active = "True"
        o.state = state
        o.chamber = leg['chamber']
        o.district = leg['district']
        o.party = leg['party']
        # These two keys are not present on every legislator record, so
        # only copy them when the API supplied them.
        if 'transparencydata_id' in leg:
            o.transparencydataid = leg['transparencydata_id']
        if 'photo_url' in leg:
            o.photourl = leg['photo_url']
        o.createdat = leg['created_at']
        o.updatedat = leg['updated_at']
        # Fields with no API source start out empty.
        o.twitter = ""
        o.facebook = ""
        o.personal_homepage = ""
        o.press_release_url = ""
        o.notes = ""
        o.xpath = ""
        o.press_release_url_dl = ""
        self.add_specific_details(o)
        o.save()
def produceEnhancedDistrictJSONString(geoJSONString, chamber_string):
    """Attach legislator data to each district of a district-map JSON string.

    Args:
        geoJSONString: JSON text with an ``objects`` mapping that holds a
            ``senate`` and/or ``house`` entry with a ``geometries`` list.
        chamber_string: ``'upper'`` selects the ``senate`` entry; any other
            value selects ``house``. Also passed to the legislator query.

    Returns:
        The compactly serialised JSON in which every district whose ``id``
        matches a legislator's district carries
        ``properties = {'legislator': <legislator dict>}``.
    """
    wanted_fields = "full_name,district,offices,party,roles,leg_id,photo_url"
    legislators = openstates.legislators(state="tx", active=True,
                                         chamber=chamber_string,
                                         fields=wanted_fields)

    by_leg_id = {}
    for member in legislators:
        by_leg_id[member['leg_id']] = member
        member['higher_ed_bills'] = {'primary': [], 'cosponsor': []}
        # Keep only the part of the URL after 'images/' and re-root it.
        member['photo_url'] = (
            'images/' + member['photo_url'].split('images/')[1])

    session_bills = openstates.bills(state="tx", search_window="session",
                                     fields="id,sponsors,scraped_subjects")
    for bill in session_bills:
        try:
            # any() stops at the first matching subject, so sponsors are
            # registered at most once per bill.
            if any('Education--Higher' in subject
                   for subject in bill['scraped_subjects']):
                addSponsorsToSet(by_leg_id, bill)
        except KeyError:
            print('ERROR KeyError ' + str(bill))

    geoJSON = json.JSONDecoder().decode(geoJSONString)
    chamber_key = 'senate' if chamber_string == 'upper' else 'house'
    districts = geoJSON['objects'][chamber_key]['geometries']

    by_district = {}
    for member in legislators:
        try:
            by_district[int(member['district'])] = member
        except KeyError:
            # someone without a district, lt. governor, etc.
            print("'legislator' without district: %s" % str(member))

    for district in districts:
        district_number = int(district['id'])
        if district_number in by_district:
            district['properties'] = {
                'legislator': by_district[district_number]}

    addToLegislators(legislators)
    return json.dumps(obj=geoJSON, ensure_ascii=False, separators=(',', ':'))
def choose_politician():
    """Render ``choose_politician.html`` for the requested location.

    The ``location`` query argument defaults to ``'91101'`` when absent.
    An all-digit location is treated as a ZIP code: the state is derived
    from it and the Sunlight Congress API is queried as well. Any other
    value is passed to Open States as the state, and ``congress`` is ``0``.
    """
    location = request.args.get('location', '91101')

    if not location.isdigit():
        # openstates
        state_legislators = openstates.legislators(state=location)
        return render_template('choose_politician.html',
                               legislators=state_legislators,
                               congress=0)

    state = get_state_from_zip(location)
    # Congress API doesn't have as many entries
    locate_url = ('http://congress.api.sunlightfoundation.com/legislators/locate?apikey='
                  + str(keys.sunlight_key) + '&zip=' + str(location))
    payload = requests.get(locate_url).json()
    federal_legislators = payload['results']
    # openstates
    state_legislators = openstates.legislators(state=state)
    return render_template('choose_politician.html',
                           legislators=state_legislators,
                           congress=federal_legislators)
def produceEnhancedDistrictJSONString(geoJSONString, chamber_string):
    """Return *geoJSONString* re-serialised with legislators on its districts.

    Active Texas legislators of *chamber_string* are fetched, their
    ``photo_url`` is rewritten to a path starting at ``images/``, and the
    sponsors of bills with an ``Education--Higher`` subject are recorded via
    ``addSponsorsToSet``. Districts under ``objects.senate`` (for
    ``'upper'``) or ``objects.house`` (otherwise) whose ``id`` equals a
    legislator's district number get a ``properties.legislator`` entry.
    """
    def touches_higher_ed(bill):
        # Raises KeyError when the bill carries no 'scraped_subjects'.
        for subject in bill['scraped_subjects']:
            if 'Education--Higher' in subject:
                return True
        return False

    leg_fields = "full_name,district,offices,party,roles,leg_id,photo_url"
    legislators = openstates.legislators(
        state="tx", active=True, chamber=chamber_string, fields=leg_fields)

    lookup_by_id = {}
    for person in legislators:
        lookup_by_id[person['leg_id']] = person
        person['higher_ed_bills'] = {'primary': [], 'cosponsor': []}
        photo_tail = person['photo_url'].split('images/')[1]
        person['photo_url'] = 'images/' + photo_tail

    bill_fields = "id,sponsors,scraped_subjects"
    for bill in openstates.bills(state="tx", search_window="session",
                                 fields=bill_fields):
        try:
            if touches_higher_ed(bill):
                addSponsorsToSet(lookup_by_id, bill)
        except KeyError:
            print('ERROR KeyError ' + str(bill))

    geoJSON = json.JSONDecoder().decode(geoJSONString)
    if chamber_string == 'upper':
        districts = geoJSON['objects']['senate']['geometries']
    else:
        districts = geoJSON['objects']['house']['geometries']

    lookup_by_district = {}
    for person in legislators:
        try:
            district_number = int(person['district'])
        except KeyError:
            # someone without a district, lt. governor, etc.
            print("'legislator' without district: %s" % str(person))
            continue
        lookup_by_district[district_number] = person

    for shape in districts:
        holder = lookup_by_district.get(int(shape['id']))
        if int(shape['id']) in lookup_by_district:
            shape['properties'] = {}
            shape['properties']['legislator'] = holder

    addToLegislators(legislators)
    return json.dumps(obj=geoJSON, ensure_ascii=False, separators=(',', ':'))
def makeRepImagesLocal(folderPath):
    """Download the 'large' photo of every active Texas legislator.

    For both chambers the ``photo_url`` of each legislator is rewritten from
    'small' to 'large', fetched, and stored in *folderPath* under the part
    of the URL that follows ``images/``.

    A failed download is reported on stdout and skipped. The image is
    fetched before the target file is opened, so a failure no longer leaves
    an empty file behind.
    """
    for chamber_string in ['upper', 'lower']:
        legislators = openstates.legislators(state="tx", active=True,
                                             chamber=chamber_string,
                                             fields="photo_url")
        for legislator in legislators:
            large_url = legislator['photo_url'].replace('small', 'large')
            file_name = legislator['photo_url'].split(
                'images/')[1].replace('small', 'large')

            try:
                page = urllib2.urlopen(large_url)
                try:
                    content = page.read()
                finally:
                    # Release the connection even if the read fails.
                    page.close()
            except Exception:
                # Best effort: report and move on. KeyboardInterrupt and
                # SystemExit are deliberately not caught.
                print(large_url + " ERROR " + str(sys.exc_info()[0]))
                continue

            with open(os.path.join(folderPath, file_name), 'wb') as f:
                f.write(content)
def run():
    """Try to match the first 50 non-federal delegates to state legislators.

    Delegates whose group is 'Sen', 'Rep' or 'Gov' are excluded. For each
    remaining delegate, name suffixes are stripped and Open States is
    searched for a Democratic legislator in the delegate's state with the
    same first and last name. Results are collected into three lists
    (single match, multiple matches, no match) and printed at the end.
    """
    # get the ones that aren't federal or the state Governor.
    delegates = Delegate.objects.exclude(
        Q(group__abbr='Sen') | Q(group__abbr='Rep') | Q(group__abbr='Gov'))
    print('Count delegates who are not Reps,Sens,Govs %s' % delegates.count())

    match = []
    nomatch = []
    multimatch = []

    for d in delegates[:50]:
        name = d.name
        # ' III' must be removed before ' II', otherwise a stray 'I' remains.
        for suffix in (', Jr.', ' III', ' II'):
            name = name.replace(suffix, '')
        names = name.split()
        if not names:
            # Nothing to search for; avoids IndexError on a blank name.
            continue
        firstname = names[0]
        lastname = names[-1]

        # these are state legislators, not our own (fed) Legislator object
        legislators = openstates.legislators(
            state=d.state.state.lower(),
            party='Democratic',
            first_name=firstname,
            last_name=lastname,
        )
        print(name)
        sleep(0.5)  # stay gentle with the remote API

        if legislators and len(legislators) == 1:
            match.append("%s: %s" % (name, legislators[0]['full_name']))
        elif legislators:
            multimatch.append(
                "%s: %s" % (name, [f['full_name'] for f in legislators]))
        else:
            nomatch.append(name)

    # pprint.pprint() writes to stdout itself and returns None, so it must
    # not be wrapped in another print (that printed a stray "None").
    print("Match %d" % len(match))
    pprint.pprint(match)
    print("No match %d" % len(nomatch))
    pprint.pprint(nomatch)
    print("Multi match %d" % len(multimatch))
    pprint.pprint(multimatch)
def download_current_legislators():
    """Sync active Pennsylvania legislators into the ``Officials`` table.

    A record whose primary key equals the legislator's ``id`` is updated in
    place; otherwise a new ``Officials`` row is created. ``photo_url`` and
    ``transparencydata_id`` are copied only when present. The running index
    is printed for every fifth legislator as a progress indicator.
    """
    for counter, leg in enumerate(
            openstates.legislators(state='pa', active='true')):
        if counter % 5 == 0:
            print(counter)

        if not Officials.objects.filter(pk=leg['id']).exists():
            creation_values = dict(
                legid=leg['leg_id'],
                fullname=leg['full_name'],
                firstname=leg['first_name'],
                middlename=leg['middle_name'],
                lastname=leg['last_name'],
                suffixes=leg['suffixes'],
                active=leg['active'],
                state=leg['state'],
                chamber=leg['chamber'],
                district=leg['district'],
                party=leg['party'],
                createdat=leg['created_at'],
                updatedat=leg['updated_at'],
                homepage=leg['url'],
            )
            record = Officials(**creation_values)
            if 'transparencydata_id' in leg:
                record.transparencydataid = leg['transparencydata_id']
            if 'photo_url' in leg:
                record.photourl = leg['photo_url']
            record.save()
            continue

        # Existing record: refresh the fields that can change over time.
        record = Officials.objects.get(pk=leg['id'])
        record.chamber = leg['chamber']
        record.createdat = leg['created_at']
        record.updatedat = leg['updated_at']
        if 'photo_url' in leg:
            record.photourl = leg['photo_url']
        record.district = leg['district']
        record.party = leg['party']
        if 'transparencydata_id' in leg:
            record.transparencydataid = leg['transparencydata_id']
        record.save()
def download_current_legislators():
    """Create or refresh an ``Officials`` row per active PA legislator.

    Lookup is by primary key = the legislator's ``id``. Existing rows get
    their chamber, timestamps, district and party refreshed; missing rows
    are created with the full set of name/state fields. In both cases
    ``photo_url`` and ``transparencydata_id`` are copied only if the API
    record has them. Every fifth index is printed as progress output.
    """
    # (model attribute, API key) pairs refreshed on an existing row.
    refreshed = (
        ('chamber', 'chamber'),
        ('createdat', 'created_at'),
        ('updatedat', 'updated_at'),
        ('district', 'district'),
        ('party', 'party'),
    )
    # (model attribute, API key) pairs the API may omit.
    optional = (
        ('photourl', 'photo_url'),
        ('transparencydataid', 'transparencydata_id'),
    )

    pa_legislators = openstates.legislators(state='pa', active='true')
    for position, leg in enumerate(pa_legislators):
        if position % 5 == 0:
            print(position)

        already_stored = Officials.objects.filter(pk=leg['id']).exists()
        if already_stored:
            official = Officials.objects.get(pk=leg['id'])
            for attribute, key in refreshed:
                setattr(official, attribute, leg[key])
        else:
            official = Officials(legid=leg['leg_id'],
                                 fullname=leg['full_name'],
                                 firstname=leg['first_name'],
                                 middlename=leg['middle_name'],
                                 lastname=leg['last_name'],
                                 suffixes=leg['suffixes'],
                                 active=leg['active'],
                                 state=leg['state'],
                                 chamber=leg['chamber'],
                                 district=leg['district'],
                                 party=leg['party'],
                                 createdat=leg['created_at'],
                                 updatedat=leg['updated_at'],
                                 homepage=leg['url'])

        for attribute, key in optional:
            if key in leg:
                setattr(official, attribute, leg[key])
        official.save()
def populateLawmakers():
    """Load active Idaho legislators into the ``Lawmaker`` table.

    The legislators are fetched from Open States, printed, and each one is
    added to the database session as a ``Lawmaker`` built from ten of its
    fields (passed positionally). The session is committed after all rows
    have been added.

    Returns:
        The result of ``byteify`` applied to the JSON dump of the fetched
        legislators.
    """
    import sunlight
    import json
    from sunlight import openstates

    id_lm_json = openstates.legislators(state="id", active="true")
    print(id_lm_json)
    id_lm = byteify(json.dumps(id_lm_json))

    # Order matters: these are passed positionally to Lawmaker().
    constructor_fields = (
        "leg_id",
        "first_name",
        "last_name",
        "middle_name",
        "district",
        "chamber",
        "url",
        "email",
        "party",
        "photo_url",
    )
    for lm in id_lm_json:
        positional_values = [lm[field] for field in constructor_fields]
        db.session.add(Lawmaker(*positional_values))

    db.session.commit()
    return id_lm
def metadata(self):
    """Return the Open States legislators for this object's state/chamber.

    The query uses ``self.abbr`` as the state, ``self.chamber`` as the
    chamber, and passes ``active=False``.
    """
    logging.debug('Fetching legislators')
    query = dict(state=self.abbr, chamber=self.chamber, active=False)
    return openstates.legislators(**query)
def load_legislators(self):
    """Pass every Texas legislator from Open States to ``load_legislator``."""
    for lawmaker in openstates.legislators(state='tx'):
        self.load_legislator(lawmaker)
nay=[self.NO], missing=[self.OTHER], not_in_legis=0.0, legis_names=tuple(self.leg_ids)) return rollcall if __name__ == '__main__': from sunlight import openstates, response_cache response_cache.enable('mongo') response_cache.logger.setLevel(10) # Wrangle the API data into a Rollcall object. spec = dict(state='al', chamber='lower', search_window='term:2011-2014') valid_ids = [leg['id'] for leg in openstates.legislators(**spec)] builder = RollcallBuilder(valid_ids) bills = openstates.bills(**spec) for bill in bills: bill = openstates.bill(bill['id']) for vote in bill['votes']: if vote['chamber'] != bill['chamber']: continue builder.add_vote(vote) rollcall = builder.get_rollcall() wnominate = rollcall.wnominate(polarity=('ALL000086', 'ALL000085')) wnom_values = wnominate.legislators.coord1D ideal = rollcall.ideal()
import csv
from sunlight import openstates

# Active members of the Oklahoma lower chamber.
ok_legislators = openstates.legislators(
    state='ok',
    active='true',
    chamber='lower'
)

# Plain list of ids, plus the CSV header: 'leg_id' followed by every id.
ok_legislators_array = [legislator['leg_id'] for legislator in ok_legislators]
ok_legislators_csv_key = ['leg_id'] + ok_legislators_array

with open('scores.csv', 'w') as w:
    writer = csv.DictWriter(w, fieldnames=ok_legislators_csv_key,
                            extrasaction='ignore')
    writer.writeheader()

    for legislatorA in ok_legislators_array:
        print("Going through " + legislatorA)
        # One output row per legislator, keyed by its own id.
        leg_scores = {'leg_id': legislatorA}

        for legislatorB in ok_legislators_array:
            # open the votes csv file
            with open('housevotes.csv') as f:
                reader = csv.DictReader(f)
#!/usr/bin/env python
"""Dump the party make-up of every state legislature to openstates.json.

The output maps each state abbreviation to ``{"upper": [...], "lower": [...]}``
where every list element is the first letter of one legislator's party.
"""
from sunlight import openstates
import json

states = openstates.all_metadata()

ret = {}
for state in states:
    abbr = state['abbreviation']
    ret[abbr] = {
        "upper": [],
        "lower": []
    }
    legislators = openstates.legislators(state=abbr)
    for leg in legislators:
        chamber = leg.get('chamber')
        party = leg.get('party')
        # Skip records that cannot be tallied instead of aborting the whole
        # run: no chamber (Dewhurst), a chamber other than upper/lower, or a
        # missing/empty party.
        if chamber not in ret[abbr] or not party:
            continue
        ret[abbr][chamber].append(party[0])

# Use a context manager so the file is flushed and closed deterministically.
with open("openstates.json", 'w') as out:
    out.write(json.dumps(ret))
# Bot script: builds wiki profile pages for California state legislators
# from Open States data and a MediaWiki site accessed through mwclient.
import mwclient
import re
from sunlight import openstates
import os
# Legislators of each chamber, fetched once up front.
upper = openstates.legislators(state='ca',chamber='upper')
lower = openstates.legislators(state='ca',chamber='lower')
#### Access your MW with bot/admin approved permissions
# Credentials are read from a comma-separated file in the user's home
# directory: first field is the login name, second the password.
with open(os.path.expanduser('~') + "/.invisible/mw.csv", 'r') as f:
    e = f.read()
    keys = e.split(',')
    # NOTE(review): this echoes the login name and password to stdout -
    # confirm that is intended outside of debugging.
    print(keys)
    login_user = keys[0] #consumer_key
    login_password = keys[1] #consumer_secret
ua = 'CCWPTool run by User:1A' #UserAgent bot note
# NOTE(review): `ua` is not passed to mwclient.Site in the visible code.
site = mwclient.Site(('https', 'www.climatepolitics.info'), path='/w/',)
site.login(login_user, login_password)
save_note = "Bot creating US CA profiles"
default = "" # Fallback value for dictionary lookups when a key does not occur
count = 0
for x in upper:
    # Reset every per-legislator scratch variable to the empty string.
    # This also rebinds `e` and `f`, which were used above for the
    # credentials file.
    new_page = a_page = insert = fn = a = b = c = d = e = f = g = h = i = j = k = l = m = n = o = p = q = r = ""
    a = '{{US CA Upper' +'\n'
    fn = str(x.get("first_name", default)) #BC first_name includes unwanted middle initials
#!/usr/bin/env python
# Report legislators of one state (given as the first command-line argument)
# that share first and last name with a record returned by an
# active="false" search but carry a different leg_id.
from sunlight import openstates
import sys
import codecs

# Emit UTF-8 regardless of the terminal's default encoding.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

state = sys.argv[1]
kwargs = {"state": state}
legis = openstates.legislators(**kwargs)

for leg in legis:
    search = openstates.legislators(first_name=leg['first_name'],
                                    last_name=leg['last_name'],
                                    active="false",
                                    state=state)
    for s in search:
        if s['leg_id'] == leg['leg_id']:
            # Same record, not a duplicate.
            continue
        print(s['full_name'])
        print(leg['full_name'])
        print(" %s / %s" % (s['leg_id'], leg['leg_id']))
        print("")
#!/usr/bin/env python
# For the state named on the command line, list every pair of legislator
# records with identical first and last name but different leg_id, where
# the second record comes from an active="false" search.
from sunlight import openstates
import sys
import codecs

sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

state = sys.argv[1]
kwargs = {"state": state}
legis = openstates.legislators(**kwargs)


def _namesakes_with_other_id(leg):
    """Yield same-named records from the active="false" search, minus *leg*."""
    search = openstates.legislators(first_name=leg['first_name'],
                                    last_name=leg['last_name'],
                                    active="false",
                                    state=state)
    for s in search:
        if s['leg_id'] != leg['leg_id']:
            yield s


for leg in legis:
    for s in _namesakes_with_other_id(leg):
        report_lines = (
            s['full_name'],
            leg['full_name'],
            " %s / %s" % (s['leg_id'], leg['leg_id']),
            "",
        )
        for line in report_lines:
            print(line)
from sunlight import openstates
from transparencydata import TransparencyData
# NOTE(review): `apikey` is not defined anywhere in the visible code -
# confirm where it is expected to come from.
td = TransparencyData(apikey)
import pandas as pd
""" This program obtains general information on the donors to a particular legislator. It first obtains all the legislators for the state of CA, and then cycles through all of those to keep relevant information and put it in a pandas data frame. It then writes that pandas dataframe to a mysql database """
# obtain the list of legislators in the current session
all_legs = openstates.legislators(state='ca')
# obtain all donor information for the particular legislator in the past 3 years.
# `index` counts the legislators processed so far (starting at 0) and is
# printed as progress output.
index = -1
for leg in all_legs:
    # The lookup is keyed on the lower-cased last name only.
    # NOTE(review): presumably this can also match other recipients with the
    # same last name - verify against the TransparencyData API.
    thiscontribution = td.contributions(cycle='2013|2014|2015', recipient_ft=leg['last_name'].lower(), recipient_state='ca')
    df = pd.DataFrame(thiscontribution)
    # df.columns has the name of the column
    index = index + 1
    print index
    if not df.empty:
        # remove columns which are not of interest.
        del df['candidacy_status']
        del df['committee_ext_id']
        del df['committee_party']
        del df['contributor_ext_id']
def run():
    """Import candidates from ``SandersDemocrats.csv`` and link incumbents.

    Every CSV row with a ``Name`` is upserted as a ``Candidate`` keyed on
    (state, name). Rows whose status contains 'Serving' and whose level and
    office are Federal/State and House/Senate are then matched against
    legislator data: the local ``Legislator`` table for federal offices, the
    Open States API for state offices. A single match is attached to the
    candidate; multiple or missing matches are collected and printed at the
    end for manual processing. The function ends with ``sys.exit(0)``.
    """
    # fix in database or in ingest or with source
    Legislator.objects.filter(bioguide_id__in=('Q000024', 'S000275')).update(in_office=False)
    # NOTE(review): the file handle is never closed explicitly.
    lines = open('SandersDemocrats.csv', 'r').readlines()
    # drop the first two lines so that DictReader takes its field names
    # from the third line of the file
    lines = lines[2:]
    line_buffer = StringIO.StringIO('\n'.join(lines))
    reader = csv.DictReader(line_buffer, restval='')
    multiple_legislators = []
    missing_legislators = []
    #State,Name,Level,Office,District,Status,Sanders Dem Profile,Notes,Congressional Primary Date,Endorsement,Website,Facebook,Twitter,Email,Donate,Primary Win,General Election Win,Party,img
    for r in reader:
        # new fields not in data
        serving = running = False
        name = r.get('Name')
        if not name:
            continue
        state_obj = State.objects.get(name=r.get('State'))
        #if state_obj.state not in ('MA', 'NH', 'VT'):
            #continue;
        state = state_obj.state.lower()
        status = r.get('Status').strip()
        if status.startswith('Serving'):
            serving = True
        if 'Re-election' in status:
            running = True
        if status == 'Candidate':
            running = True
        district = r.get('District')
        level = r.get('Level')
        notes = r.get('Notes').lstrip('*').strip()
        image_url = r.get('img')
        office = r.get('Office')
        try:
            primary_date = tzaware_from_string(r.get('Congressional Primary Date'), '%m/%d/%Y')
        except ValueError:
            primary_date = None
        profile_url = r.get('Sanders Dem Profile')
        endorsement_url = r.get('Endorsement')
        website_url = r.get('Website')
        donate_url = r.get('Donate')
        facebook_id = r.get('Facebook')
        twitter_id = r.get('Twitter')
        # Win flags: anything that is not an integer string counts as False.
        primary_win = r.get('Primary Win')
        try:
            primary_win = bool(int(primary_win))
        except:
            primary_win = False
        general_election_win = r.get('General Election Win')
        try:
            general_election_win = bool(int(general_election_win))
        except:
            general_election_win = False
        # NOTE(review): general_election_win is computed but not stored in
        # new_values below - confirm whether that is intended.
        party = r.get('Party')
        # Reduce a Facebook URL to the trailing page/user id.
        if facebook_id:
            m = re.search(
                r'https?://(www.)?facebook.com/(\w#!/)?(pages/)?(([\w-]/)*)?(?P<id>[\w.-]+)',
                facebook_id)
            if m:
                facebook_id = m.group('id')
            else:
                log.error('Bad facebook id: %s', facebook_id)
        # Reduce a Twitter URL or @handle to the bare handle.
        if twitter_id:
            try:
                twitter_id = re.sub(r'\?.*', '', twitter_id)
                m = re.search(r'^(https*://twitter.com/|@)([A-Za-z0-9_]+)$', twitter_id)
                if m:
                    twitter_id = m.group(2)
                # NOTE(review): assert is skipped when Python runs with -O,
                # in which case over-long ids would not be logged.
                assert len(twitter_id) <= 15
            except AssertionError:
                log.error('Twitter id too long: %s', twitter_id)
        # Strip suffixes, then take the first and last word as the name
        # parts used for the legislator search.
        tmpname = name
        tmpname = tmpname.replace(', Jr.', '')
        tmpname = tmpname.replace(' III', '')
        tmpname = tmpname.replace(' II', '')
        names = tmpname.split()
        firstname = names[0]
        lastname = names[-1:][0]
        # if level not in ('State', 'Federal'):
        #     continue
        # if office not in ('House', 'Senate'):
        #     continue
        # expat changes
        if district == 'VT':
            district = '0' ## One at-large rep from VT
        if state == 'ma' and firstname == 'Jamie' and lastname == 'Eldridge':
            firstname = 'James'
        if state == 'ma' and firstname == 'Pat' and lastname == 'Jehlen':
            firstname = 'Patricia'
        if state == 'vt' and lastname == 'Pollina' and firstname == 'Anthony':
            # NOTE(review): `chamber` is assigned here but never read in
            # this function.
            chamber = 'upper'
        # sunlight errors
        # The overrides below replace the CSV first name with the spelling
        # used for the search against the Sunlight/Open States data.
        if state == 'ma' and firstname == 'Mary' and lastname == 'Keefe':
            firstname = 'Mary S.'
        if state == 'me' and firstname == 'James' and lastname == 'Campbell':
            firstname = 'James J.'
        if state == 'nh' and firstname == 'Andrew' and lastname == 'White':
            firstname = 'Andrew A.'
        if state == 'nh' and firstname == 'Andy' and lastname == 'Schmidt':
            firstname = 'Andrew R.'
        if state == 'nh' and firstname == 'Geoffrey' and lastname == 'Hirsch':
            firstname = 'Geoffrey D.'
        if state == 'nh' and firstname == 'George' and lastname == 'Sykes':
            firstname = 'George E.'
        if state == 'nh' and firstname == 'Gilman' and lastname == 'Shattuck':
            firstname = 'Gilman C.'
        if state == 'nh' and firstname == 'Jane' and lastname == 'Beaulieu':
            firstname = 'Jane E.'
        if state == 'nh' and firstname == 'Lee' and lastname == 'Oxenham':
            firstname = 'Lee Walker'
        if state == 'nh' and firstname == 'Marcia' and lastname == 'Moody':
            firstname = 'Marcia G.'
        if state == 'nh' and firstname == 'Patrick' and lastname == 'Long':
            firstname = 'Patrick T.'
        if state == 'nh' and firstname == 'Peter' and lastname == 'Bixby':
            firstname = 'Peter W.'
        if state == 'nh' and firstname == 'Richard' and lastname == 'McNamara':
            firstname = 'Richard D.'
        if state == 'nh' and firstname == 'Robert' and lastname == 'Cushing':
            firstname = 'Robert R.'
        if state == 'nh' and firstname == 'Robert' and lastname == 'Theberge':
            firstname = 'Robert L.'
        if state == 'nh' and firstname == 'Tim' and lastname == 'Smith':
            firstname = 'Timothy J.'
        if state == 'nh' and firstname == 'Wayne' and lastname == 'Burton':
            firstname = 'Wayne M.'
        if state == 'vt' and firstname == 'Bill' and lastname == 'Frank':
            firstname = 'William'
        if state == 'vt' and firstname == 'Linda' and lastname == 'Martin':
            firstname = 'Linda J.'
        if state == 'vt' and firstname == 'Mary' and lastname == 'Hooper':
            firstname = 'Mary S.'
        if state == 'vt' and firstname == 'Mollie' and lastname == 'Burke':
            firstname = 'Mollie S.'
        if state == 'vt' and firstname == 'Steve' and lastname == 'Berry':
            firstname = 'Steven'
        if state == 'vt' and firstname == 'Warren' and lastname == 'Kitzmiller':
            firstname = 'Warren F.'
        if state == 'vt' and firstname == 'Tim' and lastname == 'Jerman':
            firstname = 'Timothy'
        # create candidate here, then tweak below
        new_values = {
            'profile_url':profile_url,
            'website_url':website_url,
            'twitter_id':twitter_id,
            'facebook_id':facebook_id,
            'donate_url':donate_url,
            'endorsement_url':endorsement_url,
            'notes':notes,
            'image_url':image_url,
            'primary_date':primary_date,
            'level':level,
            'office':office,
            'district':district,
            'status':status,
            'serving':serving,
            'running':running,
            'winner':primary_win,
            'party':party,
        }
        # treat the combination of state and name as unique for now, but
        # don't enforce
        candidate, created = Candidate.objects.update_or_create(
            state=state_obj, name=name, defaults=new_values
        )
        print '%s %s' % ('Created' if created else 'Updated', candidate)
        # First go through the incumbents and cross-reference with sunlight
        # openstates dataset to get information
        if 'Serving' in status and level in ('Federal', 'State') and office in ('House', 'Senate'):
            legislators = None
            if level == 'Federal':
                # Federal incumbents come from the local Legislator table.
                if office == 'House':
                    legislators = Legislator.objects.filter(state=state_obj, district=district, in_office=True)
                elif office == 'Senate':
                    legislators = Legislator.objects.filter(state=state_obj, in_office=True)
            elif level == 'State':
                # State incumbents: search by last name + district first and
                # fall back to last name + first name when nothing is found.
                if office == 'House':
                    legislators = openstates.legislators(
                        state=state, chamber='lower', active=True, last_name=lastname, district=district
                    )
                    sleep(0.5)
                    if not legislators:
                        legislators = openstates.legislators(
                            state=state, chamber='lower', active=True, last_name=lastname, first_name=firstname,
                        )
                        sleep(0.5)
                elif office == 'Senate':
                    legislators = openstates.legislators(
                        state=state, chamber='upper', active=True, last_name=lastname, district=district
                    )
                    sleep(0.5)
                    if not legislators:
                        legislators = openstates.legislators(
                            state=state, chamber='upper', active=True, last_name=lastname, first_name=firstname,
                        )
                        sleep(0.5)
            #if legislators:
                #legislators = [(l['full_name'],l['district']) for l in legislators]
            if legislators:
                if len(legislators) == 1:
                    leg_obj = legislators[0]
                    # A dict means the match came from the Open States API;
                    # otherwise it is a local Legislator model instance.
                    if type(leg_obj) == dict:
                        # these values are showing up in the json, but are not documented.
                        # id is a duplicate of leg_id that conflicts with
                        # the 'id' in the Django model
                        leg_obj.pop('id', None)
                        leg_obj.pop('nimsp_candidate_id', None)
                        leg_obj.pop('nimsp_id', None)
                        leg_obj.pop('csrfmiddlewaretoken', None)
                        leg_obj.pop('nickname', None)
                        leg_obj.pop('office_phone', None)
                        leg_obj.pop('office_address', None)
                        leg_offices = leg_obj.pop('offices', None)
                        # capitalize the state abbreviations
                        leg_obj['state'] = leg_obj['state'].upper()
                        # sunlight fields with '+' char are non-standard, ignore
                        # NOTE(review): popping while iterating relies on
                        # keys() returning a list (Python 2).
                        for k in leg_obj.keys():
                            if k.startswith('+'):
                                leg_obj.pop(k, None)
                        leg_obj['created_at'] = tzaware_from_string(leg_obj['created_at'], '%Y-%m-%d %H:%M:%S')
                        leg_obj['updated_at'] = tzaware_from_string(leg_obj['updated_at'], '%Y-%m-%d %H:%M:%S')
                        leg_id = leg_obj.pop('leg_id')
                        state_leg, created = StateLegislator.objects.update_or_create(leg_id=leg_id, defaults=leg_obj)
                        candidate.state_legislator = state_leg
                        candidate.district = leg_obj['district']
                        # replace old offices with new ones
                        state_leg.offices.clear()
                        # NOTE(review): leg_offices is None when the record
                        # has no 'offices' key, which would make this loop
                        # raise; the loop also rebinds `office`.
                        for office in leg_offices:
                            ofc = Office.objects.create(**office)
                            state_leg.offices.add(ofc)
                    else:
                        candidate.legislator = leg_obj
                    candidate.save()
                else:
                    multiple_legislators.append(
                        { 'candidate':candidate, 'matches':[(l, type(l)) for l in legislators] }
                    )
                    print [(l, type(l)) for l in legislators]
                    #log.info(name)
                    #print name, state, district, ' -- ', legislators
            else:
                missing_legislators.append("%s (%s %s) %s %s %s %s %s" % (
                    name, firstname, lastname, state.upper(), level, office, district, status))
                print("%s (%s %s) %s %s %s %s %s" % (
                    name, firstname, lastname, state.upper(), level, office, district, status))
                #log.error("%s (%s %s) %s %s %s %s %s", name, firstname,
                    #lastname, state, level, office, district, status)
                #print name, (firstname, lastname,), state, level, office, district, status
                #print state, r.get('Level'), r.get('Office'), r.get('District'), r.get('Status')
        elif status == 'Candidate':
            pass
        else:
            print "Unknown status: '%s'" % status
    print "These need to be processed manually"
    print "Multiples"
    # NOTE(review): pprint.pprint() prints the value itself and returns
    # None, so each of these statements also prints a line reading "None".
    print pprint.pprint(multiple_legislators)
    print "Missing from Sunlight data"
    print pprint.pprint(missing_legislators)
    sys.exit(0)
# import packages we need: openstates, csv, and regex
from sunlight import openstates
import csv
import re

# Bill search through the openstates package.
# see example output: http://sunlightlabs.github.io/openstates-api/bills.html#examples/bill-search
oklahoma_bills = openstates.bills(state='ok', search_window='term:2015-2016')

# Active Oklahoma legislators.
ok_legislators = openstates.legislators(state='ok', active='true')

# CSV header row: three fixed columns, then one column per legislator id.
ok_legislators_array = ['bill_id', 'chamber', 'vote_id'] + [
    legislator['leg_id'] for legislator in ok_legislators
]

# create or open votes.csv file, with write
with open('votes.csv', 'w') as f:
    # extrasaction='ignore' makes the writer skip keys of a row dict that
    # are not listed in fieldnames instead of raising.
    writer = csv.DictWriter(f, fieldnames=ok_legislators_array,
                            extrasaction='ignore')
def run(): response = urllib2.urlopen(url) berniecrats = json.loads(response.read().decode('utf-8')).get('berniecrats') statuses = berniecrats[0].get('status') offices = berniecrats[1].get('offices') candidates = berniecrats[2].get('candidates') status_dict = list_to_dict(statuses) office_dict = list_to_dict(offices) multiple_legislators = [] missing_legislators = [] data = [] found = [] not_found = [] possible_match = [] for c in candidates: #print c.get('firstName'), c.get('lastName') first_name = c.get('firstName').strip() last_name = c.get('lastName').strip() name = '%s %s' % (first_name, last_name) state = State.objects.get(state=c.get('state')) party = c.get('partyCode') serving = c.get('isIncumbent') level = get_level(c) office = get_office(c, office_dict) district = get_district(c) website_url = c.get('website') endorsed_by_bernie = c.get('endorsedByBernie') endorsement_url = c.get('infoUrl') endorsement_text = c.get('infoLink') primary_win = get_null_bool(c.get('electPrimary')) general_win = get_null_bool(c.get('electGeneral')) #updated = tzaware_from_string(c.get('lastUpdate'), '%Y-%m-%d %H:%M:%S') #created = tzaware_from_string(c.get('createDate'), '%Y-%m-%d %H:%M:%S') status = c.get('status') notes = c.get('statusMsg') running = status == "0" facebook_id = sanitize_facebook(c.get('facebook')) twitter_id = sanitize_twitter(c.get('twitter')) if name == 'Philip Cornell': name = 'Phil Cornell' if name == 'Dave Zuckerman': name = 'David Zuckerman' if party == 'VP D': party = 'VPP' data.append([ name, party, state.state, level, office, district, twitter_id, facebook_id]) # treat the combination of state and name as unique for now, but # don't enforce candidate = None try: candidate = Candidate.objects.get( state=state, name=name, ) found.append('%s %s' % (candidate.state.state, candidate.name)) except Candidate.DoesNotExist: try: min_fname = first_name.split()[0] min_lname = re.sub(r',*\s*[JS]r\.\s*', '', last_name) candidate = Candidate.objects.get( 
state=state, name__startswith=min_fname, name__endswith=min_lname, ) found.append('%s %s' % (candidate.state.state, candidate.name)) except Candidate.DoesNotExist: try: min_lname = re.sub(r',*\s*[JS]r\.\s*', '', last_name) candidate = Candidate.objects.get( state=state, name__endswith=min_lname, ) possible_match.append('%s %s (%s %s ?)' % ( candidate.state.state, candidate.name, first_name, last_name)) except Candidate.DoesNotExist: not_found.append(state.state + ' ' + name) #print matrix_to_string(data) # if level not in ('State', 'Federal'): # continue # if office not in ('House', 'Senate'): # continue sname = state.state.lower() district = fix_district(district) first_name, last_name = fix_names(sname, first_name, last_name) # create candidate here, then tweak below new_values = { #'profile_url':None, #'donate_url':None, #'image_url':None, #'primary_date':None, #'status':status, 'notes':notes, 'first_name':first_name, 'last_name':last_name, 'website_url':website_url, 'twitter_id':twitter_id, 'facebook_id':facebook_id, 'endorsed_by_bernie':endorsed_by_bernie, 'endorsement_url':endorsement_url, 'endorsement_text':endorsement_url, 'level':level, 'office':office, 'district':district, 'serving':serving, 'running':running, 'primary_win':primary_win, 'general_win':general_win, 'party':party, } if candidate: updated = False created = False candidate.mismatch = [] for k,v in new_values.items(): # if missing value, then merge if not getattr(candidate, k): setattr(candidate, k, v) updated = True # values are identical then continue elif getattr(candidate, k) == v: pass # twitter_id values are identical (case insensitive) elif k == 'twitter_id' and getattr(candidate, k).lower() == v.lower(): pass # values are different elif v: candidate.mismatch.append("\t.%s: '%s' != '%s'" % (k, getattr(candidate, k), v)) else: candidate = Candidate( state=state, name=name, **new_values ) created = True if hasattr(candidate, 'mismatch') and candidate.mismatch: print candidate.name, if 
candidate.legislator: print "[%s %s %s]" % ('Federal', candidate.legislator.title, candidate.legislator.district), elif candidate.state_legislator: print "[%s %s %s]" % ('State', candidate.state_legislator.chamber, candidate.state_legislator.district), print print '\n'.join(candidate.mismatch) # Okay, now save it log.debug('Candidate: %s', candidate) candidate.save() # First go through the incumbents and cross-reference with sunlight # openstates dataset to get information if candidate.serving and level in ('Federal', 'State') and office in ('House', 'Senate'): legislators = None if level == 'Federal': if office == 'House': legislators = Legislator.objects.filter(state=state, district=district, in_office=True) elif office == 'Senate': legislators = Legislator.objects.filter(state=state, in_office=True) elif level == 'State': if office == 'House': legislators = openstates.legislators( state=state.state, chamber='lower', active=True, last_name=last_name, district=district ) sleep(0.5) if not legislators: legislators = openstates.legislators( state=state.state, chamber='lower', active=True, last_name=last_name, first_name=first_name, ) sleep(0.5) elif office == 'Senate': legislators = openstates.legislators( state=state.state, chamber='upper', active=True, last_name=last_name, district=district ) sleep(0.5) if not legislators: legislators = openstates.legislators( state=state.state, chamber='upper', active=True, last_name=last_name, first_name=first_name, ) sleep(0.5) #if legislators: #legislators = [(l['full_name'],l['district']) for l in legislators] if legislators: if len(legislators) == 1: leg_obj = legislators[0] if type(leg_obj) == dict: # these values are showing up in the json, but are not documented. 
# id is a duplicate of leg_id that conflicts with # the 'id' in the Django model leg_obj.pop('id', None) leg_obj.pop('nimsp_candidate_id', None) leg_obj.pop('nimsp_id', None) leg_obj.pop('csrfmiddlewaretoken', None) leg_obj.pop('nickname', None) leg_obj.pop('office_phone', None) leg_obj.pop('office_address', None) leg_offices = leg_obj.pop('offices', None) # capitalize the state abbreviations leg_obj['state'] = leg_obj['state'].upper() # sunlight fields with '+' char are non-standard, ignore for k in leg_obj.keys(): if k.startswith('+'): leg_obj.pop(k, None) leg_obj['created_at'] = tzaware_from_string(leg_obj['created_at'], '%Y-%m-%d %H:%M:%S') leg_obj['updated_at'] = tzaware_from_string(leg_obj['updated_at'], '%Y-%m-%d %H:%M:%S') leg_id = leg_obj.pop('leg_id') state_leg, created = StateLegislator.objects.update_or_create(leg_id=leg_id, defaults=leg_obj) candidate.state_legislator = state_leg candidate.district = leg_obj['district'] # replace old offices with new ones state_leg.offices.clear() for office in leg_offices: ofc = Office.objects.create(**office) state_leg.offices.add(ofc) else: candidate.legislator = leg_obj candidate.save() else: multiple_legislators.append( { 'candidate':candidate, 'matches':[(l, type(l)) for l in legislators] } ) #print [(l, type(l)) for l in legislators] #log.info(name) #print name, state, district, ' -- ', legislators else: missing_legislators.append("%s (%s %s) %s %s %s %s %s" % ( name, first_name, last_name, state.state.upper(), level, office, district, status)) print("%s (%s %s) %s %s %s %s %s" % ( name, first_name, last_name, state.state.upper(), level, office, district, status)) #log.error("%s (%s %s) %s %s %s %s %s", name, firstname, #lastname, state, level, office, district, status) #print name, (firstname, lastname,), state, level, office, district, status #print state, r.get('Level'), r.get('Office'), r.get('District'), r.get('Status') print "These need to be processed manually" print "Multiples" print 
pprint.pprint(multiple_legislators) print "Missing from Sunlight data" print pprint.pprint(missing_legislators) db_candidates = ['%s %s' % (c.state.state, c.name) for c in Candidate.objects.all()] db_only = list(set(db_candidates) - set(found)) print 'Found', len(found) pprint.pprint(sorted(found)) print 'Possible Match', len(possible_match) pprint.pprint(sorted(possible_match)) print 'Not Found', len(not_found) pprint.pprint(sorted(not_found)) print 'DB Only', len(db_only) pprint.pprint(sorted(db_only)) # create dictionary with state keys and lists of candidates as items # for candidates only in database db_only_dict = {} for c in db_only: state, name = c.split(' ',1) try: db_only_dict[state].append(name) except KeyError: db_only_dict[state] = [] db_only_dict[state].append(name) # create dictionary with state keys and lists of candidates as items # for candidates not found in database not_found_dict = {} for c in not_found: state, name = c.split(' ',1) try: not_found_dict[state].append(name) except KeyError: not_found_dict[state] = [] not_found_dict[state].append(name) # print out by state for s in State.objects.exclude(name='Unassigned'): try: col1 = db_only_dict[s.state] except KeyError: col1 = [] try: col2 = not_found_dict[s.state] except KeyError: col2 = [] if not col1 and not col2: continue print print s.name print '-' * len(s.name) print '{:<10}'.format('DB ONLY'), ', '.join(col1) print '{:<10}'.format('NOT FOUND'), ', '.join(col2) print matrix_to_string([[col1], [col2]])