import csv
import json
import re
import urllib.request

import requests
import probablepeople as pp
from bs4 import BeautifulSoup

# Shared helpers (get_results_url, get_precincts_url, get_txtfile,
# initialize_race_obj, parse_name, get_candidate_info, get_vote_count,
# get_candidates_in_race_obj) are assumed to be defined elsewhere in this
# repo.


def scrape_dupage():
    COUNTY_NAME = "DuPage"

    # sets URLs
    DUPAGE_RACE_URL = get_results_url()
    PRECINCTS_URL = get_precincts_url()

    # gets data
    data = requests.get(DUPAGE_RACE_URL).json()
    precincts_data = requests.get(PRECINCTS_URL).json()

    # gets precinct info
    precincts_reporting = precincts_data['settings'][
        'numberofprecinctsreporting']
    precincts_total = precincts_data['settings']['totalprecinctsreporting']

    # creates empty list for results info
    dupage_county_results = []
    for datum in data:
        if datum['CAT'] == "Propositions":
            options = datum['CH']
            votes = datum['V']
            race_obj = initialize_race_obj(datum['C'], precincts_reporting,
                                           precincts_total, COUNTY_NAME)
            for option_index, (option, vote) in enumerate(zip(options, votes)):
                if option == "Yes/Sí":  # specific to DuPage
                    option = "Yes"
                race_obj["reporting_units"][0]['candidates'].append({
                    "first_name": "",
                    "middle_name": "",
                    "last_name": option.title(),
                    "vote_count": int(vote),
                    "ballot_order": int(option_index + 1)
                })
            dupage_county_results.append(race_obj)
        elif datum['CAT'] == "County" or datum['CAT'] == "Judicial":
            candidates = datum['CH']
            cand_votes = datum['V']
            cand_parties = datum['P']
            race_obj = initialize_race_obj(datum['C'], precincts_reporting,
                                           precincts_total, COUNTY_NAME)
            for cand_index, (candidate, cand_vote, cand_party) in enumerate(
                    zip(candidates, cand_votes, cand_parties)):
                if candidate == "Yes/Sí":  # specific to DuPage
                    candidate = "Yes"
                # uses probablepeople to parse names into a list
                full_name = pp.parse(candidate, 'person')
                first_name, middle_name, last_name = parse_name(full_name)
                race_obj["reporting_units"][0]['candidates'].append({
                    "first_name": first_name,
                    "middle_name": middle_name,
                    "last_name": last_name,
                    "vote_count": int(cand_vote),
                    "party": cand_party,
                    "ballot_order": int(cand_index + 1)
                })
            dupage_county_results.append(race_obj)
    with open('scrapers/dupage_data.json', 'w', encoding='utf-8') as f:
        json.dump(dupage_county_results, f, ensure_ascii=False, indent=4)
    return dupage_county_results


# uncomment to test this scraper on its own; keep it commented out when
# running the app
# scrape_dupage()
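# For reference, a rough sketch of what the shared initialize_race_obj helper
# (defined elsewhere in this repo) plausibly returns. Only "name" and
# reporting_units[0]["candidates"] are confirmed by how the scrapers use the
# object; every other field name below is an assumption.
#
# def initialize_race_obj(race_name, precincts_reporting, precincts_total,
#                         county_name):
#     return {
#         "name": race_name,
#         "reporting_units": [{
#             "name": county_name,                         # assumed field
#             "precincts_reporting": precincts_reporting,  # assumed field
#             "precincts_total": precincts_total,          # assumed field
#             "candidates": [],
#         }],
#     }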
def scrape_kendall():
    COUNTY_NAME = "Kendall"

    # sets URLs
    KENDALL_RACE_URL = 'https://results.co.kendall.il.us/'

    # gets data
    html = urllib.request.urlopen(KENDALL_RACE_URL).read()
    soup = BeautifulSoup(html, 'html.parser')

    # creates empty list for results info
    kendall_county_results = []
    data = soup.find('pre').text
    precincts_total = 87
    rows = data.splitlines()
    for index, row in enumerate(rows):
        if row.startswith(" PRECINCTS"):
            precincts_reporting = int(row[-2:])
        if row == "COUNTY BOARD MEMBER-DIST.1":
            dist1_race_name = row
            dist1_race_obj = initialize_race_obj(dist1_race_name,
                                                 precincts_reporting,
                                                 precincts_total, COUNTY_NAME)
        if 115 <= index <= 119:
            # hard-coded row range for the Dist. 1 candidates; double-check it
            # whenever the report layout shifts
            cand_index = int(str(index)[-1:]) - 2  # ballot order from the row index's last digit
            cand_info, full_name, party = get_candidate_info(row)
            first_name, middle_name, last_name = parse_name(full_name)
            votes = get_vote_count(cand_info)
            formatted_candidate_info = get_candidates_in_race_obj(
                first_name, middle_name, last_name, votes, party, cand_index)
            dist1_race_obj["reporting_units"][0]['candidates'].append(
                formatted_candidate_info)
        if row == "COUNTY BOARD MEMBER-DIST.2":
            dist2_race_name = row
            dist2_race_obj = initialize_race_obj(dist2_race_name,
                                                 precincts_reporting,
                                                 precincts_total, COUNTY_NAME)
        if 124 <= index <= 129:
            # hard-coded row range for the Dist. 2 candidates; double-check it
            # whenever the report layout shifts
            cand_index = int(str(index)[-1:]) - 1
            cand_info, full_name, party = get_candidate_info(row)
            first_name, middle_name, last_name = parse_name(full_name)
            votes = get_vote_count(cand_info)
            formatted_candidate_info = get_candidates_in_race_obj(
                first_name, middle_name, last_name, votes, party, cand_index)
            dist2_race_obj["reporting_units"][0]['candidates'].append(
                formatted_candidate_info)
    kendall_county_results.append(dist1_race_obj)
    kendall_county_results.append(dist2_race_obj)
    with open('scrapers/kendall_data.json', 'w', encoding='utf-8') as f:
        json.dump(kendall_county_results, f, ensure_ascii=False, indent=4)
    return kendall_county_results


# uncomment to test this scraper on its own; keep it commented out when
# running the app
# scrape_kendall()
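# The index ranges above (115-119 and 124-129) break whenever Kendall's
# plain-text report shifts by a line. A hedged alternative, assuming the
# candidate rows sit directly under the race-header row and end at the first
# blank line:
def candidate_rows_after(rows, header):
    """Return the rows between `header` and the next blank line."""
    start = rows.index(header) + 1
    block = []
    for row in rows[start:]:
        if not row.strip():
            break
        block.append(row)
    return block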
def scrape_mchenry():
    COUNTY_NAME = "McHenry"

    # sets URLs
    MCHENRY_RACE_URL = get_results_url()
    PRECINCTS_URL = get_precincts_url()

    # gets data
    data = requests.get(MCHENRY_RACE_URL).json()
    precincts_data = requests.get(PRECINCTS_URL).json()

    # gets precinct info
    precincts_reporting = precincts_data['settings'][
        'numberofprecinctsreporting']
    precincts_total = precincts_data['settings']['totalprecinctsreporting']

    # creates empty list for results info
    mchenry_county_results = []
    for datum in data:
        if datum['CAT'] == 'County' and datum['SUBCAT'] == 'Questions':
            options = datum['CH']
            votes = datum['V']
            race_name = datum['C']
            race_obj = initialize_race_obj(race_name, precincts_reporting,
                                           precincts_total, COUNTY_NAME)
            for option_index, (option, vote) in enumerate(zip(options, votes)):
                race_obj["reporting_units"][0]['candidates'].append({
                    "first_name": "",
                    "middle_name": "",
                    "last_name": option.title(),
                    "vote_count": int(vote),
                    "ballot_order": int(option_index + 1)
                })
            mchenry_county_results.append(race_obj)
        elif datum['CAT'] == "County" and datum['SUBCAT'] != "Questions":
            candidates = datum['CH']
            cand_votes = datum['V']
            cand_parties = datum['P']
            race_name = datum['C']
            race_obj = initialize_race_obj(race_name, precincts_reporting,
                                           precincts_total, COUNTY_NAME)
            for cand_index, (candidate, cand_vote, cand_party) in enumerate(
                    zip(candidates, cand_votes, cand_parties)):
                # uses probablepeople to parse names into a list
                full_name = pp.parse(candidate, 'person')
                first_name, middle_name, last_name = parse_name(full_name)
                race_obj["reporting_units"][0]['candidates'].append({
                    "first_name": first_name,
                    "middle_name": middle_name,
                    "last_name": last_name,
                    "vote_count": int(cand_vote),
                    "party": cand_party,
                    "ballot_order": int(cand_index + 1)
                })
            mchenry_county_results.append(race_obj)
    with open('scrapers/mchenry_data.json', 'w', encoding='utf-8') as f:
        json.dump(mchenry_county_results, f, ensure_ascii=False, indent=4)
    return mchenry_county_results


# uncomment to test this scraper on its own; keep it commented out when
# running the app
# scrape_mchenry()
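# Shape of the precincts payload the DuPage, McHenry and Lake scrapers all
# assume. The key names come from the lookups above; the values are made up.
# Note that the *total* precinct count lives under 'totalprecinctsreporting':
#
# precincts_data = {
#     "settings": {
#         "numberofprecinctsreporting": 212,  # precincts reporting so far
#         "totalprecinctsreporting": 250,     # total precincts in the county
#     }
# }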
def scrape_lake():
    COUNTY_NAME = "Lake County"

    # sets URLs
    LAKE_RACE_URL = get_results_url()
    PRECINCTS_URL = get_precincts_url()

    # gets data
    data = requests.get(LAKE_RACE_URL).json()
    precincts_data = requests.get(PRECINCTS_URL).json()

    # gets precinct info
    precincts_reporting = precincts_data['settings'][
        'numberofprecinctsreporting']
    precincts_total = precincts_data['settings']['totalprecinctsreporting']

    # creates empty list for results info
    lake_county_results = []
    for datum in data:
        race_name = datum['C']
        candidates = datum['CH']
        cand_votes = datum['V']
        cand_parties = datum['P']
        race_obj = initialize_race_obj(race_name, precincts_reporting,
                                       precincts_total, COUNTY_NAME)
        for cand_index, (candidate, cand_vote, cand_party) in enumerate(
                zip(candidates, cand_votes, cand_parties)):
            # uses probablepeople to parse names into a list
            full_name = pp.parse(candidate, 'person')
            first_name, middle_name, last_name = parse_name(full_name)
            # appends to candidates list (note: cand_party is unpacked above
            # but, unlike in the DuPage and McHenry scrapers, not written out)
            race_obj["reporting_units"][0]['candidates'].append({
                "first_name": first_name,
                "middle_name": middle_name,
                "last_name": last_name,
                "vote_count": int(cand_vote),
                "ballot_order": int(cand_index + 1)
            })
        lake_county_results.append(race_obj)
    with open('scrapers/lake_data.json', 'w', encoding='utf-8') as f:
        json.dump(lake_county_results, f, ensure_ascii=False, indent=4)
    return lake_county_results


# uncomment to test this scraper on its own; keep it commented out when
# running the app
# scrape_lake()
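# A rough sketch of the parse_name helper these scrapers assume, built on
# probablepeople's list of (token, label) tuples. The label names
# ('GivenName', 'MiddleName', 'MiddleInitial', 'Surname') are probablepeople's
# own; everything else here is a guess at the original helper.
#
# def parse_name(parsed_tokens):
#     first = " ".join(t for t, label in parsed_tokens
#                      if label == "GivenName")
#     middle = " ".join(t for t, label in parsed_tokens
#                       if label in ("MiddleName", "MiddleInitial"))
#     last = " ".join(t for t, label in parsed_tokens if label == "Surname")
#     return first, middle, last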
def scrape_cook():
    """Matches fixed-width records in the results file (SummaryExport.txt,
    rewritten by get_txtfile to scrapers/updated_cook.txt) against the
    contests listed in cook-IDs.csv. A race object is created only the first
    time a race name appears (tracked in `added`); every matching record then
    loops through `cook_county_results` and appends its candidate info to
    that race's entry.
    """
    get_txtfile()
    COUNTY_NAME = "Cook County"
    cook_county_results = []
    added = []
    with open('scrapers/cook-IDs.csv', newline='') as f:
        reader = csv.reader(f)
        cook_info = list(reader)
    # scrapers/updated_cook.txt should be the name of the newly-written file
    with open('scrapers/updated_cook.txt', 'r') as r:
        results_data = r.readlines()
    # matches results races to dict races by the first seven characters of
    # the record
    for results_row in results_data:
        current_ID_match = results_row[0:7]  # RESULTS
        for info_line in cook_info:
            full_ID_match = info_line[0][0:7]  # CONTEXT
            if current_ID_match == full_ID_match:
                full_ID = info_line[0]
                race_name = info_line[1].title()
                candidate = info_line[2]
                # uses probablepeople to parse names into a list
                full_name = pp.parse(candidate, 'person')
                first_name, middle_name, last_name = parse_name(full_name)
                precincts_total = int(results_row[7:11])
                vote_count = int(results_row[11:18])
                precincts_reporting = int(results_row[18:22])
                cand_party = full_ID[22:25]
                ballot_order = int(info_line[0][4:7])
                if race_name not in added:
                    # creates object in the format of the race object used in
                    # TribPub's Google Sheet
                    race_obj = initialize_race_obj(race_name,
                                                   precincts_reporting,
                                                   precincts_total,
                                                   COUNTY_NAME)
                    cook_county_results.append(race_obj)
                    added.append(race_name)
                for item in cook_county_results:
                    if item['name'] == race_name:
                        item['reporting_units'][0]['candidates'].append({
                            "first_name": first_name,
                            "middle_name": middle_name,
                            "last_name": last_name,
                            "vote_count": int(vote_count),
                            "ballot_order": int(ballot_order)
                        })
    with open('scrapers/cook_data.json', 'w', encoding='utf-8') as f:
        json.dump(cook_county_results, f, ensure_ascii=False, indent=4)
    return cook_county_results


# uncomment to test this scraper on its own; keep it commented out when
# running the app
# scrape_cook()
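# Fixed-width layout the Cook scraper assumes for each results record. The
# column positions come from the slices above; the sample record is made up:
#
#   sample = "0010001005000123450050"
#   sample[0:7]   -> "0010001"  contest ID, matched against cook-IDs.csv
#   sample[7:11]  -> "0050"     total precincts
#   sample[11:18] -> "0012345"  vote count
#   sample[18:22] -> "0050"     precincts reporting
#
# (Party comes from characters 22-25 of the full ID string in cook-IDs.csv,
# and ballot order from characters 4-7 of that same ID.)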
def scrape_kane():
    COUNTY_NAME = "Kane County"
    kane_data = get_results_url()

    # creates empty list for results info
    kane_county_results = []
    # each h2 holds a race name; the findPrevious/findNext/findChildren calls
    # below navigate relative to it
    race_data = kane_data.findAll("h2")
    for race in race_data:
        candidates = []
        votes = []
        # finds precincts reporting and total precincts
        finding_precincts_info = race.findPrevious('td')
        precincts_info = finding_precincts_info.findPrevious('td')
        # pulls the integers out of the precincts line and makes a list
        precincts = list(map(int, re.findall(r'\d+', str(precincts_info))))
        precincts_reporting = precincts[0]
        precincts_total = precincts[1]
        cands = race.findNext('table')
        names = cands.findChildren('td')
        for name in names:
            name = str(name)
            if name.startswith('<td>'):
                # splits may be necessary to pinpoint just the name;
                # appends each name to the candidates list
                if '(Write-In)' in name:
                    # note: this rebinds name to a list, so the '<b>' check
                    # below never matches a write-in row
                    name = name.split('<b>', 2)
                    name_split = name[0]
                    cand_name_split = name_split.split('>', 2)
                    cand_name = cand_name_split[1]
                    candidates.append(cand_name)
                elif '(Independent)' in name or '(Democratic)' in name or '(Republican)' in name:
                    candidate_split = name.rsplit('(', 1)
                    candidate = candidate_split[0]
                    cand_name_split = candidate.split('>', 1)
                    cand_name = cand_name_split[1]
                    candidates.append(cand_name)
                else:
                    name_split = name.split('>', 2)
                    name_split = str(name_split[1])
                    final_name = name_split.split('</', 2)
                    cand_name = final_name[0]
                    candidates.append(cand_name)
            if '<b>' in name:
                name_split = name.split('</b>', 2)
                name_split = str(name_split[0])
                final_name = name_split.split('<b>', 2)
                if '%' not in final_name[1]:
                    # separates vote percentages from vote counts;
                    # appends votes to the votes list
                    cand_votes = final_name[1]
                    votes.append(cand_votes)
        race = str(race)
        race_split = race.split('<br/>', 2)
        race_split = race_split[0]
        final_race_name = race_split.split('>', 2)
        race_name = final_race_name[1]
        # creates object in the format of the race object used in TribPub's
        # Google Sheet
        race_obj = initialize_race_obj(race_name, precincts_reporting,
                                       precincts_total, COUNTY_NAME)
        for option_index, (candidate, vote) in enumerate(zip(candidates,
                                                             votes)):
            # uses probablepeople to parse names into a list
            full_name = pp.parse(candidate, 'person')
            first_name, middle_name, last_name = parse_name(full_name)
            race_obj["reporting_units"][0]['candidates'].append({
                "first_name": first_name,
                "middle_name": middle_name,
                "last_name": last_name,
                "vote_count": int(vote),
                "ballot_order": int(option_index + 1)
            })
        kane_county_results.append(race_obj)
    with open('scrapers/kane_data.json', 'w', encoding='utf-8') as f:
        json.dump(kane_county_results, f, ensure_ascii=False, indent=4)
    return kane_county_results


# uncomment to test this scraper on its own; keep it commented out when
# running the app
# scrape_kane()
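# One hedged way to drive all six scrapers at once when testing locally,
# instead of uncommenting the per-scraper calls above. Assumes the helper
# URLs and input files each scraper needs are reachable:
if __name__ == "__main__":
    all_results = []
    for scraper in (scrape_cook, scrape_dupage, scrape_kane, scrape_kendall,
                    scrape_lake, scrape_mchenry):
        all_results.extend(scraper())
    print(f"scraped {len(all_results)} races across six counties")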