def _get_ocd_id(self, jurisdiction, precinct=False): if precinct: return "{}/county:{}/precinct:{}".format( self.mapping['ocd_id'], ocd_type_id(jurisdiction), ocd_type_id(precinct)) elif 'county' in self.mapping['ocd_id']: return "{}".format(self.mapping['ocd_id']) else: return "{}/county:{}".format( self.mapping['ocd_id'], ocd_type_id(jurisdiction))
def __call__(self):
    """
    Build a Result from every Vermont RawResult and save them together.

    Each raw result matched by the ``state == 'VT'`` aggregation is turned
    into a dict of Result fields: contest, candidate, write-in flag, party
    abbreviation, jurisdiction and OCD ID. ``_alter_result_fields`` gets a
    final pass over the fields before the Result is instantiated. The
    collected results are handed to ``_create_results`` after the loop.
    """
    print((str(datetime.now()), "CreateResultsTransform begin"))

    results = self._create_results_collection()
    vt_only = [{"$match": {"state": 'VT'}}]

    place_ocd_id = "ocd-division/country:us/state:vt/place:%s"
    precinct_ocd_id = "ocd-division/country:us/state:vt/place:%s/precinct:%s"

    for rr in RawResult.objects.aggregate(*vt_only):
        logResult("creating result: ", rr)
        fields = self._get_fields(rr, result_fields)

        # The contest looked up from the raw result is passed along to the
        # candidate lookup; the candidate's own contest is what is kept.
        fields['contest'] = self.get_contest(rr)
        candidate = self.get_candidate(
            rr, extra={'contest': fields['contest']})
        fields['candidate'] = candidate
        fields['contest'] = candidate.contest

        jurisdiction = rr.get('jurisdiction', None)
        parent_jurisdiction = rr.get('parent_jurisdiction', None)

        if candidate['full_name'] == 'Write-Ins':
            fields['write_in'] = True

        party = self.get_party(rr)
        if party:
            fields['party'] = party.abbrev

        fields['jurisdiction'] = jurisdiction

        if fields['reporting_level'] != "precinct":
            fields['ocd_id'] = place_ocd_id % ocd_type_id(jurisdiction)
        elif not jurisdiction:
            # The precinct is the same as the town, so report the town
            fields['jurisdiction'] = parent_jurisdiction
            fields['ocd_id'] = place_ocd_id % ocd_type_id(
                parent_jurisdiction)
        else:
            fields['ocd_id'] = precinct_ocd_id % (
                ocd_type_id(parent_jurisdiction),
                ocd_type_id(jurisdiction))

        fields = self._alter_result_fields(fields, rr)
        results.append(Result(**fields))

    self._create_results(results)
    print((str(datetime.now()), "Created %d results." % results.count()))
def _build_election_metadata_zipped_special(self, election):
    """
    Return one metadata dict per URL path known for the election.

    Precinct-level paths are attributed to the county named in the path
    and get a county OCD ID; every other path is attributed to the state
    of Arkansas.
    """
    entries = []
    for url_path in self._url_paths_for_election(election['slug']):
        level = url_path['reporting_level']
        name_bits = {
            'reporting_level': level,
            'extension': '.txt',
            'office': url_path['office'],
            'office_district': url_path['district'],
        }

        if level == 'precinct':
            name = url_path['jurisdiction']
            name_bits['jurisdiction'] = name
            ocd_id = 'ocd-division/country:us/state:ar/county:{}'.format(
                ocd_type_id(name))
        else:
            name = 'Arkansas'
            ocd_id = 'ocd-division/country:us/state:ar'

        generated = self._standardized_filename(election, **name_bits)
        entries.append({
            'generated_filename': generated,
            'raw_url': url_path['url'],
            'raw_extracted_filename': url_path['raw_extracted_filename'],
            'ocd_id': ocd_id,
            'name': name,
            'election': election['slug'],
        })
    return entries
def _prep_state_leg_results(self, row):
    """
    Return one RawResult per legislative-district column of the row.

    Columns whose stripped name starts with 'LEGS' hold the votes for a
    state legislative district; all other columns are ignored here. The
    stripped column name is used both as the jurisdiction and as the
    ``sldl`` part of the OCD ID.
    """
    shared = self._base_kwargs(row)
    shared.update({
        'reporting_level': 'state_legislative',
        'winner': row['Winner'].strip(),
        'write_in': self._writein(row),
        'party': row['Party'].strip(),
    })

    # Some files flag write-ins at the contest level; when that column is
    # present it takes precedence over the value computed above.
    if 'Write-In?' in row:
        shared['write_in'] = row['Write-In?'].strip()

    district_results = []
    for column, raw_votes in row.items():
        district = column.strip()
        if district.startswith('LEGS'):
            shared.update({
                'jurisdiction': district,
                'ocd_id': "{}/sldl:{}".format(
                    self._get_state_ocd_id(), ocd_type_id(district)),
                'votes': self._votes(raw_votes),
            })
            district_results.append(RawResult(**shared))
    return district_results
def _prep_county_result(self, row):
    """
    Build the county-level RawResult for one row of a county file.

    The county is taken from the file's mapping (``self.mapping['name']``)
    rather than from the row. When the contest's office is not one of
    ``self.district_offices`` but the row still carries a district, the
    result is reported as 'congressional_district_by_county' and the
    district is moved from ``district`` to ``reporting_district``.

    The per-type vote counts that are present in the row are attached as
    ``vote_breakdowns``. Previously this dict was built and then dropped.
    """
    kwargs = self._base_kwargs(row)

    vote_brkdown_fields = [
        ('election_night_total', 'Election Night Votes'),
        ('absentee_total', 'Absentees Votes'),
        ('provisional_total', 'Provisional Votes'),
        ('second_absentee_total', '2nd Absentees Votes'),
    ]
    # Not every file has every breakdown column, so missing columns are
    # simply left out of the dict.
    # NOTE(review): the values are kept as stripped strings, unlike
    # 'votes' which goes through self._votes -- confirm whether they
    # should be normalized the same way.
    vote_breakdowns = {}
    for field, key in vote_brkdown_fields:
        try:
            vote_breakdowns[field] = row[key].strip()
        except KeyError:
            pass

    kwargs.update({
        'reporting_level': 'county',
        'jurisdiction': self.mapping['name'],
        'ocd_id': "{}/county:{}".format(
            self._get_state_ocd_id(), ocd_type_id(self.mapping['name'])),
        'party': row['Party'].strip(),
        'votes': self._votes(row['Total Votes']),
        'vote_breakdowns': vote_breakdowns,
    })

    if (kwargs['office'] not in self.district_offices
            and kwargs['district'] != ''):
        kwargs['reporting_level'] = 'congressional_district_by_county'
        kwargs['reporting_district'] = kwargs['district']
        del kwargs['district']

    return RawResult(**kwargs)
def load(self):
    """
    Load precinct-level results from a CSV file with a header row.

    The first row after the header is discarded. Rows rejected by
    ``_skip_row`` and rows whose votes value is 'X' are ignored. The
    precinct's OCD ID is nested under the OCD ID of the first datasource
    jurisdiction whose county matches the row's county (compared stripped
    and upper-cased). All results are inserted in a single bulk call.
    """
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'

    # Collected here so that everything can be inserted in one go
    raw_results = []

    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(csvfile)
        next(reader, None)
        for row in reader:
            if self._skip_row(row) or row['votes'] == 'X':
                continue

            party = row['party'].strip()
            fields = self._common_kwargs.copy()
            fields['primary_party'] = party
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))

            precinct = row['precinct'].strip()
            county_key = row['county'].strip().upper()
            county_matches = [
                j for j in self.datasource._jurisdictions()
                if j['county'].strip().upper() == county_key
            ]
            county_ocd_id = county_matches[0]['ocd_id']

            fields.update({
                'party': party,
                'jurisdiction': precinct,
                'parent_jurisdiction': row['county'],
                'ocd_id': "{}/precinct:{}".format(
                    county_ocd_id, ocd_type_id(precinct)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(float(row['votes'])),
            })
            raw_results.append(RawResult(**fields))

    RawResult.objects.insert(raw_results)
def _get_ocd_id(self, raw_result, reporting_level=None):
    """
    Return the OCD ID matching a RawResult's jurisdiction at a given
    reporting level, or None when the level is not handled.

    Arguments:

    raw_result - the RawResult whose jurisdiction is used in the OCD ID
    reporting_level - the reporting level the OCD ID should reflect.
        Falls back to raw_result.reporting_level when omitted. Pass it
        to reuse a RawResult's jurisdiction under a different level.
    """
    level = reporting_level
    if level is None:
        level = raw_result.reporting_level

    juris_ocd = ocd_type_id(raw_result.jurisdiction)

    if level == "precinct":
        # Precincts hang off the county OCD ID carried by the raw result
        return "%s/precinct:%s" % (raw_result.county_ocd_id, juris_ocd)

    if level == "state_legislative":
        return "ocd-division/country:us/state:md/sldl:%s" % juris_ocd

    if level == "county":
        # TODO: Should jurisdiction/ocd_id be different for Baltimore City?
        return "ocd-division/country:us/state:md/county:%s" % juris_ocd

    return None
def load(self):
    """
    Load county-level results from a file parsed with ``clarify.Parser``.

    NOTE(review): the loop below binds each parsed result to ``result``,
    but everything after the skip check reads from ``row``, which is never
    assigned in this method. Unless a module-level ``row`` exists, the
    first result that is not skipped raises NameError. The body looks
    like it was carried over from a CSV-based loader -- confirm, and port
    it to the attributes of the parsed result objects.
    """
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'county'
    # Store result instances for bulk loading
    results = []

    p = clarify.Parser()
    p.parse(self._file_handle)

    for result in p.results:
        # Skipping is decided on the result's contest
        if self._skip_row(result.contest):
            continue

        # NOTE(review): ``row`` is unbound from here on (see docstring).
        if row['county'].strip() == '':
            # A blank county is treated as a totals row; the value is
            # not read again in this method.
            total_votes = int(row['votes'].strip())
        else:
            rr_kwargs = self._common_kwargs.copy()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))
            jurisdiction = row['county'].strip()
            rr_kwargs.update({
                'jurisdiction': jurisdiction,
                # County OCD ID is nested under the mapping's OCD ID
                'ocd_id': "{}/county:{}".format(self.mapping['ocd_id'],
                    ocd_type_id(jurisdiction)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip())
            })
            results.append(RawResult(**rr_kwargs))

    RawResult.objects.insert(results)
def _get_ocd_id(self, raw_result, jurisdiction=None, reporting_level=None):
    """
    Return the OCD ID for a jurisdiction at a given reporting level, or
    None when the level is not handled.

    Arguments:

    raw_result: the RawResult supplying the defaults and, for precincts,
        the OCD ID the parent is derived from.
    jurisdiction: the jurisdiction to build the OCD ID for. Falls back to
        raw_result.jurisdiction when omitted.
    reporting_level: the reporting level the OCD ID should reflect.
        Falls back to raw_result.reporting_level when omitted. Pass it
        to reuse a RawResult's jurisdiction under a different level.
    """
    level = reporting_level
    if level is None:
        level = raw_result.reporting_level

    name = jurisdiction
    if name is None:
        name = raw_result.jurisdiction

    juris_ocd = ocd_type_id(name)

    if level == "precinct":
        # Dropping the last path segment of the raw result's OCD ID
        # yields the parent (county) OCD ID.
        parent_parts = raw_result.ocd_id.split('/')[:-1]
        return "%s/precinct:%s" % ("/".join(parent_parts), juris_ocd)

    if level == "state_legislative":
        return "ocd-division/country:us/state:md/sldl:%s" % juris_ocd

    if level == "county":
        # TODO: Should jurisdiction/ocd_id be different for Baltimore City?
        return "ocd-division/country:us/state:md/county:%s" % juris_ocd

    return None
def load(self):
    """
    Load precinct-level results from a headerless-by-declaration CSV.

    Column names are supplied explicitly and the first four rows of the
    file are discarded. A votes value of '*' is stored as 'N/A'; any
    other value has its thousands separators removed and is stored as an
    int. All results are inserted in a single bulk call.
    """
    # use first row as headers, not pre-canned list
    # need to use OCD_ID from jurisdiction in mapping
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'

    columns = ("Jurisdiction", "Precinct", "office", "candidate", "Votes")
    raw_results = []

    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(csvfile, fieldnames=columns)
        for _ in range(4):
            next(reader, None)

        for row in reader:
            if self._skip_row(row):
                continue

            fields = self._common_kwargs.copy()
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))

            # NOTE(review): this OCD ID is looked up but not used below;
            # the lookup still fails with IndexError for a row whose
            # jurisdiction is unknown to the datasource.
            known = [
                j for j in self.datasource._jurisdictions()
                if j['jurisdiction'] == row['Jurisdiction']
            ]
            ocd_id = known[0]['ocd_id']

            jurisdiction = row['Jurisdiction'].strip()

            raw_votes = row['Votes']
            if raw_votes.strip() == '*':
                votes = 'N/A'
            else:
                votes = int(raw_votes.replace(',', '').strip())

            fields.update({
                'jurisdiction': jurisdiction,
                'ocd_id': "{}/precinct:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(row['Precinct'])),
                'votes': votes,
            })
            raw_results.append(RawResult(**fields))

    RawResult.objects.insert(raw_results)
def load(self):
    """
    Load county-level results from a CSV file without a header row.

    Every row that is not rejected by ``_skip_row`` becomes a RawResult
    whose jurisdiction is the row's county and whose parent jurisdiction
    is "Texas". All results are inserted in a single bulk call.
    """
    columns = [
        'county',
        'office',
        'district',
        'candidate',
        'incumbent',
        'party',
        'votes',
        'pct',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'county'

    # Collected here so that everything can be inserted in one go
    raw_results = []

    with self._file_handle as csvfile:
        for row in unicodecsv.DictReader(csvfile, fieldnames=columns):
            if self._skip_row(row):
                continue

            fields = self._common_kwargs.copy()
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))

            county = row['county'].strip()
            fields.update({
                'jurisdiction': county,
                'parent_jurisdiction': "Texas",
                'ocd_id': "{}/county:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(county)),
                'votes': self._votes(row['votes']),
            })
            raw_results.append(RawResult(**fields))

    RawResult.objects.insert(raw_results)
def _prep_state_leg_results(self, row):
    """
    Return one RawResult per legislative-district column of the row.

    Columns whose stripped name starts with 'LEGS' hold the votes for a
    state legislative district; all other columns are ignored here. The
    jurisdiction keeps the full column name, while the OCD ID is built
    from the name with "LEGS " removed.
    """
    shared = self._base_kwargs(row)
    shared.update({
        'reporting_level': 'state_legislative',
        'winner': row['Winner'].strip(),
        'write_in': self._writein(row),
        'party': row['Party'].strip(),
    })

    # Some files flag write-ins at the contest level; when that column is
    # present it takes precedence over the value computed above.
    if 'Write-In?' in row:
        shared['write_in'] = row['Write-In?'].strip()

    district_results = []
    for column, raw_votes in row.items():
        district = column.strip()
        if district.startswith('LEGS'):
            # Dropping the "LEGS " prefix is a somewhat transformy thing
            # to do in a loader, but it keeps the OCD IDs as usable as
            # possible when raw results are baked out.
            district_id = ocd_type_id(district.replace("LEGS ", ""))
            shared.update({
                'jurisdiction': district,
                'ocd_id': "{}/sldl:{}".format(
                    self._get_state_ocd_id(), district_id),
                'votes': self._votes(raw_votes),
            })
            district_results.append(RawResult(**shared))
    return district_results
def _build_contest_kwargs(self, row):
    """
    Return the county, office and district fields taken from a row.

    The county's OCD ID is nested under the mapping's OCD ID. All values
    are stripped of surrounding whitespace.
    """
    county = row['county'].strip()
    county_ocd_id = "{}/county:{}".format(
        self.mapping['ocd_id'], ocd_type_id(county))
    return {
        'ocd_id': county_ocd_id,
        'jurisdiction': county,
        'office': row['office'].strip(),
        'district': row['district'].strip(),
    }
def test_ocd_type_id(self):
    # Each case is (arguments for ocd_type_id, expected identifier)
    cases = [
        # Spaces are converted to underscores and non-word characters
        # to tildes
        (("Prince George's",), u"prince_george~s"),
        # Leading zeros are stripped by default
        (("03D",), u"3d"),
        # Keeping leading zeros can be forced
        (("03D", False), u"03d"),
        # Hyphens are not escaped
        (("001-000-1",), u"1-000-1"),
        # Leading zero stripping can be suppressed
        (("001-000-1", False), u"001-000-1"),
    ]
    for call_args, expected in cases:
        self.assertEqual(ocd_type_id(*call_args), expected)
def load(self):
    """
    Load precinct-level results from a CSV file without a header row.

    The row's locality code is matched (as an int) against the FIPS code
    of the datasource jurisdictions to find the parent jurisdiction. Rows
    with a blank PrecinctUid get the parent's OCD ID; all other rows get
    a precinct OCD ID nested under it. For primaries the row's party is
    also stored as the primary party. Results go through a
    BulkInsertBuffer that is flushed at the end.
    """
    columns = [
        'CandidateUid', 'FirstName', 'MiddleName', 'LastName', 'Suffix',
        'TOTAL_VOTES', 'Party', 'WriteInVote', 'LocalityUid',
        'LocalityCode', 'LocalityName', 'PrecinctUid', 'PrecinctName',
        'DistrictUid', 'DistrictType', 'DistrictName', 'OfficeUid',
        'OfficeTitle', 'ElectionUid', 'ElectionType', 'ElectionDate',
        'ElectionName',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'

    buffered = BulkInsertBuffer(RawResult)

    with self._file_handle as csvfile:
        for row in unicodecsv.DictReader(csvfile, fieldnames=columns):
            if self._skip_row(row):
                continue

            fields = self._common_kwargs.copy()
            if 'primary' in self.mapping['election']:
                fields['primary_party'] = row['Party'].strip()
            for build in (self._build_contest_kwargs,
                          self._build_candidate_kwargs,
                          self._build_write_in_kwargs,
                          self._build_total_votes):
                fields.update(build(row))

            locality_code = int(row['LocalityCode'])
            localities = [
                j for j in self.datasource._jurisdictions()
                if int(j['fips']) == locality_code
            ]
            locality = localities[0]

            precinct_name = str(row['PrecinctName'])
            if row['PrecinctUid'].strip() == '':
                ocd_id = locality['ocd_id']
            else:
                ocd_id = "{}/precinct:{}".format(
                    locality['ocd_id'], ocd_type_id(precinct_name))

            fields.update({
                'party': row['Party'].strip(),
                'jurisdiction': precinct_name,
                'parent_jurisdiction': locality['name'],
                'ocd_id': ocd_id,
            })
            buffered.append(RawResult(**fields))

    buffered.flush()
def _build_jurisdiction_kwargs(self, row):
    """
    Return the jurisdiction fields for a ward-level row.

    The ward's OCD ID is a precinct nested under the county's OCD ID,
    which is found in the datasource's county map under the stripped,
    upper-cased county name. The parent jurisdiction is the county value
    exactly as it appears in the row.
    """
    ward = row['ward'].strip()
    ocd_ids_by_county = self.datasource._ocd_id_for_county_map()
    parent_ocd_id = ocd_ids_by_county[row['county'].strip().upper()]

    ward_ocd_id = "{}/precinct:{}".format(parent_ocd_id, ocd_type_id(ward))
    return {
        'jurisdiction': ward,
        'parent_jurisdiction': row['county'],
        'ocd_id': ward_ocd_id,
    }
def _ocd_id_for_url_path(self, url_path): # This method is needed because there can be a url path for either # a single, statewide file or a file that contains results for only # one county. ocd_id = "ocd-division/country:us/state:wa" if url_path['jurisdiction']: # A jurisdiction is specified, which means that results are # broken down per-county ocd_id = "{}/county:{}".format(ocd_id, ocd_type_id(url_path['jurisdiction'])) return ocd_id
def load(self):
    """
    Load precinct-level results from a CSV file without a header row.

    The parish is matched against the datasource jurisdictions by name,
    ignoring case and spaces. 'Early Voting' and 'Provisional Votes' rows
    have no jurisdiction and get a parish OCD ID nested under the
    mapping's OCD ID; every other row gets a precinct OCD ID nested under
    the matched parish's OCD ID. All results are inserted in a single
    bulk call.
    """
    columns = [
        'parish',
        'precinct',
        'office',
        'district',
        'party',
        'candidate',
        'votes',
    ]
    parish_wide_rows = ('Early Voting', 'Provisional Votes')

    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'

    # Collected here so that everything can be inserted in one go
    raw_results = []

    with self._file_handle as csvfile:
        for row in unicodecsv.DictReader(csvfile, fieldnames=columns):
            if self._skip_row(row):
                continue

            party = row['party'].strip()
            fields = self._common_kwargs.copy()
            fields['primary_party'] = party
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))

            parish_key = row['parish'].upper().replace(' ', '')
            parish_matches = [
                j for j in self.datasource._jurisdictions()
                if j['name'].upper().replace(' ', '') == parish_key
            ]
            county_ocd_id = parish_matches[0]['ocd_id']

            precinct = row['precinct'].strip()
            if precinct in parish_wide_rows:
                jurisdiction = None
                ocd_id = "{}/parish:{}".format(
                    self.mapping['ocd_id'],
                    ocd_type_id(row['parish'].strip()))
            else:
                jurisdiction = precinct
                # The unstripped precinct value is what feeds the OCD ID
                ocd_id = "{}/precinct:{}".format(
                    county_ocd_id, ocd_type_id(row['precinct']))

            fields.update({
                'party': party,
                'jurisdiction': jurisdiction,
                'parent_jurisdiction': row['parish'],
                'ocd_id': ocd_id,
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip()),
            })
            raw_results.append(RawResult(**fields))

    RawResult.objects.insert(raw_results)
def _prep_precinct_result(self, row):
    """
    Build the precinct-level RawResult for a row.

    The precinct's OCD ID is nested under the OCD ID of the first
    datasource jurisdiction whose county matches the row's county,
    ignoring case.
    """
    fields = self._base_kwargs(row)
    precinct_name = str(row['precinct']).strip()

    county_key = row['county'].upper()
    county_matches = [
        j for j in self.datasource._jurisdictions()
        if j['county'].upper() == county_key
    ]
    parent_ocd_id = county_matches[0]['ocd_id']

    fields.update({
        'reporting_level': 'precinct',
        'jurisdiction': precinct_name,
        'parent_jurisdiction': row['county'],
        'ocd_id': "{}/precinct:{}".format(
            parent_ocd_id, ocd_type_id(precinct_name)),
        'party': row['party'].strip(),
        'votes': self._votes(row['total_votes']),
    })
    return RawResult(**fields)
def _prep_precinct_result(self, row, office, district, candidate, county, votes):
    """
    Build the precinct-level RawResult for one candidate in a row.

    The precinct name is the second cell of the row and the party is the
    second element of ``candidate``. The precinct's OCD ID is nested
    under the OCD ID of the first datasource jurisdiction whose county
    matches ``county``, ignoring case.
    """
    fields = self._base_kwargs(row, office, district, candidate)
    precinct_name = str(row[1]).strip()

    county_key = county.upper()
    county_matches = [
        j for j in self.datasource._jurisdictions()
        if j['county'].upper() == county_key
    ]
    parent_ocd_id = county_matches[0]['ocd_id']

    fields.update({
        'reporting_level': 'precinct',
        'jurisdiction': precinct_name,
        'parent_jurisdiction': county,
        'ocd_id': "{}/precinct:{}".format(
            parent_ocd_id, ocd_type_id(precinct_name)),
        'party': candidate[1],
        'votes': self._votes(votes),
    })
    return RawResult(**fields)
def _get_county_ocd_id(self, jurisdiction): """ Build an OCD ID for a county-level jurisdiction when the mapping reflects the state OCD ID. """ # Baltimore City is treated like a county in the results, but we # should use the city's OCD ID if jurisdiction == "Baltimore City": ocd_id = "{}/place:baltimore".format(self.mapping['ocd_id']) else: ocd_id = "{}/county:{}".format(self.mapping['ocd_id'], ocd_type_id(jurisdiction)) return ocd_id
def load(self):
    """
    Load county-level results from a latin-1 CSV file without a header
    row.

    Rows whose county is 'Totals' are not stored; their votes and winner
    are remembered and attached to the county rows that follow as
    ``total_votes`` and ``contest_winner``. All results are inserted in
    a single bulk call.
    """
    columns = [
        'year',
        'election',
        'office',
        'party',
        'district',
        'candidate',
        'county',
        'votes',
        'winner',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'county'

    # Collected here so that everything can be inserted in one go
    raw_results = []

    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(
            csvfile, fieldnames=columns, encoding='latin-1')
        for row in reader:
            if self._skip_row(row):
                continue

            county = row['county'].strip()
            if county == 'Totals':
                total_votes = int(row['votes'].strip())
                contest_winner = row['winner'].strip()
                continue

            party = row['party'].strip()
            fields = self._common_kwargs.copy()
            fields['primary_party'] = party
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))

            # NOTE(review): total_votes and contest_winner are only bound
            # once a 'Totals' row has been seen; a county row before the
            # first 'Totals' row fails here.
            fields.update({
                'party': party,
                'jurisdiction': county,
                'ocd_id': "{}/county:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(county)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip()),
                'winner': row['winner'].strip(),
                'total_votes': total_votes,
                'contest_winner': contest_winner,
            })
            raw_results.append(RawResult(**fields))

    RawResult.objects.insert(raw_results)
def _prep_precinct_result(self, precinct, county, candidate, office, party, votes):
    """
    Build the precinct-level RawResult for one vote total.

    Each precinct has several candidate totals, plus write-ins and over
    and under votes; this handles one of them. A truthy party is also
    stored as the primary party. The precinct's OCD ID is nested under
    the mapping's OCD ID, and the vote breakdowns are left empty.
    """
    fields = self._base_kwargs(candidate, office, party)

    if party:
        fields['primary_party'] = party

    precinct_ocd_id = "{}/precinct:{}".format(
        self.mapping['ocd_id'], ocd_type_id(precinct))
    fields.update({
        'reporting_level': 'precinct',
        'jurisdiction': precinct,
        'parent_jurisdiction': county,
        'ocd_id': precinct_ocd_id,
        'votes': votes,
        'vote_breakdowns': {},
    })
    return RawResult(**fields)
def _prep_precinct_result(self, row):
    """
    Build the precinct-level RawResult for a row.

    The jurisdiction is the precinct ID followed by the polling location,
    while the OCD ID uses the precinct ID alone, nested under the OCD ID
    of the first datasource jurisdiction whose county matches the row's
    county name, ignoring case.
    """
    fields = self._common_kwargs.copy()
    fields.update(self._build_contest_kwargs(row))
    fields.update(self._build_candidate_kwargs(row))

    precinct_label = str(
        row['precinct_id'] + ' ' + row['polling_location']).strip()

    county_key = row['county_name'].upper()
    county_matches = [
        j for j in self.datasource._jurisdictions()
        if j['county'].upper() == county_key
    ]
    parent_ocd_id = county_matches[0]['ocd_id']

    fields.update({
        'reporting_level': 'precinct',
        'jurisdiction': precinct_label,
        'parent_jurisdiction': row['county_name'],
        'ocd_id': "{}/precinct:{}".format(
            parent_ocd_id, ocd_type_id(str(row['precinct_id']))),
        'party': row['party'].strip(),
        'votes': self._votes(row['votes']),
    })
    return RawResult(**fields)
def _prep_precinct_result(self, row):
    """
    Build the precinct-level RawResult for a row.

    Party comes from the 'party_cd' column and votes from the
    'ballot_count' column. The precinct's OCD ID is nested under the OCD
    ID of the first datasource jurisdiction whose county matches the
    row's county, ignoring case.
    """
    fields = self._base_kwargs(row)
    fields.update(self._build_contest_kwargs(row))
    fields.update(self._build_candidate_kwargs(row))

    precinct_name = str(row['precinct']).strip()

    county_key = row['county'].upper()
    county_matches = [
        j for j in self.datasource._jurisdictions()
        if j['county'].upper() == county_key
    ]
    parent_ocd_id = county_matches[0]['ocd_id']

    fields.update({
        'reporting_level': 'precinct',
        'jurisdiction': precinct_name,
        'parent_jurisdiction': row['county'],
        'ocd_id': "{}/precinct:{}".format(
            parent_ocd_id, ocd_type_id(precinct_name)),
        'party': row['party_cd'].strip(),
        'votes': self._votes(row['ballot_count']),
    })
    return RawResult(**fields)
def _prep_precinct_result(self, row):
    """
    Build the precinct-level RawResult for a row.

    The precinct is named "<election district>-<election precinct>" and
    its OCD ID is nested under the mapping's OCD ID. The election night
    votes are stored as the result's votes, typed as 'election_day'.
    """
    fields = self._base_kwargs(row)

    district = row['Election District']
    precinct_number = row['Election Precinct'].strip()
    precinct = "%s-%s" % (district, precinct_number)

    fields.update({
        'reporting_level': 'precinct',
        'jurisdiction': precinct,
        'ocd_id': "{}/precinct:{}".format(
            self.mapping['ocd_id'], ocd_type_id(precinct)),
        'party': row['Party'].strip(),
        'votes': self._votes(row['Election Night Votes']),
        'votes_type': 'election_day',
        'winner': row['Winner'],
        'write_in': self._writein(row),
    })
    return RawResult(**fields)
def load(self):
    """
    Load county-level results from a latin-1 CSV file without a header
    row.

    Rows with a blank county are totals rows and are not stored. Every
    other row that is not rejected by ``_skip_row`` becomes a RawResult
    for its county.
    """
    columns = [
        'candidate',
        'office',
        'district',
        'party',
        'county',
        'votes',
        'winner',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'county'

    # A BulkInsertBuffer is used because the load process was running
    # out of memory on prod-1
    buffered = BulkInsertBuffer(RawResult)

    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(
            csvfile, fieldnames=columns, encoding='latin-1')
        for row in reader:
            if self._skip_row(row):
                continue

            county = row['county'].strip()
            if county == '':
                # Totals row: parsed, but the values are not read again
                # in this method.
                total_votes = int(row['votes'].strip())
                contest_winner = row['winner'].strip()
                continue

            party = row['party'].strip()
            fields = self._common_kwargs.copy()
            fields['primary_party'] = party
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))
            fields.update({
                'party': party,
                'jurisdiction': county,
                'ocd_id': "{}/county:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(county)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip()),
            })
            buffered.append(RawResult(**fields))

    # Whatever is still sitting in the buffer needs to be inserted too
    buffered.flush()
def load(self):
    """
    Load precinct-level results from a CSV file without a header row.

    Rows with a blank precinct are totals rows and are not stored. For
    every other row the precinct's OCD ID is nested under the OCD ID of
    the first datasource jurisdiction whose county matches the row's
    county, ignoring case. All results are inserted in a single bulk
    call.
    """
    columns = [
        'candidate',
        'office',
        'district',
        'party',
        'county',
        'precinct',
        'votes',
        'winner',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'

    # Collected here so that everything can be inserted in one go
    raw_results = []

    with self._file_handle as csvfile:
        for row in unicodecsv.DictReader(csvfile, fieldnames=columns):
            if self._skip_row(row):
                continue

            precinct = row['precinct'].strip()
            if precinct == '':
                # Totals row: parsed, but the values are not read again
                # in this method.
                total_votes = int(row['votes'].strip())
                contest_winner = row['winner'].strip()
                continue

            party = row['party'].strip()
            fields = self._common_kwargs.copy()
            fields['primary_party'] = party
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))

            county_key = row['county'].upper()
            county_matches = [
                j for j in self.datasource._jurisdictions()
                if j['county'].upper() == county_key
            ]
            county_ocd_id = county_matches[0]['ocd_id']

            fields.update({
                'party': party,
                'jurisdiction': precinct,
                'parent_jurisdiction': row['county'],
                'ocd_id': "{}/precinct:{}".format(
                    county_ocd_id, ocd_type_id(precinct)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip()),
            })
            raw_results.append(RawResult(**fields))

    RawResult.objects.insert(raw_results)
def load(self):
    """
    Load results from a CSV file with a header row.

    Rows rejected by ``_skip_row`` are counted and ignored. A row with a
    precinct is attributed to that precinct, with the county as its
    parent; a row without one is attributed to the county itself. All
    results are inserted in a single bulk call after a summary line has
    been printed.

    Raises ValueError when a row's votes value is not an integer; the
    offending row is printed first.
    """
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'
    # Store result instances for bulk loading
    results = []
    num_skipped = 0

    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(csvfile)
        for row in reader:
            if self._skip_row(row):
                num_skipped += 1
                continue

            rr_kwargs = self._common_kwargs.copy()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))

            # The 'votes' column gets screwed up a lot, so handle it
            # by additionally printing debug information.
            try:
                rr_kwargs.update({'votes': int(row['votes'])})
            except ValueError:
                print('Bad votes in row {}'.format(row))
                # Bare raise keeps the original traceback intact
                raise

            county = row['county'].strip()
            county_ocd_id = self._get_county_ocd_id(county)
            precinct = row['precinct'].strip()

            if precinct:
                # This must be a plain string: a trailing comma after the
                # format() call used to turn it into a 1-tuple.
                precinct_ocd_id = "{}/precinct:{}".format(
                    county_ocd_id, ocd_type_id(precinct))
                rr_kwargs.update({
                    'ocd_id': precinct_ocd_id,
                    'jurisdiction': precinct,
                    'parent_jurisdiction': county,
                })
            else:
                rr_kwargs.update({
                    'ocd_id': county_ocd_id,
                    'jurisdiction': county,
                    'parent_jurisdiction': 'ocd-division/country:us/state:in',
                })

            results.append(RawResult(**rr_kwargs))

    print('\tInserting {} results (skipped {} rows)'.format(
        len(results), num_skipped))
    RawResult.objects.insert(results)
def _build_election_metadata_clarity_precinct(self, election, fmt, jurisdiction):
    """
    Return one metadata dict per Clarity precinct-level URL path.

    Each entry is attributed to the county named in the URL path and
    describes a "detail.<fmt>" file extracted from the download.
    """
    url_paths = self._clarity_precinct_url_paths(election, fmt, jurisdiction)
    extracted_name = "detail.{}".format(fmt)

    entries = []
    for url_path in url_paths:
        county = url_path['jurisdiction']
        county_ocd_id = 'ocd-division/country:us/state:ar/county:{}'.format(
            ocd_type_id(county))
        generated = self._standardized_filename(
            election,
            jurisdiction=county,
            reporting_level='precinct',
            extension='.' + fmt)
        entries.append({
            "generated_filename": generated,
            "raw_extracted_filename": extracted_name,
            "raw_url": url_path['url'],
            "ocd_id": county_ocd_id,
            "name": county,
            "election": election['slug'],
        })
    return entries
def load(self):
    """
    Load precinct-level results from a latin-1 CSV file.

    Column names are supplied explicitly and the first four rows of the
    file are discarded. The precinct is the jurisdiction and the row's
    Jurisdiction column is its parent. A votes value of '*' is stored as
    'N/A'; any other value has its thousands separators removed and is
    stored as an int. All results are inserted in a single bulk call.
    """
    # use first row as headers, not pre-canned list
    # need to use OCD_ID from jurisdiction in mapping
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'

    columns = ("Jurisdiction", "Precinct", "office", "candidate", "Votes")
    raw_results = []

    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(
            csvfile, encoding='latin-1', fieldnames=columns)
        for _ in range(4):
            next(reader, None)

        for row in reader:
            if self._skip_row(row):
                continue

            fields = self._common_kwargs.copy()
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))

            # NOTE(review): this OCD ID is looked up but not used below;
            # the lookup still fails with IndexError for a row whose
            # jurisdiction is unknown to the datasource.
            known = [
                j for j in self.datasource._jurisdictions()
                if j['jurisdiction'] == row['Jurisdiction']
            ]
            ocd_id = known[0]['ocd_id']

            precinct = row['Precinct'].strip()

            raw_votes = row['Votes']
            if raw_votes.strip() == '*':
                votes = 'N/A'
            else:
                votes = int(raw_votes.replace(',', '').strip())

            fields.update({
                'jurisdiction': precinct,
                'parent_jurisdiction': row['Jurisdiction'],
                'ocd_id': "{}/precinct:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(row['Precinct'])),
                'votes': votes,
            })
            raw_results.append(RawResult(**fields))

    RawResult.objects.insert(raw_results)
def _build_election_metadata_clarity_precinct(self, election, fmt):
    """
    Return one metadata dict per Clarity precinct-level URL path.

    Each entry is attributed to the county named in the URL path and
    describes a "detail.<fmt>" file extracted from the download.
    """
    extracted_name = "detail.{}".format(fmt)

    entries = []
    for url_path in self._clarity_precinct_url_paths(election, fmt):
        county = url_path['jurisdiction']
        county_ocd_id = 'ocd-division/country:us/state:ar/county:{}'.format(
            ocd_type_id(county))
        generated = self._standardized_filename(
            election,
            jurisdiction=county,
            reporting_level='precinct',
            extension='.' + fmt)
        entries.append({
            "generated_filename": generated,
            "raw_extracted_filename": extracted_name,
            "raw_url": url_path['url'],
            "ocd_id": county_ocd_id,
            "name": county,
            "election": election['slug'],
        })
    return entries
def _prep_precinct_result(self, row):
    """
    Build the precinct-level RawResult for a row.

    The precinct is named "<election district>-<election precinct>" and
    its OCD ID is nested under the mapping's OCD ID; the mapping's name
    is the parent jurisdiction. The election night votes are stored as
    the result's votes, typed as 'election_day'.
    """
    fields = self._base_kwargs(row)

    district = row['Election District']
    precinct_number = row['Election Precinct'].strip()
    precinct = "%s-%s" % (district, precinct_number)

    fields.update({
        'reporting_level': 'precinct',
        'jurisdiction': precinct,
        'parent_jurisdiction': self.mapping['name'],
        'ocd_id': "{}/precinct:{}".format(
            self.mapping['ocd_id'], ocd_type_id(precinct)),
        'party': row['Party'].strip(),
        'votes': self._votes(row['Election Night Votes']),
        'votes_type': 'election_day',
        'winner': row['Winner'],
        'write_in': self._writein(row),
    })
    return RawResult(**fields)
def load(self):
    """
    Load county-level results from a CSV file without a header row.

    Rows whose county is 'Totals' are not stored; their votes and winner
    are remembered and attached to the county rows that follow as
    ``total_votes`` and ``contest_winner``. All results are inserted in
    a single bulk call.
    """
    columns = [
        'year',
        'election',
        'office',
        'party',
        'district',
        'candidate',
        'county',
        'votes',
        'winner',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'county'

    # Collected here so that everything can be inserted in one go
    raw_results = []

    with self._file_handle as csvfile:
        for row in unicodecsv.DictReader(csvfile, fieldnames=columns):
            if self._skip_row(row):
                continue

            county = row['county'].strip()
            if county == 'Totals':
                total_votes = int(row['votes'].strip())
                contest_winner = row['winner'].strip()
                continue

            party = row['party'].strip()
            fields = self._common_kwargs.copy()
            fields['primary_party'] = party
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))

            # NOTE(review): total_votes and contest_winner are only bound
            # once a 'Totals' row has been seen; a county row before the
            # first 'Totals' row fails here.
            fields.update({
                'party': party,
                'jurisdiction': county,
                'ocd_id': "{}/county:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(county)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip()),
                'winner': row['winner'].strip(),
                'total_votes': total_votes,
                'contest_winner': contest_winner,
            })
            raw_results.append(RawResult(**fields))

    RawResult.objects.insert(raw_results)
def _prep_precinct_result(self, row):
    """
    Build the precinct-level RawResult for a row.

    The precinct is named "<election district>-<election precinct>" and
    its OCD ID is nested under the state's OCD ID. The election night
    votes are stored both as the result's votes and as the only entry of
    its vote breakdowns.
    """
    fields = self._base_kwargs(row)

    breakdowns = {
        'election_night_total': self._votes(row['Election Night Votes']),
    }

    district = row['Election District']
    precinct_number = row['Election Precinct'].strip()
    precinct = "%s-%s" % (district, precinct_number)

    fields.update({
        'reporting_level': 'precinct',
        'jurisdiction': precinct,
        'ocd_id': "{}/precinct:{}".format(
            self._get_state_ocd_id(), ocd_type_id(precinct)),
        'party': row['Party'].strip(),
        'votes': self._votes(row['Election Night Votes']),
        'winner': row['Winner'],
        'write_in': self._writein(row),
        'vote_breakdowns': breakdowns,
    })
    return RawResult(**fields)
def _build_result_kwargs(self, row):
    """
    Return the result-specific fields for a row.

    Results are county-level, except for the "U.S. President by
    Congressional District" office: primary results for that contest are
    given per congressional district within each county, so the district
    number is kept as the reporting district.
    """
    county = row['CountyName'].strip()
    result_fields = {
        'party': row['PartyName'].strip(),
        'jurisdiction': county,
        'ocd_id': "{}/county:{}".format(
            self.mapping['ocd_id'], ocd_type_id(county)),
        'votes': row['CanVotes'].strip(),
    }

    office = row['OfficeDesc'].strip()
    if office != "U.S. President by Congressional District":
        result_fields['reporting_level'] = 'county'
        return result_fields

    result_fields['reporting_level'] = 'congressional_district_by_county'
    result_fields['reporting_district'] = row['Juris1num'].strip()
    return result_fields
def _build_result_kwargs(self, row):
    """
    Return the result-specific fields for a row.

    Results are county-level, except for the "U.S. President by
    Congressional District" office: primary results for that contest are
    given per congressional district within each county, so the district
    number is kept as the reporting district.
    """
    county = row['CountyName'].strip()
    result_fields = {
        'party': row['PartyName'].strip(),
        'jurisdiction': county,
        'ocd_id': "{}/county:{}".format(
            self.mapping['ocd_id'], ocd_type_id(county)),
        'votes': row['CanVotes'].strip(),
    }

    office = row['OfficeDesc'].strip()
    if office != "U.S. President by Congressional District":
        result_fields['reporting_level'] = 'county'
        return result_fields

    result_fields['reporting_level'] = 'congressional_district_by_county'
    result_fields['reporting_district'] = row['Juris1num'].strip()
    return result_fields
def load(self):
    """
    Load county-level results from a CSV file without a header row.

    Rows with a blank county are totals rows and are not stored. Every
    other row that is not rejected by ``_skip_row`` becomes a RawResult
    for its county.
    """
    columns = [
        'candidate',
        'office',
        'district',
        'party',
        'county',
        'votes',
        'winner',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'county'

    # A BulkInsertBuffer is used because the load process was running
    # out of memory on prod-1
    buffered = BulkInsertBuffer(RawResult)

    with self._file_handle as csvfile:
        for row in unicodecsv.DictReader(csvfile, fieldnames=columns):
            if self._skip_row(row):
                continue

            county = row['county'].strip()
            if county == '':
                # Totals row: parsed, but the values are not read again
                # in this method.
                total_votes = int(row['votes'].strip())
                contest_winner = row['winner'].strip()
                continue

            party = row['party'].strip()
            fields = self._common_kwargs.copy()
            fields['primary_party'] = party
            fields.update(self._build_contest_kwargs(row))
            fields.update(self._build_candidate_kwargs(row))
            fields.update({
                'party': party,
                'jurisdiction': county,
                'ocd_id': "{}/county:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(county)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip()),
            })
            buffered.append(RawResult(**fields))

    # Whatever is still sitting in the buffer needs to be inserted too
    buffered.flush()
def load(self):
    """
    Load raw results from a latin-1 encoded CSV file.

    Rows rejected by ``self._skip_row`` are ignored.  A row with a blank
    county is stored as a state-level result for "Missouri"; any other row
    is stored as a county-level result whose OCD ID is built from
    ``self.mapping['ocd_id']``.  All results are collected in a list and
    inserted with a single ``RawResult.objects.insert`` call.
    """
    column_names = [
        'county',
        'office',
        'district',
        'party',
        'candidate',
        'votes',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'state'

    # Result instances are stored for bulk loading
    raw_results = []

    with self._file_handle as csvfile:
        rows = unicodecsv.DictReader(
            csvfile, fieldnames=column_names, encoding='latin-1')
        for row in rows:
            if self._skip_row(row):
                continue

            raw_kwargs = self._common_kwargs.copy()
            raw_kwargs.update(self._build_contest_kwargs(row))
            raw_kwargs.update(self._build_candidate_kwargs(row))

            county = row['county'].strip()
            if county != '':
                level = 'county'
                place = county
                place_ocd_id = "{}/county:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(place))
            else:
                # No county on the row: record it as a statewide result.
                level = 'state'
                place = "Missouri"
                place_ocd_id = "ocd-division/country:us/state:mo"

            raw_kwargs.update(
                reporting_level=level,
                jurisdiction=place,
                ocd_id=place_ocd_id,
                office=row['office'].strip(),
                district=row['district'].strip(),
                votes=int(row['votes'].strip())
            )
            raw_results.append(RawResult(**raw_kwargs))

    RawResult.objects.insert(raw_results)
def __call__(self):
    """
    Create congressional-district results by aggregating raw results.

    For every raw result returned by ``self.get_rawresults()`` the meta
    fields, candidate, contest and party are resolved, and the jurisdiction
    is the raw result's reporting district with leading zeros stripped.
    ``self._get_or_instantiate_result`` returns one ``Result`` per
    candidate, contest and jurisdiction; the votes of each raw result are
    added to it.  The newly instantiated results are bulk-inserted at the
    end and the number created is printed.
    """
    results = []

    for rr in self.get_rawresults():
        # We only grab the meta fields here because we're aggregating
        # results.
        #
        # We'll grab the votes explicitly later.
        #
        # Don't parse winner because it looks like it's reported as the
        # contest winner and not the jurisdiction winner.
        #
        # Don't parse write-in because this case is only for primaries and
        # I'm pretty sure there aren't any write-in candidates in those
        # contests.
        fields = self._get_fields(rr, meta_fields)
        fields['candidate'] = self.get_candidate(rr)
        fields['contest'] = fields['candidate'].contest

        party = self.get_party(rr)
        if party:
            fields['party'] = party.abbrev

        fields['reporting_level'] = 'congressional_district'
        fields['jurisdiction'] = self._strip_leading_zeros(
            rr.reporting_district)
        fields['ocd_id'] = "ocd-division/country:us/state:md/cd:%s" % (
            ocd_type_id(fields['jurisdiction']))

        # Instantiate a new result for this candidate, contest and
        # jurisdiction, but only do it once.
        result, instantiated = self._get_or_instantiate_result(fields)
        if instantiated:
            results.append(result)

        # Contribute votes from this particular raw result.  Falsy vote
        # values (e.g. None) on either side count as zero.
        result.votes = (result.votes or 0) + (rr.votes or 0)

    Result.objects.insert(results, load_bulk=False)

    # Call form of print: a single parenthesised expression prints the same
    # text on Python 2 and is valid syntax on Python 3.
    print("Created %d results." % len(results))
def _prep_precinct_result(self, row, office, district, candidate, county, votes):
    """
    Build a precinct-level ``RawResult`` for one candidate's votes.

    The precinct name is ``row[1]`` converted to a string and stripped.
    The county is matched case-insensitively against the entries of
    ``self.datasource._jurisdictions()``; the first match supplies the
    county OCD ID, to which the precinct is appended.  An ``IndexError`` is
    raised when no jurisdiction matches.  The party is ``candidate[1]`` and
    the votes are passed through ``self._votes``.
    """
    result_kwargs = self._base_kwargs(row, office, district, candidate)
    precinct_name = str(row[1]).strip()

    # Collect every jurisdiction whose county matches; the first one wins.
    matching_counties = []
    for jurisdiction in self.datasource._jurisdictions():
        if jurisdiction['county'].upper() == county.upper():
            matching_counties.append(jurisdiction)
    county_ocd_id = matching_counties[0]['ocd_id']

    result_kwargs.update(
        reporting_level='precinct',
        jurisdiction=precinct_name,
        ocd_id="{}/precinct:{}".format(
            county_ocd_id, ocd_type_id(precinct_name)),
        party=candidate[1],
        votes=self._votes(votes)
    )
    return RawResult(**result_kwargs)
def _prep_precinct_result(self, row):
    """
    Build a precinct-level ``RawResult`` from a row.

    The precinct is the string form of the row's ``Precinct`` value.  The
    row's ``CountyName`` is matched case-insensitively against the entries
    of ``self.datasource._jurisdictions()``; the first match supplies the
    county OCD ID, to which the precinct is appended.  An ``IndexError`` is
    raised when no jurisdiction matches.  The result starts with an empty
    ``vote_breakdowns`` dict.
    """
    result_kwargs = self._base_kwargs(row)
    precinct_name = str(row['Precinct'])

    # Collect every jurisdiction whose county matches; the first one wins.
    matching_counties = []
    for jurisdiction in self.datasource._jurisdictions():
        if jurisdiction['county'].upper() == row['CountyName'].upper():
            matching_counties.append(jurisdiction)
    first_match = matching_counties[0]
    county_ocd_id = first_match['ocd_id']

    precinct_ocd_id = "{}/precinct:{}".format(
        county_ocd_id, ocd_type_id(precinct_name))
    result_kwargs.update(
        reporting_level='precinct',
        jurisdiction=precinct_name,
        ocd_id=precinct_ocd_id,
        party=row['PartyName'].strip(),
        votes=self._votes(row['Votes']),
        vote_breakdowns={}
    )
    return RawResult(**result_kwargs)
def load(self):
    """
    Load precinct-level raw results from a latin-1 encoded CSV file.

    The file is read with a fixed list of column names.  Rows rejected by
    ``self._skip_row`` are ignored.  For every other row the county is
    looked up by the row's ``county_code`` among the entries of
    ``self.datasource._jurisdictions()`` (matching on ``state_id``), and a
    ``RawResult`` is appended to a bulk insert buffer that is flushed once
    the whole file has been read.

    Raises ``IndexError`` when a row's county code matches no
    jurisdiction.
    """
    headers = [
        'year',
        'election_type',
        'county_code',
        'precinct_code',
        'cand_office_rank',
        'cand_district',
        'cand_party_rank',
        'cand_ballot_position',
        'cand_office_code',
        'cand_party_code',
        'cand_number',
        'cand_last_name',
        'cand_first_name',
        'cand_middle_name',
        'cand_suffix',
        'votes',
        'congressional_district',
        'state_senate_district',
        'state_house_district',
        'municipality_type_code',
        'municipality',
        'municipality_breakdown_code_1',
        'municipality_breakdown_name_1',
        'municipality_breakdown_code_2',
        'municipality_breakdown_name_2',
        'bicounty_code',
        'mcd_code',
        'fips_code',
        'vtd_code',
        'previous_precinct_code',
        'previous_congressional_district',
        'previous_state_senate_district',
        'previous_state_house_district'
    ]

    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'

    # The same for every row, so work it out once.
    is_primary = 'primary' in self.mapping['election']

    # Jurisdictions indexed by state_id.  Built on the first row that needs
    # it, so the jurisdiction list is fetched and scanned once instead of
    # twice per CSV row.
    counties_by_state_id = None

    # Store result instances for bulk loading
    results = BulkInsertBuffer(RawResult)

    with self._file_handle as csvfile:
        if '2014' in self.election_id:
            # Remove NUL characters from each line before parsing.
            lines = (line.replace('\0', '') for line in csvfile)
        else:
            lines = csvfile
        reader = unicodecsv.DictReader(
            lines, fieldnames=headers, encoding='latin-1')

        for row in reader:
            if self._skip_row(row):
                continue

            rr_kwargs = self._common_kwargs.copy()
            if is_primary:
                rr_kwargs['primary_party'] = row['cand_party_code'].strip()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))

            if counties_by_state_id is None:
                counties_by_state_id = {}
                for jurisdiction in self.datasource._jurisdictions():
                    # setdefault keeps the first entry for a state_id, as
                    # taking element [0] of the filtered list did.
                    counties_by_state_id.setdefault(
                        jurisdiction['state_id'], jurisdiction)

            county_code = str(row['county_code'])
            try:
                county = counties_by_state_id[county_code]
            except KeyError:
                # Same exception type as indexing an empty match list.
                raise IndexError(
                    "no jurisdiction with state_id %r" % county_code)

            precinct_code = str(row['precinct_code'])
            rr_kwargs.update({
                'party': row['cand_party_code'].strip(),
                'jurisdiction': precinct_code,
                'parent_jurisdiction': county['name'],
                'ocd_id': "{}/precinct:{}".format(
                    county['ocd_id'], ocd_type_id(precinct_code)),
                'votes': int(row['votes'].strip()),
                # PA-specific data
                'congressional_district': row['congressional_district'],
                'state_senate_district': row['state_senate_district'],
                'state_house_district': row['state_house_district'],
                'municipality_type_code': row['municipality_type_code'],
                'municipality': row['municipality'],
                'previous_precinct_code': row['previous_precinct_code'],
                'previous_congressional_district': row['previous_congressional_district'],
                'previous_state_senate_district': row['previous_state_senate_district'],
                'previous_state_house_district': row['previous_state_house_district']
            })
            results.append(RawResult(**rr_kwargs))

    results.flush()
def update_ocd_id(self, ocd_id, jurisdiction):
    """
    Return ``ocd_id`` with its last component replaced by a precinct.

    The OCD ID is split on '/', the final component is dropped and a
    ``precinct:<id>`` component built from ``jurisdiction`` is appended.

    Only the jurisdiction is passed through ``ocd_type_id``; the
    ``precinct:`` type prefix is kept literal, which is how the other
    precinct OCD IDs in this file are built.
    NOTE(review): this assumes ocd_type_id would otherwise alter the ':'
    separator -- confirm against its implementation.
    """
    parent_bits = ocd_id.split('/')[:-1]
    precinct_bit = "precinct:{}".format(ocd_type_id(jurisdiction))
    return '/'.join(parent_bits + [precinct_bit])