def load(self):
    """Parse a precinct-level results CSV and bulk-insert RawResult records.

    Reads the open file handle on ``self._file_handle``, builds one
    RawResult kwargs dict per non-skipped row, and appends it to a
    BulkInsertBuffer so rows are inserted in batches.
    """
    headers = [
        'CandidateUid', 'FirstName', 'MiddleName', 'LastName', 'Suffix',
        'TOTAL_VOTES', 'Party', 'WriteInVote', 'LocalityUid', 'LocalityCode',
        'LocalityName', 'PrecinctUid', 'PrecinctName', 'DistrictUid',
        'DistrictType', 'DistrictName', 'OfficeUid', 'OfficeTitle',
        'ElectionUid', 'ElectionType', 'ElectionDate', 'ElectionName',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'
    # Store result instances for bulk loading to bound memory usage.
    results = BulkInsertBuffer(RawResult)

    # Hoisted out of the row loop: the original rescanned the whole
    # jurisdiction list for every row (O(rows * jurisdictions)).  Build a
    # FIPS -> jurisdiction map once; setdefault keeps the first match,
    # matching the original [...][0] selection.
    jurisdiction_by_fips = {}
    for juris in self.datasource._jurisdictions():
        jurisdiction_by_fips.setdefault(int(juris['fips']), juris)

    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(csvfile, fieldnames=headers)
        for row in reader:
            if self._skip_row(row):
                continue
            rr_kwargs = self._common_kwargs.copy()
            if 'primary' in self.mapping['election']:
                rr_kwargs['primary_party'] = row['Party'].strip()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))
            rr_kwargs.update(self._build_write_in_kwargs(row))
            rr_kwargs.update(self._build_total_votes(row))
            # NOTE: raises KeyError (was IndexError) if the locality code
            # has no matching jurisdiction.
            parent_jurisdiction = jurisdiction_by_fips[int(row['LocalityCode'])]
            if row['PrecinctUid'].strip() == '':
                # No precinct identifier: fall back to the locality OCD ID.
                ocd_id = parent_jurisdiction['ocd_id']
            else:
                ocd_id = "{}/precinct:{}".format(
                    parent_jurisdiction['ocd_id'],
                    ocd_type_id(str(row['PrecinctName'])))
            rr_kwargs.update({
                'party': row['Party'].strip(),
                'jurisdiction': str(row['PrecinctName']),
                'parent_jurisdiction': parent_jurisdiction['name'],
                'ocd_id': ocd_id,
            })
            results.append(RawResult(**rr_kwargs))
    # Insert anything still sitting in the buffer.
    results.flush()
def load(self):
    """Read the county-level results CSV and bulk-insert RawResult rows."""
    field_names = [
        'candidate', 'office', 'district', 'party',
        'county', 'votes', 'winner',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'county'
    # BulkInsertBuffer keeps memory bounded; the load process previously
    # ran out of memory on prod-1 when results were accumulated in a list.
    results = BulkInsertBuffer(RawResult)
    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(csvfile, fieldnames=field_names)
        for row in reader:
            if self._skip_row(row):
                continue
            jurisdiction = row['county'].strip()
            if not jurisdiction:
                # Row with a blank county: statewide summary line.
                # NOTE(review): these locals are not used later in this
                # method — presumably vestigial; confirm before removing.
                total_votes = int(row['votes'].strip())
                contest_winner = row['winner'].strip()
                continue
            rr_kwargs = self._common_kwargs.copy()
            rr_kwargs['primary_party'] = row['party'].strip()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))
            rr_kwargs.update({
                'party': row['party'].strip(),
                'jurisdiction': jurisdiction,
                'ocd_id': "{}/county:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(jurisdiction)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip()),
            })
            results.append(RawResult(**rr_kwargs))
    # Flush whatever remains buffered and still needs inserting.
    results.flush()
def load(self):
    """Parse a latin-1 encoded precinct-level results CSV and bulk-insert
    RawResult records.

    Reads the open file handle on ``self._file_handle``, builds one
    RawResult kwargs dict per non-skipped row, and appends it to a
    BulkInsertBuffer so rows are inserted in batches.
    """
    headers = [
        'CandidateUid', 'FirstName', 'MiddleName', 'LastName', 'Suffix',
        'TOTAL_VOTES', 'Party', 'WriteInVote', 'LocalityUid', 'LocalityCode',
        'LocalityName', 'PrecinctUid', 'PrecinctName', 'DistrictUid',
        'DistrictType', 'DistrictName', 'OfficeUid', 'OfficeTitle',
        'ElectionUid', 'ElectionType', 'ElectionDate', 'ElectionName',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'
    # Store result instances for bulk loading to bound memory usage.
    results = BulkInsertBuffer(RawResult)

    # Hoisted out of the row loop: the original rescanned the whole
    # jurisdiction list for every row (O(rows * jurisdictions)).  Build a
    # FIPS -> jurisdiction map once; setdefault keeps the first match,
    # matching the original [...][0] selection.
    jurisdiction_by_fips = {}
    for juris in self.datasource._jurisdictions():
        jurisdiction_by_fips.setdefault(int(juris['fips']), juris)

    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(csvfile, fieldnames=headers,
                                       encoding='latin-1')
        for row in reader:
            if self._skip_row(row):
                continue
            rr_kwargs = self._common_kwargs.copy()
            if 'primary' in self.mapping['election']:
                rr_kwargs['primary_party'] = row['Party'].strip()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))
            rr_kwargs.update(self._build_write_in_kwargs(row))
            rr_kwargs.update(self._build_total_votes(row))
            # NOTE: raises KeyError (was IndexError) if the locality code
            # has no matching jurisdiction.
            parent_jurisdiction = jurisdiction_by_fips[int(row['LocalityCode'])]
            if row['PrecinctUid'].strip() == '':
                # No precinct identifier: fall back to the locality OCD ID.
                ocd_id = parent_jurisdiction['ocd_id']
            else:
                ocd_id = "{}/precinct:{}".format(
                    parent_jurisdiction['ocd_id'],
                    ocd_type_id(str(row['PrecinctName'])))
            rr_kwargs.update({
                'party': row['Party'].strip(),
                'jurisdiction': str(row['PrecinctName']),
                'parent_jurisdiction': parent_jurisdiction['name'],
                'ocd_id': ocd_id,
            })
            results.append(RawResult(**rr_kwargs))
    # Insert anything still sitting in the buffer.
    results.flush()
def load(self):
    """Read the latin-1 encoded county-level results CSV and bulk-insert
    RawResult rows."""
    field_names = [
        'candidate', 'office', 'district', 'party',
        'county', 'votes', 'winner',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'county'
    # BulkInsertBuffer keeps memory bounded; the load process previously
    # ran out of memory on prod-1 when results were accumulated in a list.
    results = BulkInsertBuffer(RawResult)
    with self._file_handle as csvfile:
        reader = unicodecsv.DictReader(csvfile, fieldnames=field_names,
                                       encoding='latin-1')
        for row in reader:
            if self._skip_row(row):
                continue
            jurisdiction = row['county'].strip()
            if not jurisdiction:
                # Row with a blank county: statewide summary line.
                # NOTE(review): these locals are not used later in this
                # method — presumably vestigial; confirm before removing.
                total_votes = int(row['votes'].strip())
                contest_winner = row['winner'].strip()
                continue
            rr_kwargs = self._common_kwargs.copy()
            rr_kwargs['primary_party'] = row['party'].strip()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))
            rr_kwargs.update({
                'party': row['party'].strip(),
                'jurisdiction': jurisdiction,
                'ocd_id': "{}/county:{}".format(
                    self.mapping['ocd_id'], ocd_type_id(jurisdiction)),
                'office': row['office'].strip(),
                'district': row['district'].strip(),
                'votes': int(row['votes'].strip()),
            })
            results.append(RawResult(**rr_kwargs))
    # Flush whatever remains buffered and still needs inserting.
    results.flush()
def load(self):
    """Parse a PA precinct-level results CSV and bulk-insert RawResult
    records.

    2014 files are pre-filtered to strip NUL bytes before being handed to
    the CSV reader; all files are decoded as latin-1.
    """
    headers = [
        'year', 'election_type', 'county_code', 'precinct_code',
        'cand_office_rank', 'cand_district', 'cand_party_rank',
        'cand_ballot_position', 'cand_office_code', 'cand_party_code',
        'cand_number', 'cand_last_name', 'cand_first_name',
        'cand_middle_name', 'cand_suffix', 'votes',
        'congressional_district', 'state_senate_district',
        'state_house_district', 'municipality_type_code', 'municipality',
        'municipality_breakdown_code_1', 'municipality_breakdown_name_1',
        'municipality_breakdown_code_2', 'municipality_breakdown_name_2',
        'bicounty_code', 'mcd_code', 'fips_code', 'vtd_code',
        'previous_precinct_code', 'previous_congressional_district',
        'previous_state_senate_district', 'previous_state_house_district',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'
    # Store result instances for bulk loading to bound memory usage.
    results = BulkInsertBuffer(RawResult)

    # Hoisted out of the row loop: the original scanned the jurisdiction
    # list TWICE per row (once for the name, once for the OCD ID).  Build
    # a state_id -> county map once; setdefault keeps the first match,
    # matching the original [...][0] selection.
    county_by_state_id = {}
    for juris in self.datasource._jurisdictions():
        county_by_state_id.setdefault(juris['state_id'], juris)

    with self._file_handle as csvfile:
        if '2014' in self.election_id:
            # 2014 files contain stray NUL bytes that break the CSV
            # parser; strip them on the fly.
            cleaned = (line.replace('\0', '') for line in csvfile)
            reader = unicodecsv.DictReader(cleaned, fieldnames=headers,
                                           encoding='latin-1')
        else:
            reader = unicodecsv.DictReader(csvfile, fieldnames=headers,
                                           encoding='latin-1')
        for row in reader:
            if self._skip_row(row):
                continue
            rr_kwargs = self._common_kwargs.copy()
            if 'primary' in self.mapping['election']:
                rr_kwargs['primary_party'] = row['cand_party_code'].strip()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))
            # NOTE: raises KeyError (was IndexError) if the county code
            # has no matching jurisdiction.
            county = county_by_state_id[str(row['county_code'])]
            rr_kwargs.update({
                'party': row['cand_party_code'].strip(),
                'jurisdiction': str(row['precinct_code']),
                'parent_jurisdiction': county['name'],
                'ocd_id': "{}/precinct:{}".format(
                    county['ocd_id'],
                    ocd_type_id(str(row['precinct_code']))),
                'votes': int(row['votes'].strip()),
                # PA-specific pass-through fields.
                'congressional_district': row['congressional_district'],
                'state_senate_district': row['state_senate_district'],
                'state_house_district': row['state_house_district'],
                'municipality_type_code': row['municipality_type_code'],
                'municipality': row['municipality'],
                'previous_precinct_code': row['previous_precinct_code'],
                'previous_congressional_district':
                    row['previous_congressional_district'],
                'previous_state_senate_district':
                    row['previous_state_senate_district'],
                'previous_state_house_district':
                    row['previous_state_house_district'],
            })
            results.append(RawResult(**rr_kwargs))
    # Insert anything still sitting in the buffer.
    results.flush()
def _create_results_collection(self):
    """Return the list-like buffer used to accumulate constructed Result
    instances for bulk insertion."""
    return BulkInsertBuffer(Result)
def load(self):
    """Parse a PA precinct-level results CSV and bulk-insert RawResult
    records.

    2014 files are pre-filtered to strip NUL bytes before being handed to
    the CSV reader.
    """
    headers = [
        'year', 'election_type', 'county_code', 'precinct_code',
        'cand_office_rank', 'cand_district', 'cand_party_rank',
        'cand_ballot_position', 'cand_office_code', 'cand_party_code',
        'cand_number', 'cand_last_name', 'cand_first_name',
        'cand_middle_name', 'cand_suffix', 'votes',
        'congressional_district', 'state_senate_district',
        'state_house_district', 'municipality_type_code', 'municipality',
        'municipality_breakdown_code_1', 'municipality_breakdown_name_1',
        'municipality_breakdown_code_2', 'municipality_breakdown_name_2',
        'bicounty_code', 'mcd_code', 'fips_code', 'vtd_code',
        'previous_precinct_code', 'previous_congressional_district',
        'previous_state_senate_district', 'previous_state_house_district',
    ]
    self._common_kwargs = self._build_common_election_kwargs()
    self._common_kwargs['reporting_level'] = 'precinct'
    # Store result instances for bulk loading to bound memory usage.
    results = BulkInsertBuffer(RawResult)

    # Hoisted out of the row loop: the original scanned the jurisdiction
    # list TWICE per row (once for the name, once for the OCD ID).  Build
    # a state_id -> county map once; setdefault keeps the first match,
    # matching the original [...][0] selection.
    county_by_state_id = {}
    for juris in self.datasource._jurisdictions():
        county_by_state_id.setdefault(juris['state_id'], juris)

    with self._file_handle as csvfile:
        if '2014' in self.election_id:
            # 2014 files contain stray NUL bytes that break the CSV
            # parser; strip them on the fly.
            cleaned = (line.replace('\0', '') for line in csvfile)
            reader = unicodecsv.DictReader(cleaned, fieldnames=headers)
        else:
            reader = unicodecsv.DictReader(csvfile, fieldnames=headers)
        for row in reader:
            if self._skip_row(row):
                continue
            rr_kwargs = self._common_kwargs.copy()
            if 'primary' in self.mapping['election']:
                rr_kwargs['primary_party'] = row['cand_party_code'].strip()
            rr_kwargs.update(self._build_contest_kwargs(row))
            rr_kwargs.update(self._build_candidate_kwargs(row))
            # NOTE: raises KeyError (was IndexError) if the county code
            # has no matching jurisdiction.
            county = county_by_state_id[str(row['county_code'])]
            rr_kwargs.update({
                'party': row['cand_party_code'].strip(),
                'jurisdiction': str(row['precinct_code']),
                'parent_jurisdiction': county['name'],
                'ocd_id': "{}/precinct:{}".format(
                    county['ocd_id'],
                    ocd_type_id(str(row['precinct_code']))),
                'votes': int(row['votes'].strip()),
                # PA-specific pass-through fields.
                'congressional_district': row['congressional_district'],
                'state_senate_district': row['state_senate_district'],
                'state_house_district': row['state_house_district'],
                'municipality_type_code': row['municipality_type_code'],
                'municipality': row['municipality'],
                'previous_precinct_code': row['previous_precinct_code'],
                'previous_congressional_district':
                    row['previous_congressional_district'],
                'previous_state_senate_district':
                    row['previous_state_senate_district'],
                'previous_state_house_district':
                    row['previous_state_house_district'],
            })
            results.append(RawResult(**rr_kwargs))
    # Insert anything still sitting in the buffer.
    results.flush()