def _populate_votes(self, row): super()._populate_votes(row) if self._office != 'STATISTICS' and row not in BUGGY_ROWS: vote_percent_string = next(self._string_iterator) assert '%' in vote_percent_string @classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() @classmethod def _should_be_recorded(cls, row): if not super()._should_be_recorded(row): return False if row['office'] == 'Borough Of Mahanoy City Mahanoy City': return False return True class SchuylkillPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = SchuylkillPDFStringIterator _pdf_table_parser_clazz = SchuylkillPDFTableParser _header = SCHUYLKILL_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(SCHUYLKILL_FILE), csv.DictWriter(f, OUTPUT_HEADER), SchuylkillPDFPageParser)
class NorthamptonPDFTableParser(ElectionwarePDFTableParser):
    """Table parser configured from this module's Northampton constants."""

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    @classmethod
    def _should_be_recorded(cls, row):
        """Return whether the row should be kept.

        A row is dropped when the base class rejects it, when its office
        contains 'County Committee', or when its office is exactly
        'Library Tax Question'.
        """
        if not super()._should_be_recorded(row):
            return False
        office = row['office']
        return 'County Committee' not in office and office != 'Library Tax Question'


class NorthamptonPDFPageParser(ElectionwarePDFPageParser):
    """Page parser wired to the Northampton string iterator, table parser and header."""

    _header = NORTHAMPTON_HEADER
    _pdf_string_iterator_clazz = NorthamptonPDFStringIterator
    _pdf_table_parser_clazz = NorthamptonPDFTableParser


if __name__ == "__main__":
    # newline='' leaves line-ending handling to the csv module.
    with open(OUTPUT_FILE, 'w', newline='') as output:
        pages = PDFPageIterator(NORTHAMPTON_FILE)
        writer = csv.DictWriter(output, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, NorthamptonPDFPageParser)
next(self._string_iterator) @classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].title() @classmethod def _should_be_recorded(cls, row): if row['candidate'].startswith('Write-In: '): # there's already a Write-In Totals field; this prevents double counting return False if 'Delegate' in row['office']: return False if 'Comm' in row['office']: return False return super()._should_be_recorded(row) class BlairPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = BlairPDFStringIterator _pdf_table_parser_clazz = BlairPDFTableParser _header = BLAIR_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(BLAIR_FILE), csv.DictWriter(f, OUTPUT_HEADER), BlairPDFPageParser)
class CambriaPDFStringIterator(ElectionwarePDFStringIterator):
    """String iterator configured with this module's two footer substrings."""

    _first_footer_substring = FIRST_FOOTER_SUBSTRING
    _second_footer_substring = SECOND_FOOTER_SUBSTRING


class CambriaPDFTableParser(ElectionwarePDFTableParser):
    """Table parser configured from this module's Cambria constants."""

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    @classmethod
    def _clean_row(cls, row):
        """Run the base class cleanup, then title-case office and candidate."""
        super()._clean_row(row)
        for field in ('office', 'candidate'):
            row[field] = row[field].title()


class CambriaPDFPageParser(ElectionwarePDFPageParser):
    """Page parser wired to the Cambria string iterator, table parser and header."""

    _header = CAMBRIA_HEADER
    _pdf_string_iterator_clazz = CambriaPDFStringIterator
    _pdf_table_parser_clazz = CambriaPDFTableParser


if __name__ == "__main__":
    # newline='' leaves line-ending handling to the csv module.
    with open(OUTPUT_FILE, 'w', newline='') as output:
        pages = PDFPageIterator(CAMBRIA_FILE)
        writer = csv.DictWriter(output, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, CambriaPDFPageParser)
_county = COUNTY _expected_table_headers = EXPECTED_TABLE_HEADERS _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT @classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].title() @classmethod def _should_be_recorded(cls, row): if 'Del ' in row['office']: return False if 'Comm' in row['office']: return False return super()._should_be_recorded(row) class BeaverPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = BeaverPDFStringIterator _pdf_table_parser_clazz = BeaverPDFTableParser _header = BEAVER_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(BEAVER_FILE), csv.DictWriter(f, OUTPUT_HEADER), BeaverPDFPageParser)
_county = COUNTY _expected_table_headers = EXPECTED_TABLE_HEADERS _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT @classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].replace('Write-In: ', '').title() @classmethod def _should_be_recorded(cls, row): if not super()._should_be_recorded(row): return False if row['office'] == 'Wheatland Home Rule': return False return True class MercerPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = MercerPDFStringIterator _pdf_table_parser_clazz = MercerPDFTableParser _header = MERCER_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(MERCER_FILE), csv.DictWriter(f, OUTPUT_HEADER), MercerPDFPageParser)
if self._office != 'STATISTICS': vote_percent_string = next(self._string_iterator) assert '%' in vote_percent_string @classmethod def _clean_row(cls, row): print(row) super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].title() @classmethod def _should_be_recorded(cls, row): if not super()._should_be_recorded(row): return False if 'Committee' in row['office']: return False return True class LebanonPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = LebanonPDFStringIterator _pdf_table_parser_clazz = LebanonPDFTableParser _header = LEBANON_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(LEBANON_FILE), csv.DictWriter(f, OUTPUT_HEADER), LebanonPDFPageParser)
} class LackawannaPDFStringIterator(ElectionwarePDFStringIterator): _first_footer_substring = FIRST_FOOTER_SUBSTRING _second_footer_substring = SECOND_FOOTER_SUBSTRING class LackawannaPDFTableParser(ElectionwarePDFTableParser): _county = COUNTY _expected_table_headers = EXPECTED_TABLE_HEADERS _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT @classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() class LackawannaPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = LackawannaPDFStringIterator _pdf_table_parser_clazz = LackawannaPDFTableParser _header = LACKAWANNA_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(LACKAWANNA_FILE), csv.DictWriter(f, OUTPUT_HEADER), LackawannaPDFPageParser)
@classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].title() @classmethod def _should_be_recorded(cls, row): if 'Delegate' in row['office']: return False if 'Committee' in row['office']: return False if 'Liquor' in row['office']: return False if 'Council' in row['office']: return False return super()._should_be_recorded(row) class ChesterPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = ChesterPDFStringIterator _pdf_table_parser_clazz = ChesterPDFTableParser _header = CHESTER_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(CHESTER_FILE), csv.DictWriter(f, OUTPUT_HEADER), ChesterPDFPageParser)
_raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT @classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].title() @classmethod def _should_be_recorded(cls, row): if row['candidate'].startswith('Write-In: '): # there's already a Write-In Totals field; this prevents double counting return False if 'Delegate' in row['office']: return False if 'Comm' in row['office']: return False return super()._should_be_recorded(row) class MifflinPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = MifflinPDFStringIterator _pdf_table_parser_clazz = MifflinPDFTableParser _header = MIFFLIN_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(MIFFLIN_FILE), csv.DictWriter(f, OUTPUT_HEADER), MifflinPDFPageParser)
@classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].title() @classmethod def _should_be_recorded(cls, row): if row['candidate'].startswith('Write-In: '): # there's already a Write-In Totals field; this prevents double counting return False if 'Delegate' in row['office']: return False if 'Comm' in row['office']: return False return super()._should_be_recorded(row) class ClearfieldPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = ClearfieldPDFStringIterator _pdf_table_parser_clazz = ClearfieldPDFTableParser _header = CLEARFIELD_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(CLEARFIELD_FILE), csv.DictWriter(f, OUTPUT_HEADER), ClearfieldPDFPageParser)
@classmethod def _clean_row(cls, row): row['candidate'] = row['candidate'].replace('REPUBLICIAN', 'REPUBLICAN') super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].title() @classmethod def _should_be_recorded(cls, row): if row['candidate'].startswith('Write-In: '): # there's already a Write-In Totals field; this prevents double counting return False if 'Del ' in row['office']: return False if 'Cmte' in row['office']: return False return super()._should_be_recorded(row) class ClintonPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = ClintonPDFStringIterator _pdf_table_parser_clazz = ClintonPDFTableParser _header = CLINTON_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(CLINTON_FILE), csv.DictWriter(f, OUTPUT_HEADER), ClintonPDFPageParser)
@classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() @classmethod def _should_be_recorded(cls, row): if row['candidate'].startswith('Write-In: '): # there's already a Write-In Totals field; this prevents double counting return False return super()._should_be_recorded(row) class TiogaPDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = TiogaPDFStringIterator _pdf_table_parser_clazz = TiogaPDFTableParser _header = TIOGA_HEADER def __init__(self, page): super().__init__(page) if page.get_page_number() < FIRST_PER_PRECINCT_PAGE: # skip these pages; these are the summary pages strings = [FIRST_FOOTER_SUBSTRING] self._string_iterator = TiogaPDFStringIterator(strings) if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(TIOGA_FILE), csv.DictWriter(f, OUTPUT_HEADER), TiogaPDFPageParser)
def _populate_votes(self, row): super()._populate_votes(row) if '%' in self._string_iterator.peek(): next(self._string_iterator) # vote % string, not always supplied @classmethod def _clean_row(cls, row): super()._clean_row(row) row['office'] = row['office'].title() row['candidate'] = row['candidate'].title() @classmethod def _should_be_recorded(cls, row): if 'Delegate' in row['office']: return False return super()._should_be_recorded(row) class CentrePDFPageParser(ElectionwarePDFPageParser): _pdf_string_iterator_clazz = CentrePDFStringIterator _pdf_table_parser_clazz = CentrePDFTableParser _header = CENTRE_HEADER if __name__ == "__main__": with open(OUTPUT_FILE, 'w', newline='') as f: pdf_to_csv(PDFPageIterator(CENTRE_FILE), csv.DictWriter(f, OUTPUT_HEADER), CentrePDFPageParser)
class AdamsPDFTableParser(ElectionwarePDFTableParser):
    """Table parser configured from this module's Adams constants.

    Outside the 'STATISTICS' office it also consumes a 'VOTE %' header string
    and a per-row vote % string.
    """

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    def _verify_table_header(self):
        """Consume the 'VOTE %' header when applicable, then defer to the base class.

        For any office other than 'STATISTICS', the next string must equal
        'VOTE %'. The base class verification runs afterwards in every case.
        """
        if self._office != 'STATISTICS':
            percent_header = next(self._string_iterator)
            assert percent_header == 'VOTE %'
        super()._verify_table_header()

    def _populate_votes(self, row):
        """Populate votes via the base class, then consume the vote % string.

        Nothing extra is consumed for the 'STATISTICS' office or for rows
        whose candidate is 'Contest Totals'.
        """
        super()._populate_votes(row)
        if self._office == 'STATISTICS' or row['candidate'] == 'Contest Totals':
            return
        percent_text = next(self._string_iterator)
        assert '%' in percent_text


class AdamsPDFPageParser(ElectionwarePDFPageParser):
    """Page parser wired to the Adams string iterator, table parser and header."""

    _header = ADAMS_HEADER
    _pdf_string_iterator_clazz = AdamsPDFStringIterator
    _pdf_table_parser_clazz = AdamsPDFTableParser


if __name__ == "__main__":
    # newline='' leaves line-ending handling to the csv module.
    with open(OUTPUT_FILE, 'w', newline='') as output:
        pages = PDFPageIterator(ADAMS_FILE)
        writer = csv.DictWriter(output, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, AdamsPDFPageParser)