Python pdf_to_csvの例、parsers.electionware_parser.pdf_to_csv Pythonの例

コード例 #1

0

ファイルを表示

    def _populate_votes(self, row):
        """Populate votes via the base class, then consume the vote-% string.

        Nothing extra is consumed for the 'STATISTICS' office or for rows
        listed in BUGGY_ROWS.
        """
        super()._populate_votes(row)
        if self._office == 'STATISTICS' or row in BUGGY_ROWS:
            return
        percent_text = next(self._string_iterator)
        # Sanity check that the consumed string really is a percentage.
        assert '%' in percent_text

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case ``row['office']`` in place."""
        super()._clean_row(row)
        office = row['office']
        row['office'] = office.title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Return True only if the base class accepts ``row`` and its office
        is not 'Borough Of Mahanoy City Mahanoy City'.
        """
        if super()._should_be_recorded(row):
            return row['office'] != 'Borough Of Mahanoy City Mahanoy City'
        return False


class SchuylkillPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Schuylkill string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = SchuylkillPDFStringIterator
    _pdf_table_parser_clazz = SchuylkillPDFTableParser
    _header = SCHUYLKILL_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of SCHUYLKILL_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(SCHUYLKILL_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, SchuylkillPDFPageParser)

コード例 #2

0

ファイルを表示


class NorthamptonPDFTableParser(ElectionwarePDFTableParser):
    """Table parser configured from this module's constants.

    Adds an office-based filter on top of the base class's row filter.
    """

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    @classmethod
    def _should_be_recorded(cls, row):
        """Return False for rows the base class rejects, for offices
        containing 'County Committee', and for 'Library Tax Question';
        True otherwise.
        """
        if not super()._should_be_recorded(row):
            return False
        office = row['office']
        skipped = ('County Committee' in office
                   or office == 'Library Tax Question')
        return not skipped


class NorthamptonPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Northampton string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = NorthamptonPDFStringIterator
    _pdf_table_parser_clazz = NorthamptonPDFTableParser
    _header = NORTHAMPTON_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of NORTHAMPTON_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(NORTHAMPTON_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, NorthamptonPDFPageParser)

コード例 #3

0

ファイルを表示

ファイル: pa_blair_primary_2020_results_parser.py プロジェクト: rbier/openelections-data-pa

                next(self._string_iterator)

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case the office and
        candidate values of ``row`` in place.
        """
        super()._clean_row(row)
        for field in ('office', 'candidate'):
            row[field] = row[field].title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Decide whether ``row`` is kept.

        Individual write-in rows and offices containing 'Delegate' or
        'Comm' are rejected outright; every other row is decided by the
        base class.
        """
        # A Write-In Totals field already exists, so recording the
        # individual write-in rows as well would double count.
        if row['candidate'].startswith('Write-In: '):
            return False
        office = row['office']
        if 'Delegate' in office or 'Comm' in office:
            return False
        return super()._should_be_recorded(row)


class BlairPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Blair string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = BlairPDFStringIterator
    _pdf_table_parser_clazz = BlairPDFTableParser
    _header = BLAIR_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of BLAIR_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(BLAIR_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, BlairPDFPageParser)

コード例 #4

0

ファイルを表示

ファイル: pa_cambria_primary_2020_results_parser.py プロジェクト: rbier/openelections-data-pa


class CambriaPDFStringIterator(ElectionwarePDFStringIterator):
    """String iterator configured with this module's two footer substrings.

    NOTE(review): presumably the base class uses these to recognise page
    footers -- confirm in ElectionwarePDFStringIterator.
    """
    _first_footer_substring = FIRST_FOOTER_SUBSTRING
    _second_footer_substring = SECOND_FOOTER_SUBSTRING


class CambriaPDFTableParser(ElectionwarePDFTableParser):
    """Table parser configured from this module's constants.

    Extends the base row cleanup by title-casing office and candidate.
    """

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case the office and
        candidate values of ``row`` in place.
        """
        super()._clean_row(row)
        for field in ('office', 'candidate'):
            row[field] = row[field].title()


class CambriaPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Cambria string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = CambriaPDFStringIterator
    _pdf_table_parser_clazz = CambriaPDFTableParser
    _header = CAMBRIA_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of CAMBRIA_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(CAMBRIA_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, CambriaPDFPageParser)

コード例 #5

0

ファイルを表示

ファイル: pa_beaver_general_2020_results_parser.py プロジェクト: saewitz/openelections-data-pa

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case the office and
        candidate values of ``row`` in place.
        """
        super()._clean_row(row)
        office = row['office']
        row['office'] = office.title()
        candidate = row['candidate']
        row['candidate'] = candidate.title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Reject rows whose office contains 'Del ' or 'Comm'; defer every
        other row to the base class.
        """
        office = row['office']
        if 'Del ' in office or 'Comm' in office:
            return False
        return super()._should_be_recorded(row)


class BeaverPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Beaver string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = BeaverPDFStringIterator
    _pdf_table_parser_clazz = BeaverPDFTableParser
    _header = BEAVER_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of BEAVER_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(BEAVER_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, BeaverPDFPageParser)

コード例 #6

0

ファイルを表示

ファイル: pa_mercer_primary_2020_results_parser.py プロジェクト: rbier/openelections-data-pa

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, title-case the office, and strip the
        'Write-In: ' marker from the candidate before title-casing it.
        """
        super()._clean_row(row)
        office = row['office']
        row['office'] = office.title()
        candidate = row['candidate'].replace('Write-In: ', '')
        row['candidate'] = candidate.title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Return True only if the base class accepts ``row`` and its office
        is not 'Wheatland Home Rule'.
        """
        if super()._should_be_recorded(row):
            return row['office'] != 'Wheatland Home Rule'
        return False


class MercerPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Mercer string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = MercerPDFStringIterator
    _pdf_table_parser_clazz = MercerPDFTableParser
    _header = MERCER_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of MERCER_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(MERCER_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, MercerPDFPageParser)

コード例 #7

0

ファイルを表示

        if self._office != 'STATISTICS':
            vote_percent_string = next(self._string_iterator)
            assert '%' in vote_percent_string

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case the office and
        candidate values of ``row`` in place.

        The row is modified in place and nothing is returned. The stray
        debugging ``print(row)`` that used to dump every row to stdout has
        been removed.
        """
        super()._clean_row(row)
        row['office'] = row['office'].title()
        row['candidate'] = row['candidate'].title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Return True only if the base class accepts ``row`` and its office
        does not contain 'Committee'.
        """
        if super()._should_be_recorded(row):
            return 'Committee' not in row['office']
        return False


class LebanonPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Lebanon string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = LebanonPDFStringIterator
    _pdf_table_parser_clazz = LebanonPDFTableParser
    _header = LEBANON_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of LEBANON_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(LEBANON_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, LebanonPDFPageParser)

コード例 #8

0

ファイルを表示

ファイル: pa_lackawanna_primary_2020_results_parser.py プロジェクト: rbier/openelections-data-pa

}


class LackawannaPDFStringIterator(ElectionwarePDFStringIterator):
    """String iterator configured with this module's two footer substrings.

    NOTE(review): presumably the base class uses these to recognise page
    footers -- confirm in ElectionwarePDFStringIterator.
    """
    _first_footer_substring = FIRST_FOOTER_SUBSTRING
    _second_footer_substring = SECOND_FOOTER_SUBSTRING


class LackawannaPDFTableParser(ElectionwarePDFTableParser):
    """Table parser configured from this module's constants.

    Extends the base row cleanup by title-casing the office.
    """

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case ``row['office']`` in place."""
        super()._clean_row(row)
        office = row['office']
        row['office'] = office.title()


class LackawannaPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Lackawanna string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = LackawannaPDFStringIterator
    _pdf_table_parser_clazz = LackawannaPDFTableParser
    _header = LACKAWANNA_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of LACKAWANNA_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(LACKAWANNA_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, LackawannaPDFPageParser)

コード例 #9

0

ファイルを表示

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case the office and
        candidate values of ``row`` in place.
        """
        super()._clean_row(row)
        for field in ('office', 'candidate'):
            row[field] = row[field].title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Reject rows whose office contains 'Delegate', 'Committee',
        'Liquor' or 'Council'; defer every other row to the base class.
        """
        office = row['office']
        excluded_markers = ('Delegate', 'Committee', 'Liquor', 'Council')
        if any(marker in office for marker in excluded_markers):
            return False
        return super()._should_be_recorded(row)


class ChesterPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Chester string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = ChesterPDFStringIterator
    _pdf_table_parser_clazz = ChesterPDFTableParser
    _header = CHESTER_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of CHESTER_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(CHESTER_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, ChesterPDFPageParser)

コード例 #10

0

ファイルを表示

    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case the office and
        candidate values of ``row`` in place.
        """
        super()._clean_row(row)
        office = row['office']
        row['office'] = office.title()
        candidate = row['candidate']
        row['candidate'] = candidate.title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Decide whether ``row`` is kept.

        Individual write-in rows and offices containing 'Delegate' or
        'Comm' are rejected outright; every other row is decided by the
        base class.
        """
        # A Write-In Totals field already exists, so recording the
        # individual write-in rows as well would double count.
        if row['candidate'].startswith('Write-In: '):
            return False
        office = row['office']
        if any(marker in office for marker in ('Delegate', 'Comm')):
            return False
        return super()._should_be_recorded(row)


class MifflinPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Mifflin string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = MifflinPDFStringIterator
    _pdf_table_parser_clazz = MifflinPDFTableParser
    _header = MIFFLIN_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of MIFFLIN_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(MIFFLIN_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, MifflinPDFPageParser)

コード例 #11

0

ファイルを表示

ファイル: pa_clearfield_primary_2020_results_parser.py プロジェクト: rbier/openelections-data-pa

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case the office and
        candidate values of ``row`` in place.
        """
        super()._clean_row(row)
        for field in ('office', 'candidate'):
            row[field] = row[field].title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Decide whether ``row`` is kept.

        Individual write-in rows and offices containing 'Delegate' or
        'Comm' are rejected outright; every other row is decided by the
        base class.
        """
        # A Write-In Totals field already exists, so recording the
        # individual write-in rows as well would double count.
        if row['candidate'].startswith('Write-In: '):
            return False
        office = row['office']
        if 'Delegate' in office or 'Comm' in office:
            return False
        return super()._should_be_recorded(row)


class ClearfieldPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Clearfield string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = ClearfieldPDFStringIterator
    _pdf_table_parser_clazz = ClearfieldPDFTableParser
    _header = CLEARFIELD_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of CLEARFIELD_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(CLEARFIELD_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, ClearfieldPDFPageParser)

コード例 #12

0

ファイルを表示

    @classmethod
    def _clean_row(cls, row):
        """Correct the 'REPUBLICIAN' misspelling in the candidate value, run
        the inherited cleanup, then title-case office and candidate in place.
        """
        # The spelling fix happens before the base cleanup, so the base
        # class sees the corrected text.
        raw_candidate = row['candidate']
        row['candidate'] = raw_candidate.replace('REPUBLICIAN', 'REPUBLICAN')
        super()._clean_row(row)
        for field in ('office', 'candidate'):
            row[field] = row[field].title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Decide whether ``row`` is kept.

        Individual write-in rows and offices containing 'Del ' or 'Cmte'
        are rejected outright; every other row is decided by the base class.
        """
        # A Write-In Totals field already exists, so recording the
        # individual write-in rows as well would double count.
        if row['candidate'].startswith('Write-In: '):
            return False
        office = row['office']
        if 'Del ' in office or 'Cmte' in office:
            return False
        return super()._should_be_recorded(row)


class ClintonPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Clinton string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = ClintonPDFStringIterator
    _pdf_table_parser_clazz = ClintonPDFTableParser
    _header = CLINTON_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of CLINTON_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(CLINTON_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, ClintonPDFPageParser)

コード例 #13

0

ファイルを表示

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case ``row['office']`` in place."""
        super()._clean_row(row)
        office = row['office']
        row['office'] = office.title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Reject individual write-in rows; defer every other row to the
        base class.
        """
        # A Write-In Totals field already exists, so recording the
        # individual write-in rows as well would double count.
        is_individual_write_in = row['candidate'].startswith('Write-In: ')
        if is_individual_write_in:
            return False
        return super()._should_be_recorded(row)


class TiogaPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Tioga string iterator, table parser and header.

    Pages numbered below FIRST_PER_PRECINCT_PAGE are summary pages and are
    skipped by replacing their string iterator.
    """

    _pdf_string_iterator_clazz = TiogaPDFStringIterator
    _pdf_table_parser_clazz = TiogaPDFTableParser
    _header = TIOGA_HEADER

    def __init__(self, page):
        """Initialise through the base class, then neutralise summary pages.

        ``page`` must provide ``get_page_number()``.
        """
        super().__init__(page)
        if page.get_page_number() >= FIRST_PER_PRECINCT_PAGE:
            return
        # Summary page: swap in an iterator holding only the first footer
        # substring so the page is skipped.
        # NOTE(review): presumably the base parser stops at the footer and
        # so yields no rows -- confirm in ElectionwarePDFPageParser.
        footer_only = [FIRST_FOOTER_SUBSTRING]
        self._string_iterator = TiogaPDFStringIterator(footer_only)


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of TIOGA_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(TIOGA_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, TiogaPDFPageParser)

コード例 #14

0

ファイルを表示

    def _populate_votes(self, row):
        """Populate votes via the base class, then discard the next string
        if it contains '%'.
        """
        super()._populate_votes(row)
        upcoming = self._string_iterator.peek()
        if '%' in upcoming:
            # The vote % string is not always supplied, hence the peek
            # before consuming it.
            next(self._string_iterator)

    @classmethod
    def _clean_row(cls, row):
        """Run the inherited cleanup, then title-case the office and
        candidate values of ``row`` in place.
        """
        super()._clean_row(row)
        office = row['office']
        row['office'] = office.title()
        candidate = row['candidate']
        row['candidate'] = candidate.title()

    @classmethod
    def _should_be_recorded(cls, row):
        """Reject rows whose office contains 'Delegate'; defer every other
        row to the base class.
        """
        is_delegate_office = 'Delegate' in row['office']
        if is_delegate_office:
            return False
        return super()._should_be_recorded(row)


class CentrePDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Centre string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = CentrePDFStringIterator
    _pdf_table_parser_clazz = CentrePDFTableParser
    _header = CENTRE_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of CENTRE_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(CENTRE_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, CentrePDFPageParser)

コード例 #15

0

ファイルを表示

class AdamsPDFTableParser(ElectionwarePDFTableParser):
    """Table parser configured from this module's constants.

    Handles an extra 'VOTE %' column on every table except 'STATISTICS'.
    """

    _county = COUNTY
    _expected_table_headers = EXPECTED_TABLE_HEADERS
    _openelections_mapped_header = OPENELECTIONS_MAPPED_HEADER
    _raw_office_to_office_and_district = RAW_OFFICE_TO_OFFICE_AND_DISTRICT

    def _verify_table_header(self):
        """Consume and check the 'VOTE %' header string (skipped for the
        'STATISTICS' office), then run the base header verification.
        """
        is_statistics = self._office == 'STATISTICS'
        if not is_statistics:
            header_text = next(self._string_iterator)
            assert header_text == 'VOTE %'
        super()._verify_table_header()

    def _populate_votes(self, row):
        """Populate votes via the base class, then consume the vote-% string.

        Nothing extra is consumed for the 'STATISTICS' office or for the
        'Contest Totals' candidate row.
        """
        super()._populate_votes(row)
        if self._office == 'STATISTICS':
            return
        if row['candidate'] == 'Contest Totals':
            return
        percent_text = next(self._string_iterator)
        # Sanity check that the consumed string really is a percentage.
        assert '%' in percent_text


class AdamsPDFPageParser(ElectionwarePDFPageParser):
    """Page parser bound to the Adams string iterator, table parser and header.

    Only class attributes are overridden; how ElectionwarePDFPageParser
    consumes them is defined outside this excerpt.
    """
    _pdf_string_iterator_clazz = AdamsPDFStringIterator
    _pdf_table_parser_clazz = AdamsPDFTableParser
    _header = ADAMS_HEADER


if __name__ == "__main__":
    # Script entry point: run pdf_to_csv over the pages of ADAMS_FILE,
    # writing through a DictWriter on OUTPUT_FILE.
    # newline='' is what the csv module expects for files it writes to.
    with open(OUTPUT_FILE, 'w', newline='') as csv_file:
        pages = PDFPageIterator(ADAMS_FILE)
        writer = csv.DictWriter(csv_file, OUTPUT_HEADER)
        pdf_to_csv(pages, writer, AdamsPDFPageParser)