Python read_dominion_csv Examples, arlo_e2e.dominion.read_dominion_csv Python Examples

Example #1

0

Show file

File: test_dominion.py Project: nealmcb/arlo-e2e

    def test_read_with_holes(self) -> None:
        input_str = """
"2018 Test Election","5.2.16.1","","","","","","","","","",""
"","","","","","","","","Representative - District X (Vote For=1)","Representative - District X (Vote For=1)","Referendum","Referendum"
"","","","","","","","","Alice","Bob","For","Against"
"CvrNumber","TabulatorNum","BatchId","RecordId","ImprintedId","CountingGroup","PrecinctPortion","BallotType","DEM","REP","",""
="1",="1",="1",="1",="1-1-1","Mail","Thing1","T1","1","0",,
="2",="1",="1",="3",="1-1-3","Mail","Thing2","T2",,,"0","0"
        """
        result: Optional[DominionCSV] = read_dominion_csv(StringIO(input_str))
        if result is None:
            self.fail("Expected not none")
        else:
            self.assertNotEqual(result, None)

            rows = list(result.data.iterrows())
            self.assertEqual(2, len(rows))

            self.assertSetEqual(
                {"Representative - District X"},
                result.metadata.style_map["T1"],
            )

            self.assertSetEqual({"Referendum"},
                                result.metadata.style_map["T2"])

Example #2

0

Show file

File: test_tally.py Project: nealmcb/arlo-e2e

    def test_end_to_end(self, input: str, keypair: ElGamalKeyPair,
                        use_keypair: bool) -> None:
        """
        Tallies the given CVR text with the pool-based tally (with or without a
        caller-supplied secret key), validates every proof, and then cross-checks
        the tally's per-ballot-style queries against the parsed data.
        """
        # necessary for coverage testing to work in parallel
        coverage.process_startup()

        cvrs = read_dominion_csv(StringIO(input))
        self.assertIsNotNone(cvrs)

        _description, ballots, _id_map = cvrs.to_election_description()
        assert len(ballots) > 0, "can't have zero ballots!"

        if not use_keypair:
            tally = fast_tally_everything(cvrs, self.pool, verbose=True)
        else:
            tally = fast_tally_everything(
                cvrs,
                self.pool,
                verbose=True,
                secret_key=keypair.secret_key,
            )

        proofs_ok = tally.all_proofs_valid(verbose=True)
        self.assertTrue(proofs_ok)

        # With a tally and its CVRs in hand, exercise some of the other utility methods
        # here; that is far cheaper than regenerating CVRs and tallies in a separate test.

        # TODO: tests for get_contest_titles_matching and get_ballot_styles_for_contest_titles

        for style in cvrs.metadata.style_map.keys():
            from_tally = tally.get_ballots_matching_ballot_styles([style])
            from_frame = cvrs.data[cvrs.data.BallotType == style]

            self.assertEqual(len(from_frame), len(from_tally))

Example #3

0

Show file

File: test_dominion.py Project: nealmcb/arlo-e2e

    def test_csv_metadata_roundtrip(self, cvrs: str) -> None:
        """
        Writes the selection-free view of the parsed CVRs out as CSV text, reads that
        text back with pandas, and checks that the two frames are equal.
        """
        parsed = read_dominion_csv(StringIO(cvrs))
        self.assertIsNotNone(parsed)

        before = parsed.dataframe_without_selections()
        serialized = before.to_csv(index=False, quoting=csv.QUOTE_NONNUMERIC)
        after = pd.read_csv(StringIO(serialized))

        frames_match = before.equals(after)
        self.assertTrue(frames_match)

Example #4

0

Show file

File: test_dominion.py Project: nealmcb/arlo-e2e

 def test_electionguard_extraction(self) -> None:
     """
     Converts the known-good sample CVRs into an election description and checks
     the number of ballot styles, contests, candidates, and ballots produced.
     """
     cvr_stream = StringIO(_good_dominion_cvrs)
     result: Optional[DominionCSV] = read_dominion_csv(cvr_stream)
     if result is not None:
         description, ballots, _ = result.to_election_description()
         self.assertEqual(2, len(description.ballot_styles))
         self.assertEqual(2, len(description.contests))
         self.assertEqual(4, len(description.candidates))
         self.assertEqual(2, len(ballots))
     else:
         self.fail("Expected not none")

Example #5

0

Show file

File: dominion_hypothesis.py Project: nealmcb/arlo-e2e

def ballots_and_context(draw: _DrawType):
    """
    Combines our `dominion_cvrs` strategy with ElectionGuard's `ciphertext_elections`
    strategy: draws a CVR file, parses it, draws matching key material, and bundles
    everything into a `DominionBallotsAndContext` for use by later tests.
    """
    votes_cap = draw(integers(1, 3))
    csv_text = draw(dominion_cvrs(max_votes_per_race=votes_cap))

    parsed: Optional[DominionCSV] = read_dominion_csv(StringIO(csv_text))
    assert parsed is not None, "CVR parser shouldn't fail!"

    ed, ballots, id_map = parsed.to_election_description()

    # the ciphertext election is drawn against the description we just derived
    secret_key, cec = draw(ciphertext_elections(ed))

    context = DominionBallotsAndContext(parsed, ed, secret_key, id_map, cec,
                                        ballots)
    return context

Example #6

0

Show file

File: test_dominion.py Project: nealmcb/arlo-e2e

    def test_read_dominion_csv(self) -> None:
        """
        Parses the shared known-good CVR sample and verifies the election name, the
        contest map and its choice metadata, the set of parties, and the contents of
        the first ballot row.
        """
        result: Optional[DominionCSV] = read_dominion_csv(
            StringIO(_good_dominion_cvrs))
        if result is None:
            self.fail("Expected not none")
        else:
            self.assertEqual("2018 Test Election",
                             result.metadata.election_name)

            # exactly the two contests from the sample, keyed by title
            self.assertEqual(2, len(result.metadata.contest_map))
            self.assertIn("Representative - District X",
                          result.metadata.contest_map)
            self.assertIn("Referendum", result.metadata.contest_map)

            rep_list = result.metadata.contest_map[
                "Representative - District X"]
            self.assertIsNotNone(rep_list)
            self.assertIn(_expected_alice_metadata, rep_list)
            self.assertIn(_expected_bob_metadata, rep_list)
            self.assertEqual(
                "Representative - District X | Alice | DEM",
                _expected_alice_metadata.to_string(),
            )

            # the string form of each choice must also be present in the parsed data
            self.assertIn("Representative - District X | Alice | DEM",
                          result.data)
            self.assertIn("Representative - District X | Bob | REP",
                          result.data)

            referendum_list = result.metadata.contest_map["Referendum"]
            self.assertIsNotNone(referendum_list)
            self.assertIn(_expected_ref_for_metadata, referendum_list)
            self.assertIn(_expected_ref_against_metadata, referendum_list)
            self.assertEqual("Referendum | For",
                             _expected_ref_for_metadata.to_string())
            self.assertEqual("Referendum | Against",
                             _expected_ref_against_metadata.to_string())

            self.assertEqual({"REP", "DEM"}, result.metadata.all_parties)

            rows = list(result.data.iterrows())
            self.assertEqual(2, len(rows))

            # each row is a tuple, the second part is the Series
            x = rows[0][1]
            # assertIsInstance reports the actual type on failure, unlike
            # assertTrue(isinstance(...)), which would only say "False is not true"
            self.assertIsInstance(x, pd.Series)
            self.assertEqual(1, x["CvrNumber"])
            self.assertEqual("1-1-1", x["ImprintedId"])
            self.assertEqual(
                "2018 Test Election | 1 | 1 | 1 | 1 | 1-1-1 | Mail | 12345 - STR5 (12345 - STR5) | STR5",
                x["Guid"],
            )

Example #7

0

Show file

def run_bench(filename: str, output_dir: Optional[str],
              use_progressbar: bool) -> None:
    """
    Benchmarks parsing and Ray-based tallying of one Dominion CVR file, printing
    the timings to stdout.

    :param filename: CVR file handed to `read_dominion_csv`
    :param output_dir: passed to `ray_tally_everything` as `root_dir`; when it is
        falsy the final proof sanity check is skipped
    :param use_progressbar: forwarded to the tally and to the proof check
    :raises SystemExit: with status 1 when the CVR file cannot be read
    :raises AssertionError: when there are no ballots or a proof fails to validate
    """
    # Function-scope import: the bare `exit()` builtin is injected by the `site`
    # module for interactive use and is not guaranteed to exist in every run mode.
    import sys

    start_time = timer()
    print(f"Benchmarking: {filename}")
    cvrs = read_dominion_csv(filename)
    if cvrs is None:
        print(f"Failed to read {filename}, terminating.")
        sys.exit(1)
    rows, _ = cvrs.data.shape

    parse_time = timer()
    print(
        f"    Parse time: {parse_time - start_time: .3f} sec, {rows / (parse_time - start_time):.3f} ballots/sec"
    )

    # Raised explicitly (same exception type as before) so the check survives `python -O`.
    if not rows > 0:
        raise AssertionError("can't have zero ballots!")

    # doesn't matter what the key is, so long as it's consistent for both runs
    keypair = get_optional(
        elgamal_keypair_from_secret(int_to_q_unchecked(31337)))

    rtally_start = timer()
    rtally = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        verbose=True,
        root_dir=output_dir,
        use_progressbar=use_progressbar,
    )
    rtally_end = timer()

    print("\nOVERALL PERFORMANCE")
    print(f"    Ray time:    {rtally_end - rtally_start : .3f} sec")
    print(
        f"    Ray rate:    {rows / (rtally_end - rtally_start): .3f} ballots/sec"
    )

    if output_dir:
        print("\nSANITY CHECK")
        # The validation call used to live inside an `assert`, which meant the whole
        # sanity check silently vanished under `python -O`. Run it unconditionally.
        proofs_ok = rtally.all_proofs_valid(
            verbose=True,
            recheck_ballots_and_tallies=False,
            use_progressbar=use_progressbar,
        )
        if not proofs_ok:
            raise AssertionError("proof failure!")

Example #8

0

Show file

File: test_ray_tally.py Project: nealmcb/arlo-e2e

    def test_ray_and_multiprocessing_agree(
        self, input: str, keypair: ElGamalKeyPair
    ) -> None:
        """
        Runs the pool-based tally and the Ray-based tally over the same CVRs with the
        same seed hash, master nonce, date, and secret key, then checks that the Ray
        result, converted to a fast tally, equals the pool result.
        """
        self.removeTree()

        # These values are normally generated inside the tally_everything methods. Fixing
        # them up front and sharing them removes the non-determinism, so both runs
        # should produce identical results.
        shared_seed_hash = rand_q()
        shared_master_nonce = rand_q()
        shared_date = datetime.now()

        cvrs = read_dominion_csv(StringIO(input))
        self.assertIsNotNone(cvrs)

        _, ballots, _ = cvrs.to_election_description()
        assert len(ballots) > 0, "can't have zero ballots!"

        print(f"Comparing tallies with {len(ballots)} ballot(s).")

        pool_tally = fast_tally_everything(
            cvrs,
            verbose=False,
            date=shared_date,
            secret_key=keypair.secret_key,
            pool=self.pool,
            seed_hash=shared_seed_hash,
            master_nonce=shared_master_nonce,
            use_progressbar=False,
        )
        ray_tally = ray_tally_everything(
            cvrs,
            verbose=False,
            date=shared_date,
            secret_key=keypair.secret_key,
            seed_hash=shared_seed_hash,
            master_nonce=shared_master_nonce,
            root_dir="rtally_output",
            use_progressbar=False,
        )

        ray_as_fast = ray_tally.to_fast_tally()
        self.assertEqual(pool_tally, ray_as_fast)

Example #9

0

Show file

File: test_publish.py Project: nealmcb/arlo-e2e

    def test_end_to_end_publications_ray(self, input: str, check_proofs: bool,
                                         keypair: ElGamalKeyPair) -> None:
        """
        Tallies the CVRs with Ray, writes the tally to the testing directory, loads
        it back, and checks that the reloaded tally has the same encrypted ballots
        and is equivalent to the original.
        """
        # if there's anything leftover from a prior run, get rid of it
        self.removeTree()

        cvrs = read_dominion_csv(StringIO(input))
        self.assertIsNotNone(cvrs)

        _, ballots, _ = cvrs.to_election_description()
        assert len(ballots) > 0, "can't have zero ballots!"

        written = ray_tally_everything(
            cvrs,
            secret_key=keypair.secret_key,
            verbose=True,
            root_dir=TALLY_TESTING_DIR,
        )
        self.assertTrue(written.all_proofs_valid())

        # dump files out to disk
        write_ray_tally(written, TALLY_TESTING_DIR)
        log_and_print(
            "tally_testing written, proceeding to read it back in again")

        # now, read it back again!
        reloaded = load_ray_tally(
            TALLY_TESTING_DIR,
            check_proofs=check_proofs,
            verbose=True,
            recheck_ballots_and_tallies=True,
        )
        self.assertIsNotNone(reloaded)

        log_and_print("tally_testing got non-null result!")

        same_ballots = _list_eq(written.encrypted_ballots,
                                reloaded.encrypted_ballots)
        self.assertTrue(same_ballots)
        self.assertTrue(written.equivalent(reloaded, keypair))

        # clean up our mess
        self.removeTree()

Example #10

0

Show file

File: test_ray_tally.py Project: nealmcb/arlo-e2e

    def test_ray_end_to_end(
        self, input: str, keypair: ElGamalKeyPair, use_keypair: bool
    ) -> None:
        """
        Tallies the CVRs with Ray (with or without a caller-supplied secret key),
        validates the proofs of the converted fast tally, and then writes both forms
        to disk and checks that the resulting manifests are equivalent.
        """
        self.removeTree()

        cvrs = read_dominion_csv(StringIO(input))
        self.assertIsNotNone(cvrs)

        _, ballots, _ = cvrs.to_election_description()
        assert len(ballots) > 0, "can't have zero ballots!"

        print(f"End-to-end Ray test with {len(ballots)} ballot(s).")
        if not use_keypair:
            rtally = ray_tally_everything(
                cvrs,
                verbose=True,
                root_dir="rtally_output",
                use_progressbar=False,
            )
        else:
            rtally = ray_tally_everything(
                cvrs,
                verbose=True,
                secret_key=keypair.secret_key,
                root_dir="rtally_output",
                use_progressbar=False,
            )

        ftally = rtally.to_fast_tally()
        proofs_ok = ftally.all_proofs_valid(verbose=False)
        self.assertTrue(proofs_ok)

        # Next, write both tallies out to the filesystem and confirm that the
        # two outputs describe the same content.
        fast_manifest = write_fast_tally(ftally, "ftally_output")
        ray_manifest = write_ray_tally(rtally, "rtally_output")

        # Plain equality of the manifests won't work since their root_dirs differ.
        self.assertTrue(fast_manifest.equivalent(ray_manifest))
        self.removeTree()

Example #11

0

Show file

File: test_dominion.py Project: nealmcb/arlo-e2e

    def test_repeating_candidate_names(self) -> None:
        input_str = """
"2018 Test Election","5.2.16.1","","","","","","","","","","","","","",""
"","","","","","","","","District X (Vote For=3)","District X (Vote For=3)","District X (Vote For=3)","District X (Vote For=3)","Referendum 1","Referendum 1","Referendum 2","Referendum 2"
"","","","","","","","","Alice","Write-in","Write-in","Write-in","For","Against","For","Against"
"CvrNumber","TabulatorNum","BatchId","RecordId","ImprintedId","CountingGroup","PrecinctPortion","BallotType","DEM","","","","","","",""
="1",="1",="1",="1",="1-1-1","Mail","Thing1","T1","1","0","0","0","1","0","1","0"
                """
        result: Optional[DominionCSV] = read_dominion_csv(StringIO(input_str))
        if result is None:
            self.fail("Expected not none")
        else:
            self.assertNotEqual(result, None)
            self.assertIsNotNone(result.metadata.contest_map["District X"])
            self.assertIsNotNone(result.metadata.contest_map["Referendum 1"])
            self.assertIsNotNone(result.metadata.contest_map["Referendum 2"])

            # make sure there are no "(2)" things going on in the referenda
            ref1_choice_names = {
                x.choice_name
                for x in result.metadata.contest_map["Referendum 1"]
            }
            ref2_choice_names = {
                x.choice_name
                for x in result.metadata.contest_map["Referendum 2"]
            }
            self.assertEqual({"For", "Against"}, ref1_choice_names)
            self.assertEqual({"For", "Against"}, ref2_choice_names)

            # and now make sure we have what we expect for our write-in race
            choice_names = {
                x.choice_name
                for x in result.metadata.contest_map["District X"]
            }
            self.assertEqual(
                choice_names,
                {"Alice", "Write-in", "Write-in (2)", "Write-in (3)"})

Example #12

0

Show file

File: encryption_bench.py Project: nealmcb/arlo-e2e

def run_bench(filename: str, pool: Pool, file_dir: Optional[str]) -> None:
    """
    Benchmarks the pool-based tally against the Ray-based tally on one Dominion
    CVR file and prints the timings and relative speedup to stdout.

    :param filename: CVR file handed to `read_dominion_csv`
    :param pool: worker pool used by the pool-based tally and the equivalence check
    :param file_dir: optional directory prefix; when truthy, both tallies are written
        under `file_dir + "_fast"` / `file_dir + "_ray"`, cross-checked for
        equivalence, and those directories are removed again at the end
    :raises SystemExit: with status 1 when the CVR file cannot be read
    :raises AssertionError: when there are no ballots, a proof fails to validate,
        or the two tallies are not equivalent
    """
    # Function-scope import: the bare `exit()` builtin is injected by the `site`
    # module for interactive use and is not guaranteed to exist in every run mode.
    import sys

    start_time = timer()
    print(f"Benchmarking: {filename}")
    log_info(f"Benchmarking: {filename}")
    cvrs = read_dominion_csv(filename)
    if cvrs is None:
        print(f"Failed to read {filename}, terminating.")
        sys.exit(1)
    rows, _ = cvrs.data.shape

    parse_time = timer()
    print(f"    Parse time: {parse_time - start_time: .3f} sec")

    # Checks below raise AssertionError explicitly (same exception type as the
    # original `assert` statements) so they still run under `python -O`.
    if not rows > 0:
        raise AssertionError("can't have zero ballots!")

    # doesn't matter what the key is, so long as it's consistent for both runs
    keypair = get_optional(elgamal_keypair_from_secret(int_to_q_unchecked(31337)))

    tally_start = timer()
    tally = fast_tally_everything(
        cvrs, pool, verbose=True, secret_key=keypair.secret_key
    )

    if file_dir:
        write_fast_tally(tally, file_dir + "_fast")

    tally_end = timer()
    if not tally.all_proofs_valid(verbose=True):
        raise AssertionError("proof failure!")

    print("\nstarting ray.io parallelism")
    rtally_start = timer()
    rtally = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        root_dir=file_dir + "_ray" if file_dir else None,
    )
    rtally_end = timer()

    if file_dir:
        rtally_as_fast = rtally.to_fast_tally()
        if not rtally_as_fast.all_proofs_valid(verbose=True):
            raise AssertionError("proof failure!")
        if not tally.equivalent(rtally_as_fast, keypair, pool):
            raise AssertionError("tallies aren't equivalent!")

        # Note: tally.equivalent() isn't quite as stringent as asserting that absolutely
        # everything is identical, but it's a pretty good sanity check for our purposes.
        # In tests/test_ray_tally.py, test_ray_and_multiprocessing_agree goes the extra
        # distance to create identical tallies from each system and assert their equality.

    print("\nOVERALL PERFORMANCE")
    print(f"    Pool time:   {tally_end - tally_start: .3f} sec")
    print(f"    Pool rate:   {rows / (tally_end - tally_start): .3f} ballots/sec")
    print(f"    Ray time:    {rtally_end - rtally_start : .3f} sec")
    print(f"    Ray rate:    {rows / (rtally_end - rtally_start): .3f} ballots/sec")

    print(
        f"    Ray speedup: {(tally_end - tally_start) / (rtally_end - rtally_start) : .3f} (>1.0 = ray is faster, <1.0 = ray is slower)"
    )

    # Only clean up directories this run could have created. The earlier branches
    # test truthiness, so an empty-string `file_dir` wrote nothing; testing
    # `is not None` here would delete unrelated "_ray"/"_fast" directories.
    if file_dir:
        shutil.rmtree(file_dir + "_ray", ignore_errors=True)
        shutil.rmtree(file_dir + "_fast", ignore_errors=True)

Example #13

0

Show file

    # NOTE(review): this is a fragment; `args`, `keyfile`, and `cvrfile` are bound
    # somewhere above the visible code.
    tallydir = args.tallies
    use_cluster = args.cluster

    # Refuse to run when the tally directory already exists.
    if path.exists(tallydir):
        print(f"Tally directory ({tallydir}) already exists. Exiting.")
        exit(1)

    # Load the election administration state from `keyfile` in the current
    # directory; bail out if it is missing or reports itself as invalid.
    admin_state: Optional[ElectionAdmin] = load_json_helper(
        ".", keyfile, ElectionAdmin)
    if admin_state is None or not admin_state.is_valid():
        print(f"Election administration key material wasn't valid")
        exit(1)

    # Parse the CVR file, timing the parse so throughput can be reported.
    print(f"Starting up, reading {cvrfile}")
    start_time = timer()
    cvrs = read_dominion_csv(cvrfile)
    if cvrs is None:
        print(f"Failed to read {cvrfile}, terminating.")
        exit(1)
    # one row per CVR (see the "Found ... CVRs" message below); `cols` is unused here
    rows, cols = cvrs.data.shape
    parse_time = timer()
    print(
        f"    Parse time: {parse_time - start_time: .3f} sec, {rows / (parse_time - start_time):.3f} ballots/sec"
    )
    print(f"    Found {rows} CVRs in {cvrs.metadata.election_name}.")

    # Start Ray either against a cluster (and wait for its workers) or on localhost.
    if use_cluster:
        ray_init_cluster()
        ray_wait_for_workers()
    else:
        ray_init_localhost()

Example #14

0

Show file

File: test_dominion.py Project: nealmcb/arlo-e2e

 def test_max_votes_per_race_sanity(self, cvrs: str) -> None:
     """Checks that the supplied CVR text parses to a non-None result."""
     cvr_stream = StringIO(cvrs)
     self.assertIsNotNone(read_dominion_csv(cvr_stream))

Example #15

0

Show file

File: test_dominion.py Project: nealmcb/arlo-e2e

 def test_read_dominion_csv_failures(self) -> None:
     """Checks that reading "no-such-file.csv" yields None."""
     outcome = read_dominion_csv("no-such-file.csv")
     self.assertIsNone(outcome)

Example #16

0

Show file

File: test_publish.py Project: nealmcb/arlo-e2e

    def test_end_to_end_publications(self, input: str, check_proofs: bool,
                                     keypair: ElGamalKeyPair) -> None:
        """
        Tallies the CVRs with the pool-based tally, writes the result to the testing
        directory, loads it back, and checks the reloaded tally against the original.
        Afterwards it decrypts the ballots and round-trips the first proven ballot
        through the filesystem as well.
        """
        # necessary for coverage testing to work in parallel
        coverage.process_startup()
        # if there's anything leftover from a prior run, get rid of it
        self.removeTree()

        cvrs = read_dominion_csv(StringIO(input))
        self.assertIsNotNone(cvrs)

        _, ballots, _ = cvrs.to_election_description()
        assert len(ballots) > 0, "can't have zero ballots!"

        written = fast_tally_everything(
            cvrs,
            self.pool,
            secret_key=keypair.secret_key,
            verbose=True,
        )
        self.assertTrue(written.all_proofs_valid(self.pool))

        # dump files out to disk
        write_fast_tally(written, TALLY_TESTING_DIR)
        log_and_print(
            "tally_testing written, proceeding to read it back in again")

        # now, read it back again!
        reloaded = load_fast_tally(
            TALLY_TESTING_DIR,
            check_proofs=check_proofs,
            pool=self.pool,
            verbose=True,
            recheck_ballots_and_tallies=True,
        )
        self.assertIsNotNone(reloaded)

        log_and_print("tally_testing got non-null result!")

        same_ballots = _list_eq(written.encrypted_ballots,
                                reloaded.encrypted_ballots)
        self.assertTrue(same_ballots)
        self.assertTrue(written.equivalent(reloaded, keypair, self.pool))

        # Make sure there's an index.html file; throws an exception if it's missing
        index_path = path.join(TALLY_TESTING_DIR, "index.html")
        self.assertIsNotNone(stat(index_path))

        # Finally, reuse the tally built above to exercise the read/write
        # facilities for decrypted ballots.
        ied = InternalElectionDescription(written.election_description)

        log_and_print("decrypting one more time")
        pballots = decrypt_ballots(
            ied,
            written.context.crypto_extended_base_hash,
            keypair,
            self.pool,
            written.encrypted_ballots,
        )
        self.assertEqual(len(pballots), len(written.encrypted_ballots))
        self.assertNotIn(None, pballots)

        # To keep the test fast, only the first ballot goes through the round trip.
        first_proven = pballots[0]
        first_encrypted = written.encrypted_ballots[0]
        bid = first_proven.ballot.object_id

        proofs_ok = verify_proven_ballot_proofs(
            written.context.crypto_extended_base_hash,
            keypair.public_key,
            first_encrypted,
            first_proven,
        )
        self.assertTrue(proofs_ok)

        write_proven_ballot(first_proven, DECRYPTED_DIR)
        self.assertTrue(exists_proven_ballot(bid, DECRYPTED_DIR))
        # an id that was never written must not be reported as present
        self.assertFalse(exists_proven_ballot(bid + "0", DECRYPTED_DIR))
        self.assertEqual(first_proven, load_proven_ballot(bid, DECRYPTED_DIR))

        # clean up our mess
        self.removeTree()