def test_read_with_holes(self) -> None:
    """
    Parse a CVR export in which each ballot leaves the cells of one contest
    empty, then check the number of rows and the contests recorded for each
    ballot style in `metadata.style_map`.
    """
    # Ballot 1 (style T1) has empty Referendum cells; ballot 2 (style T2) has
    # empty Representative cells.
    csv_text = """
"2018 Test Election","5.2.16.1","","","","","","","","","",""
"","","","","","","","","Representative - District X (Vote For=1)","Representative - District X (Vote For=1)","Referendum","Referendum"
"","","","","","","","","Alice","Bob","For","Against"
"CvrNumber","TabulatorNum","BatchId","RecordId","ImprintedId","CountingGroup","PrecinctPortion","BallotType","DEM","REP","",""
="1",="1",="1",="1",="1-1-1","Mail","Thing1","T1","1","0",,
="2",="1",="1",="3",="1-1-3","Mail","Thing2","T2",,,"0","0"
    """
    result: Optional[DominionCSV] = read_dominion_csv(StringIO(csv_text))

    # Guard clause: self.fail() raises, so everything below sees a parsed result.
    if result is None:
        self.fail("Expected not none")

    self.assertNotEqual(result, None)

    ballot_rows = list(result.data.iterrows())
    self.assertEqual(2, len(ballot_rows))

    self.assertSetEqual(
        {"Representative - District X"},
        result.metadata.style_map["T1"],
    )
    self.assertSetEqual({"Referendum"}, result.metadata.style_map["T2"])
def test_end_to_end(self, input: str, keypair: ElGamalKeyPair, use_keypair: bool) -> None:
    """
    Parse `input` as Dominion CVRs, tally them with `fast_tally_everything`
    and require every proof in the tally to validate.

    :param input: CVR text, wrapped in a `StringIO` and handed to the parser
    :param keypair: its `secret_key` is passed to the tally when `use_keypair` is true
    :param use_keypair: when false, the tally is run without a `secret_key` argument
    """
    coverage.process_startup()  # necessary for coverage testing to work in parallel

    cvrs = read_dominion_csv(StringIO(input))
    self.assertIsNotNone(cvrs)

    _, ballots, _ = cvrs.to_election_description()
    assert len(ballots) > 0, "can't have zero ballots!"

    if not use_keypair:
        tally = fast_tally_everything(cvrs, self.pool, verbose=True)
    else:
        tally = fast_tally_everything(
            cvrs, self.pool, verbose=True, secret_key=keypair.secret_key
        )

    self.assertTrue(tally.all_proofs_valid(verbose=True))

    # With a tally and a set of CVRs already in hand, exercise some of the other
    # utility methods here; that is much faster than regenerating CVRs and tallies.

    # TODO: tests for get_contest_titles_matching and get_ballot_styles_for_contest_titles

    # For each ballot style, the tally's query must return as many ballots as
    # filtering the parsed data on its BallotType column does.
    for style_name in cvrs.metadata.style_map.keys():
        from_tally = tally.get_ballots_matching_ballot_styles([style_name])
        from_frame = cvrs.data[cvrs.data.BallotType == style_name]
        self.assertEqual(len(from_frame), len(from_tally))
def test_csv_metadata_roundtrip(self, cvrs: str) -> None:
    """
    Write the result of `dataframe_without_selections()` out as CSV text, read
    that text back with pandas, and require the two frames to be equal.

    :param cvrs: CVR text, wrapped in a `StringIO` and handed to the parser
    """
    parsed = read_dominion_csv(StringIO(cvrs))
    self.assertIsNotNone(parsed)

    before = parsed.dataframe_without_selections()

    # Serialize with every non-numeric field quoted, then parse the text again.
    serialized = before.to_csv(index=False, quoting=csv.QUOTE_NONNUMERIC)
    after = pd.read_csv(StringIO(serialized))

    self.assertTrue(before.equals(after))
def test_electionguard_extraction(self) -> None:
    """
    Convert the `_good_dominion_cvrs` fixture with `to_election_description()`
    and check how many ballot styles, contests, candidates and ballots result.
    """
    result: Optional[DominionCSV] = read_dominion_csv(StringIO(_good_dominion_cvrs))

    # Guard clause: self.fail() raises, so the checks below see a parsed result.
    if result is None:
        self.fail("Expected not none")

    description, ballots, _ = result.to_election_description()

    self.assertEqual(2, len(description.ballot_styles))
    self.assertEqual(2, len(description.contests))
    self.assertEqual(4, len(description.candidates))
    self.assertEqual(2, len(ballots))
def ballots_and_context(draw: _DrawType):
    """
    Draws raw CVR text from `dominion_cvrs` (with between 1 and 3 votes allowed
    per race), parses it, converts it to an election description, and then draws
    a secret key and ciphertext election context for that description from
    `ciphertext_elections`. Everything is returned bundled together in a
    `DominionBallotsAndContext`.
    """
    votes_per_race = draw(integers(1, 3))
    cvr_text = draw(dominion_cvrs(max_votes_per_race=votes_per_race))

    parsed: Optional[DominionCSV] = read_dominion_csv(StringIO(cvr_text))
    assert parsed is not None, "CVR parser shouldn't fail!"

    description, ballots, id_map = parsed.to_election_description()
    secret_key, context = draw(ciphertext_elections(description))

    return DominionBallotsAndContext(
        parsed, description, secret_key, id_map, context, ballots
    )
def test_read_dominion_csv(self) -> None:
    """
    Parse the `_good_dominion_cvrs` fixture and check the election name, the
    contest map and its choice metadata, the set of parties, the generated
    column names, and selected fields of the first data row.
    """
    result: Optional[DominionCSV] = read_dominion_csv(StringIO(_good_dominion_cvrs))

    # Guard clause: self.fail() raises, so the checks below see a parsed result.
    if result is None:
        self.fail("Expected not none")

    rep_title = "Representative - District X"
    ref_title = "Referendum"
    alice_label = "Representative - District X | Alice | DEM"

    self.assertEqual("2018 Test Election", result.metadata.election_name)
    self.assertEqual(2, len(result.metadata.contest_map.keys()))
    self.assertIn(rep_title, result.metadata.contest_map)
    self.assertIn(ref_title, result.metadata.contest_map)

    # Representative contest: both candidates present, with the expected labels.
    rep_choices = result.metadata.contest_map[rep_title]
    self.assertIsNotNone(rep_choices)
    self.assertIn(_expected_alice_metadata, rep_choices)
    self.assertIn(_expected_bob_metadata, rep_choices)
    self.assertEqual(alice_label, _expected_alice_metadata.to_string())
    self.assertIn(alice_label, result.data)
    self.assertIn("Representative - District X | Bob | REP", result.data)

    # Referendum contest: both options present, with the expected labels.
    ref_choices = result.metadata.contest_map[ref_title]
    self.assertIsNotNone(ref_choices)
    self.assertIn(_expected_ref_for_metadata, ref_choices)
    self.assertIn(_expected_ref_against_metadata, ref_choices)
    self.assertEqual("Referendum | For", _expected_ref_for_metadata.to_string())
    self.assertEqual(
        "Referendum | Against", _expected_ref_against_metadata.to_string()
    )

    self.assertEqual({"REP", "DEM"}, result.metadata.all_parties)

    ballot_rows = list(result.data.iterrows())
    self.assertEqual(2, len(ballot_rows))

    # iterrows() yields (index, Series) tuples; only the Series is of interest.
    _, first_row = ballot_rows[0]
    self.assertTrue(isinstance(first_row, pd.Series))
    self.assertEqual(1, first_row["CvrNumber"])
    self.assertEqual("1-1-1", first_row["ImprintedId"])
    self.assertEqual(
        "2018 Test Election | 1 | 1 | 1 | 1 | 1-1-1 | Mail | 12345 - STR5 (12345 - STR5) | STR5",
        first_row["Guid"],
    )
def run_bench(filename: str, output_dir: Optional[str], use_progressbar: bool) -> None:
    """
    Parse a Dominion CVR file, tally it with `ray_tally_everything`, and print
    parse and tally timings.

    :param filename: path of the CVR file handed to `read_dominion_csv`
    :param output_dir: passed to the tally as `root_dir`; when truthy, the
        tally's proofs are also validated after the timings are printed
    :param use_progressbar: forwarded to the tally and to proof validation
    :raises SystemExit: with status 1 when the CVR file cannot be read
    :raises AssertionError: when there are no ballots, or proof validation fails
    """
    # Imported locally so this function does not rely on the file's import block.
    # `sys.exit` is used because the bare `exit` builtin is installed by the
    # `site` module for interactive use and is not guaranteed to exist.
    import sys

    start_time = timer()
    print(f"Benchmarking: {filename}")

    cvrs = read_dominion_csv(filename)
    if cvrs is None:
        print(f"Failed to read {filename}, terminating.")
        sys.exit(1)

    rows, _ = cvrs.data.shape
    parse_time = timer()
    print(
        f" Parse time: {parse_time - start_time: .3f} sec, {rows / (parse_time - start_time):.3f} ballots/sec"
    )

    # Explicit raise rather than `assert`, so the check survives `python -O`.
    if rows <= 0:
        raise AssertionError("can't have zero ballots!")

    # doesn't matter what the key is, so long as it's consistent for both runs
    keypair = get_optional(elgamal_keypair_from_secret(int_to_q_unchecked(31337)))

    rtally_start = timer()
    rtally = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        verbose=True,
        root_dir=output_dir,
        use_progressbar=use_progressbar,
    )
    rtally_end = timer()
    ray_elapsed = rtally_end - rtally_start

    print("\nOVERALL PERFORMANCE")
    print(f" Ray time: {ray_elapsed: .3f} sec")
    print(f" Ray rate: {rows / ray_elapsed: .3f} ballots/sec")

    if output_dir:
        print("\nSANITY CHECK")
        # The validation call is kept out of an `assert` statement: under
        # `python -O` an assert, and the call inside it, would be skipped.
        proofs_ok = rtally.all_proofs_valid(
            verbose=True,
            recheck_ballots_and_tallies=False,
            use_progressbar=use_progressbar,
        )
        if not proofs_ok:
            raise AssertionError("proof failure!")
def test_ray_and_multiprocessing_agree(
    self, input: str, keypair: ElGamalKeyPair
) -> None:
    """
    Tally the same CVRs with `fast_tally_everything` and `ray_tally_everything`,
    giving both the same seed hash, master nonce, date and secret key, and
    require the two tallies to compare equal.

    :param input: CVR text, wrapped in a `StringIO` and handed to the parser
    :param keypair: its `secret_key` is passed to both tallies
    """
    self.removeTree()

    # These values are normally generated inside the tally functions. Supplying
    # the same ones to both removes the non-determinism from the
    # tally_everything methods, so the results come out identical.
    shared_seed = rand_q()
    shared_nonce = rand_q()
    shared_date = datetime.now()

    cvrs = read_dominion_csv(StringIO(input))
    self.assertIsNotNone(cvrs)

    _, ballots, _ = cvrs.to_election_description()
    assert len(ballots) > 0, "can't have zero ballots!"
    print(f"Comparing tallies with {len(ballots)} ballot(s).")

    pool_tally = fast_tally_everything(
        cvrs,
        verbose=False,
        date=shared_date,
        secret_key=keypair.secret_key,
        pool=self.pool,
        seed_hash=shared_seed,
        master_nonce=shared_nonce,
        use_progressbar=False,
    )
    ray_tally = ray_tally_everything(
        cvrs,
        verbose=False,
        date=shared_date,
        secret_key=keypair.secret_key,
        seed_hash=shared_seed,
        master_nonce=shared_nonce,
        root_dir="rtally_output",
        use_progressbar=False,
    )

    self.assertEqual(pool_tally, ray_tally.to_fast_tally())
def test_end_to_end_publications_ray(self, input: str, check_proofs: bool, keypair: ElGamalKeyPair) -> None:
    """
    Tally CVRs with `ray_tally_everything`, write the tally to
    `TALLY_TESTING_DIR`, load it back, and require the reloaded tally to match
    the one that was written.

    :param input: CVR text, wrapped in a `StringIO` and handed to the parser
    :param check_proofs: forwarded to `load_ray_tally`
    :param keypair: supplies the tally's secret key and is used by `equivalent`
    """
    self.removeTree()  # if there's anything leftover from a prior run, get rid of it

    cvrs = read_dominion_csv(StringIO(input))
    self.assertIsNotNone(cvrs)

    _, ballots, _ = cvrs.to_election_description()
    assert len(ballots) > 0, "can't have zero ballots!"

    written = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        verbose=True,
        root_dir=TALLY_TESTING_DIR,
    )
    self.assertTrue(written.all_proofs_valid())

    # dump files out to disk
    write_ray_tally(written, TALLY_TESTING_DIR)
    log_and_print("tally_testing written, proceeding to read it back in again")

    # now, read it back again!
    reloaded = load_ray_tally(
        TALLY_TESTING_DIR,
        check_proofs=check_proofs,
        verbose=True,
        recheck_ballots_and_tallies=True,
    )
    self.assertIsNotNone(reloaded)
    log_and_print("tally_testing got non-null result!")

    ballots_match = _list_eq(written.encrypted_ballots, reloaded.encrypted_ballots)
    self.assertTrue(ballots_match)
    self.assertTrue(written.equivalent(reloaded, keypair))

    self.removeTree()  # clean up our mess
def test_ray_end_to_end(
    self, input: str, keypair: ElGamalKeyPair, use_keypair: bool
) -> None:
    """
    Tally CVRs with `ray_tally_everything`, convert the result to a fast tally,
    validate its proofs, then write both forms to disk and require the two
    manifests to be equivalent.

    :param input: CVR text, wrapped in a `StringIO` and handed to the parser
    :param keypair: its `secret_key` is passed to the tally when `use_keypair` is true
    :param use_keypair: when false, the tally is run without a `secret_key` argument
    """
    self.removeTree()

    cvrs = read_dominion_csv(StringIO(input))
    self.assertIsNotNone(cvrs)

    _, ballots, _ = cvrs.to_election_description()
    assert len(ballots) > 0, "can't have zero ballots!"
    print(f"End-to-end Ray test with {len(ballots)} ballot(s).")

    if not use_keypair:
        rtally = ray_tally_everything(
            cvrs, verbose=True, root_dir="rtally_output", use_progressbar=False
        )
    else:
        rtally = ray_tally_everything(
            cvrs,
            verbose=True,
            secret_key=keypair.secret_key,
            root_dir="rtally_output",
            use_progressbar=False,
        )

    ftally = rtally.to_fast_tally()
    self.assertTrue(ftally.all_proofs_valid(verbose=False))

    # Write both tallies to the filesystem and compare what comes out.
    fast_manifest = write_fast_tally(ftally, "ftally_output")
    ray_manifest = write_ray_tally(rtally, "rtally_output")

    # The manifests cannot simply be compared for equality, because their
    # root_dirs are different.
    self.assertTrue(fast_manifest.equivalent(ray_manifest))

    self.removeTree()
def test_repeating_candidate_names(self) -> None:
    """
    Parse a CVR export whose "District X" contest lists "Write-in" three times
    and whose two referenda share the choice names "For" and "Against", then
    check the choice names recorded for each contest.
    """
    csv_text = """
"2018 Test Election","5.2.16.1","","","","","","","","","","","","","",""
"","","","","","","","","District X (Vote For=3)","District X (Vote For=3)","District X (Vote For=3)","District X (Vote For=3)","Referendum 1","Referendum 1","Referendum 2","Referendum 2"
"","","","","","","","","Alice","Write-in","Write-in","Write-in","For","Against","For","Against"
"CvrNumber","TabulatorNum","BatchId","RecordId","ImprintedId","CountingGroup","PrecinctPortion","BallotType","DEM","","","","","","",""
="1",="1",="1",="1",="1-1-1","Mail","Thing1","T1","1","0","0","0","1","0","1","0"
    """
    result: Optional[DominionCSV] = read_dominion_csv(StringIO(csv_text))

    # Guard clause: self.fail() raises, so the checks below see a parsed result.
    if result is None:
        self.fail("Expected not none")

    self.assertNotEqual(result, None)
    self.assertIsNotNone(result.metadata.contest_map["District X"])
    self.assertIsNotNone(result.metadata.contest_map["Referendum 1"])
    self.assertIsNotNone(result.metadata.contest_map["Referendum 2"])

    # make sure there are no "(2)" things going on in the referenda
    ref1_names = set()
    for choice in result.metadata.contest_map["Referendum 1"]:
        ref1_names.add(choice.choice_name)

    ref2_names = set()
    for choice in result.metadata.contest_map["Referendum 2"]:
        ref2_names.add(choice.choice_name)

    self.assertEqual({"For", "Against"}, ref1_names)
    self.assertEqual({"For", "Against"}, ref2_names)

    # and now make sure we have what we expect for our write-in race
    district_names = set()
    for choice in result.metadata.contest_map["District X"]:
        district_names.add(choice.choice_name)

    self.assertEqual(
        district_names, {"Alice", "Write-in", "Write-in (2)", "Write-in (3)"}
    )
def run_bench(filename: str, pool: Pool, file_dir: Optional[str]) -> None:
    """
    Parse a Dominion CVR file, tally it once with `fast_tally_everything` and
    once with `ray_tally_everything`, and print timings for both.

    :param filename: path of the CVR file handed to `read_dominion_csv`
    :param pool: passed to the fast tally and to the equivalence check
    :param file_dir: when truthy, the fast tally is written to
        `file_dir + "_fast"`, the Ray tally uses `file_dir + "_ray"` as its
        `root_dir`, the two tallies are cross-checked, and both directories are
        removed at the end; when falsy, nothing is written, checked or removed
    :raises SystemExit: with status 1 when the CVR file cannot be read
    :raises AssertionError: when there are no ballots, a proof fails to
        validate, or the two tallies are not equivalent
    """
    # Imported locally so this function does not rely on the file's import block.
    # `sys.exit` is used because the bare `exit` builtin is installed by the
    # `site` module for interactive use and is not guaranteed to exist.
    import sys

    start_time = timer()
    print(f"Benchmarking: {filename}")
    log_info(f"Benchmarking: {filename}")

    cvrs = read_dominion_csv(filename)
    if cvrs is None:
        print(f"Failed to read {filename}, terminating.")
        sys.exit(1)

    rows, _ = cvrs.data.shape
    parse_time = timer()
    print(f" Parse time: {parse_time - start_time: .3f} sec")

    # Explicit raises rather than `assert` statements throughout, so that the
    # checks (and the validation calls they wrap) survive `python -O`.
    if rows <= 0:
        raise AssertionError("can't have zero ballots!")

    # doesn't matter what the key is, so long as it's consistent for both runs
    keypair = get_optional(elgamal_keypair_from_secret(int_to_q_unchecked(31337)))

    tally_start = timer()
    tally = fast_tally_everything(
        cvrs, pool, verbose=True, secret_key=keypair.secret_key
    )
    if file_dir:
        # Writing happens before `tally_end`, so it is part of the timed region.
        write_fast_tally(tally, file_dir + "_fast")
    tally_end = timer()

    if not tally.all_proofs_valid(verbose=True):
        raise AssertionError("proof failure!")

    print("\nstarting ray.io parallelism")
    rtally_start = timer()
    rtally = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        root_dir=file_dir + "_ray" if file_dir else None,
    )
    rtally_end = timer()

    if file_dir:
        rtally_as_fast = rtally.to_fast_tally()
        if not rtally_as_fast.all_proofs_valid(verbose=True):
            raise AssertionError("proof failure!")
        if not tally.equivalent(rtally_as_fast, keypair, pool):
            raise AssertionError("tallies aren't equivalent!")

        # Note: tally.equivalent() isn't quite as stringent as asserting that absolutely
        # everything is identical, but it's a pretty good sanity check for our purposes.
        # In tests/test_ray_tally.py, test_ray_and_multiprocessing_agree goes the extra
        # distance to create identical tallies from each system and assert their equality.

    pool_elapsed = tally_end - tally_start
    ray_elapsed = rtally_end - rtally_start

    print("\nOVERALL PERFORMANCE")
    print(f" Pool time: {pool_elapsed: .3f} sec")
    print(f" Pool rate: {rows / pool_elapsed: .3f} ballots/sec")
    print(f" Ray time: {ray_elapsed: .3f} sec")
    print(f" Ray rate: {rows / ray_elapsed: .3f} ballots/sec")
    print(
        f" Ray speedup: {pool_elapsed / ray_elapsed: .3f} (>1.0 = ray is faster, <1.0 = ray is slower)"
    )

    # Same truthiness test that guarded creating the directories above. An
    # `is not None` test here would, for file_dir == "", delete "_ray" and
    # "_fast" directories that this function never created.
    if file_dir:
        shutil.rmtree(file_dir + "_ray", ignore_errors=True)
        shutil.rmtree(file_dir + "_fast", ignore_errors=True)
# Script fragment: `args`, `keyfile` and `cvrfile` are assigned before the
# visible part of this file.

tallydir = args.tallies
use_cluster = args.cluster

# Refuse to run if the tally directory is already present on disk.
if path.exists(tallydir):
    print(f"Tally directory ({tallydir}) already exists. Exiting.")
    exit(1)

# Load the election administrator state and stop if it is missing or fails
# its own validity check.
admin_state: Optional[ElectionAdmin] = load_json_helper(
    ".", keyfile, ElectionAdmin)
if admin_state is None or not admin_state.is_valid():
    print(f"Election administration key material wasn't valid")
    exit(1)

# Parse the CVR file, timing how long the parse takes.
print(f"Starting up, reading {cvrfile}")
start_time = timer()
cvrs = read_dominion_csv(cvrfile)
if cvrs is None:
    print(f"Failed to read {cvrfile}, terminating.")
    exit(1)

rows, cols = cvrs.data.shape
parse_time = timer()
print(
    f" Parse time: {parse_time - start_time: .3f} sec, {rows / (parse_time - start_time):.3f} ballots/sec"
)
print(f" Found {rows} CVRs in {cvrs.metadata.election_name}.")

# Initialize Ray: cluster mode also waits for workers; otherwise use localhost.
if use_cluster:
    ray_init_cluster()
    ray_wait_for_workers()
else:
    ray_init_localhost()
def test_max_votes_per_race_sanity(self, cvrs: str) -> None:
    """
    Require that the supplied CVR text parses to a non-None result.

    :param cvrs: CVR text, wrapped in a `StringIO` and handed to the parser
    """
    self.assertIsNotNone(read_dominion_csv(StringIO(cvrs)))
def test_read_dominion_csv_failures(self) -> None:
    """
    Require `read_dominion_csv` to return None when given the filename
    "no-such-file.csv".
    """
    outcome = read_dominion_csv("no-such-file.csv")
    self.assertIsNone(outcome)
def test_end_to_end_publications(self, input: str, check_proofs: bool, keypair: ElGamalKeyPair) -> None:
    """
    Tally CVRs with `fast_tally_everything`, write the tally to
    `TALLY_TESTING_DIR`, load it back and compare it with the original, then
    decrypt the ballots and round-trip one proven ballot through
    `DECRYPTED_DIR`.

    :param input: CVR text, wrapped in a `StringIO` and handed to the parser
    :param check_proofs: forwarded to `load_fast_tally`
    :param keypair: supplies the tally's secret key and is used for the
        equivalence check, decryption and proof verification
    """
    coverage.process_startup()  # necessary for coverage testing to work in parallel
    self.removeTree()  # if there's anything leftover from a prior run, get rid of it

    cvrs = read_dominion_csv(StringIO(input))
    self.assertIsNotNone(cvrs)

    _, ballots, _ = cvrs.to_election_description()
    assert len(ballots) > 0, "can't have zero ballots!"

    results = fast_tally_everything(
        cvrs, self.pool, secret_key=keypair.secret_key, verbose=True
    )
    self.assertTrue(results.all_proofs_valid(self.pool))

    # dump files out to disk
    write_fast_tally(results, TALLY_TESTING_DIR)
    log_and_print("tally_testing written, proceeding to read it back in again")

    # now, read it back again!
    reloaded = load_fast_tally(
        TALLY_TESTING_DIR,
        check_proofs=check_proofs,
        pool=self.pool,
        verbose=True,
        recheck_ballots_and_tallies=True,
    )
    self.assertIsNotNone(reloaded)
    log_and_print("tally_testing got non-null result!")

    ballots_match = _list_eq(results.encrypted_ballots, reloaded.encrypted_ballots)
    self.assertTrue(ballots_match)
    self.assertTrue(results.equivalent(reloaded, keypair, self.pool))

    # Make sure there's an index.html file; throws an exception if it's missing
    index_path = path.join(TALLY_TESTING_DIR, "index.html")
    self.assertIsNotNone(stat(index_path))

    # And lastly, while we're here, we'll use all this machinery to exercise the
    # ballot decryption read/write facilities.
    ied = InternalElectionDescription(results.election_description)

    log_and_print("decrypting one more time")
    proven_ballots = decrypt_ballots(
        ied,
        results.context.crypto_extended_base_hash,
        keypair,
        self.pool,
        results.encrypted_ballots,
    )
    self.assertEqual(len(proven_ballots), len(results.encrypted_ballots))
    self.assertNotIn(None, proven_ballots)

    # for speed, we're only going to do this for the first ballot, not all of them
    first_proven = proven_ballots[0]
    first_encrypted = results.encrypted_ballots[0]
    ballot_id = first_proven.ballot.object_id

    proofs_ok = verify_proven_ballot_proofs(
        results.context.crypto_extended_base_hash,
        keypair.public_key,
        first_encrypted,
        first_proven,
    )
    self.assertTrue(proofs_ok)

    # Written ballot must be found under its own id and not under a different one.
    write_proven_ballot(first_proven, DECRYPTED_DIR)
    self.assertTrue(exists_proven_ballot(ballot_id, DECRYPTED_DIR))
    self.assertFalse(exists_proven_ballot(ballot_id + "0", DECRYPTED_DIR))
    self.assertEqual(first_proven, load_proven_ballot(ballot_id, DECRYPTED_DIR))

    self.removeTree()  # clean up our mess