def test_encrypt_ballot_with_stateful_composer_succeeds(self):
    """Encrypt a fake ballot through an EncryptionMediator and check it validates."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    internal_manifest, context = election_factory.get_fake_ciphertext_election(
        election_factory.get_fake_manifest(), keypair.public_key
    )

    ballot = election_factory.get_fake_ballot(internal_manifest)
    first_style_id = internal_manifest.ballot_styles[0].object_id
    self.assertTrue(ballot.is_valid(first_style_id))

    mediator = EncryptionMediator(
        internal_manifest, context, election_factory.get_encryption_device()
    )

    # Act
    encrypted = mediator.encrypt(ballot)

    # Assert
    self.assertIsNotNone(encrypted)
    encryption_ok = encrypted.is_valid_encryption(
        internal_manifest.manifest_hash,
        keypair.public_key,
        context.crypto_extended_base_hash,
    )
    self.assertTrue(encryption_ok)
def test_encrypt_ballot_simple_succeeds(self):
    """Encrypt one fake ballot with and without an explicit nonce seed; both must validate."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    internal_manifest, context = election_factory.get_fake_ciphertext_election(
        election_factory.get_fake_manifest(), keypair.public_key
    )
    nonce_seed = TWO_MOD_Q

    # TODO: Ballot Factory
    subject = election_factory.get_fake_ballot(internal_manifest)
    style_id = internal_manifest.ballot_styles[0].object_id
    self.assertTrue(subject.is_valid(style_id))

    # Act
    result = encrypt_ballot(subject, internal_manifest, context, SEED)
    result_from_seed = encrypt_ballot(
        subject, internal_manifest, context, SEED, nonce_seed
    )

    # Assert
    self.assertIsNotNone(result)
    self.assertIsNotNone(result.code)
    self.assertIsNotNone(result_from_seed)
    # The same validity check applies to both encryptions, in the original order.
    for encrypted in (result, result_from_seed):
        self.assertTrue(
            encrypted.is_valid_encryption(
                internal_manifest.manifest_hash,
                keypair.public_key,
                context.crypto_extended_base_hash,
            )
        )
def test_gmpy2_parallelism_is_safe(self):
    """
    Computes 1000 ElGamal keypairs in a process pool and checks each public key
    against a sequential recomputation from the same secret key.

    The pool is closed in a ``finally`` clause so that a failing assertion (or a
    failure inside ``map``) does not leave the pool open.
    """
    cpus = cpu_count()
    problem_size = 1000
    # list of 1000 might-as-well-be-random Q's
    secret_keys = Nonces(int_to_q_unchecked(3))[0:problem_size]
    log_info(
        f"testing GMPY2 powmod parallelism safety (cpus = {cpus}, problem_size = {problem_size})"
    )

    # compute in parallel; the timer deliberately includes pool start-up
    start = timer()
    p = Pool(cpus)
    try:
        keypairs = p.map(elgamal_keypair_from_secret, secret_keys)
        end1 = timer()

        # verify scalar
        for keypair in keypairs:
            self.assertEqual(
                keypair.public_key,
                elgamal_keypair_from_secret(keypair.secret_key).public_key,
            )
        end2 = timer()
    finally:
        # apparently necessary to avoid warnings from the Pool system
        p.close()

    log_info(f"Parallelism speedup: {(end2 - end1) / (end1 - start):.3f}")
def test_simple_elgamal_encryption_decryption(self):
    """Encrypt and decrypt a zero plaintext using a fixed nonce of 1 and secret key of 2."""
    nonce = ONE_MOD_Q
    secret_key = TWO_MOD_Q
    keypair = get_optional(elgamal_keypair_from_secret(secret_key))
    public_key = keypair.public_key

    self.assertLess(public_key.to_int(), P)

    # g^0 == 1
    self.assertEqual(g_pow_p(ZERO_MOD_Q), ONE_MOD_P)

    ciphertext = get_optional(elgamal_encrypt(0, nonce, keypair.public_key))
    alpha = ciphertext.alpha.to_int()
    self.assertEqual(G, alpha)

    # alpha^secret must match public_key^nonce ...
    self.assertEqual(
        pow(alpha, secret_key.to_int(), P),
        pow(public_key.to_int(), nonce.to_int(), P),
    )
    # ... and for this zero plaintext beta is asserted to be that same value.
    self.assertEqual(
        ciphertext.beta.to_int(),
        pow(public_key.to_int(), nonce.to_int(), P),
    )

    self.assertEqual(0, ciphertext.decrypt(keypair.secret_key))
def test_encrypt_simple_ballot_from_files_succeeds(self):
    """Encrypt a ballot loaded from file via an EncryptionMediator and validate the result."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    metadata, context = election_factory.get_fake_ciphertext_election(
        election_factory.get_simple_election_from_file(), keypair.public_key
    )

    data = ballot_factory.get_simple_ballot_from_file()
    style_id = metadata.ballot_styles[0].object_id
    self.assertTrue(data.is_valid(style_id))

    subject = EncryptionMediator(metadata, context, EncryptionDevice("Location"))

    # Act
    result = subject.encrypt(data)

    # Assert
    self.assertIsNotNone(result)
    self.assertEqual(data.object_id, result.object_id)
    encryption_ok = result.is_valid_encryption(
        metadata.description_hash,
        keypair.public_key,
        context.crypto_extended_base_hash,
    )
    self.assertTrue(encryption_ok)
def test_cast_ballot(self):
    """Cast an encrypted ballot, then confirm it can be neither cast nor spoiled again."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    metadata, context = election_factory.get_fake_ciphertext_election(
        election_factory.get_fake_election(), keypair.public_key
    )
    store = BallotStore()
    source = election_factory.get_fake_ballot(metadata)
    self.assertTrue(source.is_valid(metadata.ballot_styles[0].object_id))

    # Act
    data = encrypt_ballot(source, metadata, context, SEED_HASH)
    result = accept_ballot(data, BallotBoxState.CAST, metadata, context, store)

    # Assert
    expected = store.get(source.object_id)
    self.assertEqual(expected.state, BallotBoxState.CAST)
    self.assertEqual(result.state, BallotBoxState.CAST)
    self.assertEqual(expected.object_id, result.object_id)

    # Test failure modes: first "cannot cast again", then
    # "cannot spoil a ballot already cast".
    for rejected_state in (BallotBoxState.CAST, BallotBoxState.SPOILED):
        self.assertIsNone(
            accept_ballot(data, rejected_state, metadata, context, store)
        )
def test_encrypt_simple_selection_malformed_data_fails(self):
    """A tampered description hash or a missing proof must make validation fail."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    nonce = randbelow(Q)
    metadata = SelectionDescription(
        "some-selection-object-id", "some-candidate-id", 1
    )
    hash_context = metadata.crypto_hash()

    subject = selection_from(metadata)
    self.assertTrue(subject.is_valid(metadata.object_id))

    # Act
    result = encrypt_selection(subject, metadata, keypair.public_key, nonce)

    # tamper with the description_hash
    bad_hash_copy = deepcopy(result)
    bad_hash_copy.description_hash = TWO_MOD_Q

    # remove the proof
    no_proof_copy = deepcopy(result)
    no_proof_copy.proof = None

    # Assert
    for broken in (bad_hash_copy, no_proof_copy):
        self.assertFalse(
            broken.is_valid_encryption(hash_context, keypair.public_key)
        )
def test_djcp_proofs_simple(self):
    """
    Disjunctive Chaum-Pedersen proofs on fixed inputs: a proof built with the
    maker matching the plaintext validates, the other maker's proof does not.
    """
    # doesn't get any simpler than this
    keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
    public_key = keypair.public_key
    nonce = ONE_MOD_Q
    seed = TWO_MOD_Q

    # Encryption of zero: the "zero" maker is right, the "one" maker is wrong.
    message0 = get_optional(elgamal_encrypt(0, nonce, public_key))
    proof0 = make_disjunctive_chaum_pedersen_zero(
        message0, nonce, public_key, ONE_MOD_Q, seed
    )
    proof0bad = make_disjunctive_chaum_pedersen_one(
        message0, nonce, public_key, ONE_MOD_Q, seed
    )
    self.assertTrue(proof0.is_valid(message0, public_key, ONE_MOD_Q))
    self.assertFalse(proof0bad.is_valid(message0, public_key, ONE_MOD_Q))

    # Encryption of one: the roles of the two makers are swapped.
    message1 = get_optional(elgamal_encrypt(1, nonce, public_key))
    proof1 = make_disjunctive_chaum_pedersen_one(
        message1, nonce, public_key, ONE_MOD_Q, seed
    )
    proof1bad = make_disjunctive_chaum_pedersen_zero(
        message1, nonce, public_key, ONE_MOD_Q, seed
    )
    self.assertTrue(proof1.is_valid(message1, public_key, ONE_MOD_Q))
    self.assertFalse(proof1bad.is_valid(message1, public_key, ONE_MOD_Q))
def test_encrypt_ballot_simple_succeeds(self):
    """Encrypt a fake ballot with and without a nonce seed; check tracker data and validity."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    metadata, context = election_factory.get_fake_ciphertext_election(
        election_factory.get_fake_election(), keypair.public_key
    )
    nonce_seed = TWO_MOD_Q

    # TODO: Ballot Factory
    subject = election_factory.get_fake_ballot(metadata)
    style_id = metadata.ballot_styles[0].object_id
    self.assertTrue(subject.is_valid(style_id))

    # Act
    result = encrypt_ballot(subject, metadata, context, SEED_HASH)
    tracker_code = result.get_tracker_code()
    result_from_seed = encrypt_ballot(
        subject, metadata, context, SEED_HASH, nonce_seed
    )

    # Assert
    self.assertIsNotNone(result)
    self.assertIsNotNone(result.tracking_id)
    self.assertIsNotNone(tracker_code)
    self.assertIsNotNone(result_from_seed)
    for encrypted in (result, result_from_seed):
        self.assertTrue(
            encrypted.is_valid_encryption(
                context.crypto_extended_base_hash, keypair.public_key
            )
        )
def test_gmpy2_parallelism_is_safe(self):
    """
    Ensures running lots of parallel exponentiations still yields the correct answer.
    This verifies that nothing incorrect is happening in the GMPY2 library.

    The scheduler is closed in a ``finally`` clause so that a failing assertion
    (or a failure while scheduling) does not leave it open.
    """
    # Arrange
    scheduler = Scheduler()
    try:
        problem_size = 1000
        random_secret_keys = Nonces(int_to_q_unchecked(3))[0:problem_size]
        log_info(
            f"testing GMPY2 powmod parallelism safety (cpus = {scheduler.cpu_count}, problem_size = {problem_size})"
        )

        # Act
        start = timer()
        # Each task gets its own single-element argument list.
        keypairs = scheduler.schedule(
            elgamal_keypair_from_secret,
            [[secret_key] for secret_key in random_secret_keys],
        )
        end1 = timer()

        # Assert: recompute every keypair sequentially and compare public keys.
        for keypair in keypairs:
            self.assertEqual(
                keypair.public_key,
                elgamal_keypair_from_secret(keypair.secret_key).public_key,
            )
        end2 = timer()
    finally:
        scheduler.close()

    log_info(f"Parallelism speedup: {(end2 - end1) / (end1 - start):.3f}")
def test_ballot_box_spoil_ballot(self):
    """Spoil an encrypted ballot via BallotBox, then confirm it cannot be spoiled or cast again."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    internal_manifest, context = election_factory.get_fake_ciphertext_election(
        election_factory.get_fake_manifest(), keypair.public_key
    )
    store = DataStore()
    source = election_factory.get_fake_ballot(internal_manifest)
    style_id = internal_manifest.ballot_styles[0].object_id
    self.assertTrue(source.is_valid(style_id))

    # Act
    data = encrypt_ballot(source, internal_manifest, context, SEED)
    subject = BallotBox(internal_manifest, context, store)
    result = subject.spoil(data)

    # Assert
    expected = store.get(source.object_id)
    self.assertEqual(expected.state, BallotBoxState.SPOILED)
    self.assertEqual(result.state, BallotBoxState.SPOILED)
    self.assertEqual(expected.object_id, result.object_id)

    # Test failure modes
    second_spoil = subject.spoil(data)
    self.assertIsNone(second_spoil)  # cannot spoil again
    cast_after_spoil = subject.cast(data)
    self.assertIsNone(cast_after_spoil)  # cannot cast a ballot already spoiled
def elgamal_keypairs(draw: _DrawType):
    """
    Generates an arbitrary ElGamal secret/public keypair.

    The secret is drawn from `elements_mod_q_no_zero()`; a drawn value equal to
    `ONE_MOD_Q` is swapped for `TWO_MOD_Q` before the keypair is derived.

    :param draw: Hidden argument, used by Hypothesis.
    """
    secret = draw(elements_mod_q_no_zero())
    if secret != ONE_MOD_Q:
        return elgamal_keypair_from_secret(secret)
    return elgamal_keypair_from_secret(TWO_MOD_Q)
def test_ccp_proofs_simple_encryption_of_one(self):
    """A constant Chaum-Pedersen proof for constant 1 validates on an encryption of 1; constant 0 does not."""
    keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
    public_key = keypair.public_key
    nonce = ONE_MOD_Q
    seed = TWO_MOD_Q

    message = get_optional(elgamal_encrypt(1, nonce, public_key))

    # Same ciphertext, two claimed constants: 1 (correct) and 0 (incorrect).
    proof = make_constant_chaum_pedersen(message, 1, nonce, public_key, seed)
    bad_proof = make_constant_chaum_pedersen(message, 0, nonce, public_key, seed)

    self.assertTrue(proof.is_valid(message, public_key))
    self.assertFalse(bad_proof.is_valid(message, public_key))
def test_encrypt_simple_contest_referendum_succeeds(self):
    """Build a two-option referendum contest with one placeholder, encrypt it and validate it."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    nonce = randbelow(Q)

    affirmative = SelectionDescription(
        "some-object-id-affirmative", "some-candidate-id-affirmative", 0
    )
    negative = SelectionDescription(
        "some-object-id-negative", "some-candidate-id-negative", 1
    )
    placeholder = SelectionDescription(
        "some-object-id-placeholder", "some-candidate-id-placeholder", 2
    )

    metadata = ContestDescriptionWithPlaceholders(
        "some-contest-object-id",
        "some-electoral-district-id",
        0,
        VoteVariationType.one_of_m,
        1,
        1,
        "some-referendum-contest-name",
        [affirmative, negative],
        None,
        None,
        [placeholder],
    )
    hash_context = metadata.crypto_hash()

    subject = contest_from(metadata)
    contest_ok = subject.is_valid(
        metadata.object_id,
        len(metadata.ballot_selections),
        metadata.number_elected,
        metadata.votes_allowed,
    )
    self.assertTrue(contest_ok)

    # Act
    result = encrypt_contest(
        subject, metadata, keypair.public_key, ONE_MOD_Q, nonce
    )

    # Assert
    self.assertIsNotNone(result)
    self.assertTrue(
        result.is_valid_encryption(hash_context, keypair.public_key, ONE_MOD_Q)
    )
def test_encrypt_contest_duplicate_selection_object_ids_fails(self):
    """
    This is an example test of a failing test where the contest description is malformed:
    both selections carry the same object_id, and `encrypt_contest` is expected
    to return None for it.
    """
    # Previously assigned but never used; it now actually seeds the Random below.
    random_seed = 0

    description = ContestDescription(
        object_id="[email protected]",
        electoral_district_id="[email protected]",
        sequence_order=1,
        vote_variation=VoteVariationType.n_of_m,
        number_elected=1,
        votes_allowed=1,
        name="",
        ballot_selections=[
            SelectionDescription(
                object_id="[email protected]",
                candidate_id="*****@*****.**",
                sequence_order=0,
            ),
            # Note the selection description is the same as the first sequence element
            SelectionDescription(
                object_id="[email protected]",
                candidate_id="*****@*****.**",
                sequence_order=1,
            ),
        ],
    )

    keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
    seed = ONE_MOD_Q

    # Bypass checking the validity of the description
    data = ballot_factory.get_random_contest_from(
        description, Random(random_seed), suppress_validity_check=True
    )

    placeholders = generate_placeholder_selections_from(
        description, description.number_elected
    )
    description_with_placeholders = contest_description_with_placeholders_from(
        description, placeholders
    )

    # Act
    subject = encrypt_contest(
        data, description_with_placeholders, keypair.public_key, ONE_MOD_Q, seed
    )

    # Assert
    self.assertIsNone(subject)
def test_cp_proofs_simple(self):
    """A Chaum-Pedersen proof over the real partial decryption validates; one over TWO_MOD_Q does not."""
    keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
    nonce = ONE_MOD_Q
    seed = TWO_MOD_Q

    message = get_optional(elgamal_encrypt(0, nonce, keypair.public_key))
    decryption = message.partial_decrypt(keypair.secret_key)

    # Good proof uses the actual partial decryption; bad proof substitutes TWO_MOD_Q.
    proof = make_chaum_pedersen(
        message, keypair.secret_key, decryption, seed, ONE_MOD_Q
    )
    bad_proof = make_chaum_pedersen(
        message, keypair.secret_key, TWO_MOD_Q, seed, ONE_MOD_Q
    )

    good_ok = proof.is_valid(message, keypair.public_key, decryption, ONE_MOD_Q)
    self.assertTrue(good_ok)
    bad_ok = bad_proof.is_valid(message, keypair.public_key, decryption, ONE_MOD_Q)
    self.assertFalse(bad_ok)
def test_djcp_proof_invalid_inputs(self):
    """`make_disjunctive_chaum_pedersen` must raise when given 3 as its final argument."""
    # this is here to push up our coverage
    keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
    nonce = ONE_MOD_Q
    seed = TWO_MOD_Q
    message0 = get_optional(elgamal_encrypt(0, nonce, keypair.public_key))

    with self.assertRaises(Exception):
        make_disjunctive_chaum_pedersen(
            message0, nonce, keypair.public_key, seed, 3
        )
def test_encrypt_contest_manually_formed_contest_description_valid_succeeds(self):
    """Encrypt a random contest for a hand-built description, with proof verification switched on."""
    first_selection = SelectionDescription(
        object_id="[email protected]",
        candidate_id="*****@*****.**",
        sequence_order=0,
    )
    second_selection = SelectionDescription(
        object_id="[email protected]",
        candidate_id="*****@*****.**",
        sequence_order=1,
    )
    description = ContestDescription(
        object_id="[email protected]",
        electoral_district_id="[email protected]",
        sequence_order=1,
        vote_variation=VoteVariationType.n_of_m,
        number_elected=1,
        votes_allowed=1,
        name="",
        ballot_selections=[first_selection, second_selection],
        ballot_title=None,
        ballot_subtitle=None,
    )

    keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
    seed = ONE_MOD_Q

    ####################
    data = ballot_factory.get_random_contest_from(description, Random(0))

    description_with_placeholders = contest_description_with_placeholders_from(
        description,
        generate_placeholder_selections_from(
            description, description.number_elected
        ),
    )

    # Act
    subject = encrypt_contest(
        data,
        description_with_placeholders,
        keypair.public_key,
        ONE_MOD_Q,
        seed,
        should_verify_proofs=True,
    )

    # Assert
    self.assertIsNotNone(subject)
def run_bench(filename: str, output_dir: Optional[str], use_progressbar: bool) -> None:
    """
    Reads a Dominion CSV file, runs `ray_tally_everything` on it with a fixed
    secret key, and prints parse and tally timings.

    :param filename: path of the Dominion CSV file to benchmark
    :param output_dir: passed to `ray_tally_everything` as `root_dir`; when truthy,
        the proofs of the resulting tally are also checked afterwards
    :param use_progressbar: forwarded to the tally and to the proof check

    Terminates the process with exit status 1 if the CSV cannot be read.
    """
    # Local import keeps this block self-contained. `sys.exit` is used instead of
    # the `exit` helper, which is only installed by the `site` module.
    import sys

    start_time = timer()
    print(f"Benchmarking: {filename}")
    cvrs = read_dominion_csv(filename)
    if cvrs is None:
        print(f"Failed to read {filename}, terminating.")
        sys.exit(1)

    rows, _ = cvrs.data.shape

    parse_time = timer()
    print(
        f" Parse time: {parse_time - start_time: .3f} sec, {rows / (parse_time - start_time):.3f} ballots/sec"
    )

    assert rows > 0, "can't have zero ballots!"

    # doesn't matter what the key is, so long as it's consistent for both runs
    keypair = get_optional(elgamal_keypair_from_secret(int_to_q_unchecked(31337)))

    rtally_start = timer()
    rtally = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        verbose=True,
        root_dir=output_dir,
        use_progressbar=use_progressbar,
    )
    rtally_end = timer()

    print(f"\nOVERALL PERFORMANCE")
    print(f" Ray time: {rtally_end - rtally_start : .3f} sec")
    print(
        f" Ray rate: {rows / (rtally_end - rtally_start): .3f} ballots/sec"
    )

    if output_dir:
        print(f"\nSANITY CHECK")
        assert rtally.all_proofs_valid(
            verbose=True,
            recheck_ballots_and_tallies=False,
            use_progressbar=use_progressbar,
        ), "proof failure!"
def setUp(self) -> None:
    """Prepare a fake election plus one plaintext ballot, its nonce, and its cast (submitted) form."""
    # Election setup
    election_factory = ElectionFactory()
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    manifest = election_factory.get_fake_manifest()
    ciphertext_election = election_factory.get_fake_ciphertext_election(
        manifest, keypair.public_key
    )
    self.internal_manifest, self.context = ciphertext_election
    device_hash = ElectionFactory.get_encryption_device().get_hash()

    # Arrange ballots
    self.plaintext_ballot = election_factory.get_fake_ballot(self.internal_manifest)
    ciphertext_ballot = encrypt_ballot(
        self.plaintext_ballot,
        self.internal_manifest,
        self.context,
        device_hash,
    )
    self.ballot_nonce = ciphertext_ballot.nonce
    self.submitted_ballot = from_ciphertext_ballot(
        ciphertext_ballot, BallotBoxState.CAST
    )
def test_encrypt_simple_selection_succeeds(self):
    """Encrypt a single selection with a random nonce and check the ciphertext validates."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    nonce = randbelow(Q)
    metadata = SelectionDescription(
        "some-selection-object-id", "some-candidate-id", 1
    )
    hash_context = metadata.crypto_hash()

    subject = selection_from(metadata)
    self.assertTrue(subject.is_valid(metadata.object_id))

    # Act
    result = encrypt_selection(subject, metadata, keypair.public_key, nonce)

    # Assert
    self.assertIsNotNone(result)
    self.assertIsNotNone(result.message)
    encryption_ok = result.is_valid_encryption(hash_context, keypair.public_key)
    self.assertTrue(encryption_ok)
def test_schnorr_proofs_simple(self) -> None:
    """A Schnorr proof built from a fixed keypair and nonce must validate."""
    # doesn't get any simpler than this
    secret = TWO_MOD_Q
    keypair = get_optional(elgamal_keypair_from_secret(secret))
    proof = make_schnorr_proof(keypair, ONE_MOD_Q)
    self.assertTrue(proof.is_valid())
def ray_tally_everything(
    cvrs: DominionCSV,
    verbose: bool = True,
    use_progressbar: bool = True,
    date: Optional[datetime] = None,
    seed_hash: Optional[ElementModQ] = None,
    master_nonce: Optional[ElementModQ] = None,
    secret_key: Optional[ElementModQ] = None,
    root_dir: Optional[str] = None,
) -> "RayTallyEverythingResults":
    """
    This top-level function takes a collection of Dominion CVRs and produces everything that
    we might want for arlo-e2e: a list of encrypted ballots, their encrypted and decrypted tally,
    and proofs of the correctness of the whole thing. The election `secret_key` is an optional
    parameter. If absent, a random keypair is generated and used. Similarly, if a `seed_hash` or
    `master_nonce` is not provided, random ones are generated and used.

    For parallelism, Ray is used. Make sure you've called `ray.init()` or `ray_localhost_init()`
    before calling this.

    If `root_dir` is specified, then the tally is written out to the specified directory, and
    the resulting `RayTallyEverythingResults` object will support the methods that allow those
    ballots to be read back in again. Conversely, if `root_dir` is `None`, then nothing is
    written to disk, and the result will not have access to individual ballots.

    `verbose` is only forwarded to the final "Encryption and tabulation" log line; the other
    progress messages are always emitted. If `date` is `None`, `datetime.now()` is used.

    The progress bar (when enabled) is closed even if a batch or the final tally fails.
    """
    rows, _ = cvrs.data.shape

    ray_wait_for_workers(min_workers=2)

    if date is None:
        date = datetime.now()

    if root_dir is not None:
        mkdir_helper(root_dir, num_retries=NUM_WRITE_RETRIES)
        r_manifest_aggregator = ManifestAggregatorActor.remote(root_dir)  # type: ignore
    else:
        r_manifest_aggregator = None

    r_root_dir = ray.put(root_dir)

    start_time = timer()

    # Performance note: by using to_election_description_ray rather than to_election_description, we're
    # only getting back a list of dictionaries rather than a list of PlaintextBallots. We're pushing that
    # work out into the nodes, where it will run in parallel. The BallotPlaintextFactory wraps up all
    # the (immutable) state necessary to convert from these dicts to PlaintextBallots and is meant to
    # be sent to every node in the cluster.
    ed, bpf, ballot_dicts, id_map = cvrs.to_election_description_ray(date=date)
    setup_time = timer()
    num_ballots = len(ballot_dicts)
    assert num_ballots > 0, "can't have zero ballots!"
    log_and_print(
        f"ElectionGuard setup time: {setup_time - start_time: .3f} sec, {num_ballots / (setup_time - start_time):.3f} ballots/sec"
    )

    keypair = (
        elgamal_keypair_random()
        if secret_key is None
        else elgamal_keypair_from_secret(secret_key)
    )
    assert keypair is not None, "unexpected failure with keypair computation"
    secret_key, public_key = keypair

    cec = make_ciphertext_election_context(
        number_of_guardians=1,
        quorum=1,
        elgamal_public_key=public_key,
        description_hash=ed.crypto_hash(),
    )
    r_cec = ray.put(cec)

    ied = InternalElectionDescription(ed)
    r_ied = ray.put(ied)

    if seed_hash is None:
        seed_hash = rand_q()
    r_seed_hash = ray.put(seed_hash)
    r_keypair = ray.put(keypair)

    r_ballot_plaintext_factory = ray.put(bpf)

    if master_nonce is None:
        master_nonce = rand_q()

    nonces = Nonces(master_nonce)
    r_nonces = ray.put(nonces)
    nonce_indices = range(num_ballots)

    # Pair each ballot dict with the index of the nonce it will use.
    inputs = list(zip(ballot_dicts, nonce_indices))

    batches = shard_list_uniform(inputs, BATCH_SIZE)
    num_batches = len(batches)
    log_and_print(
        f"Launching Ray.io remote encryption! (number of batches: {num_batches})"
    )

    # Restart the clock: the final rate only measures encryption and tabulation.
    start_time = timer()

    progressbar = (
        ProgressBar(
            {
                "Ballots": num_ballots,
                "Tallies": num_ballots,
                "Iterations": 0,
                "Batch": 0,
            }
        )
        if use_progressbar
        else None
    )
    progressbar_actor = progressbar.actor if progressbar is not None else None

    batch_tallies: List[ObjectRef] = []
    try:
        for batch in batches:
            if progressbar_actor:
                progressbar_actor.update_completed.remote("Batch", 1)

            sharded_inputs = shard_list_uniform(batch, BALLOTS_PER_SHARD)

            partial_tally_refs = [
                r_encrypt_and_write.remote(
                    r_ied,
                    r_cec,
                    r_seed_hash,
                    r_root_dir,
                    r_manifest_aggregator,
                    progressbar_actor,
                    r_ballot_plaintext_factory,
                    r_nonces,
                    right_tuple_list(shard),
                    *(left_tuple_list(shard)),
                )
                for shard in sharded_inputs
            ]

            # log_and_print("Remote tallying.")
            btally = ray_tally_ballots(
                partial_tally_refs, BALLOTS_PER_SHARD, progressbar
            )
            batch_tallies.append(btally)

        # Each batch ultimately yields one partial tally; we add these up here at the
        # very end. If we have a million ballots and have batches of 10k ballots, this
        # would mean we'd have only 100 partial tallies. So, what's here works just fine.
        # If we wanted, we could certainly burn some scalar time and keep a running,
        # singular, partial tally. It's probably more important to push onward to the
        # next batch, so we can do as much work in parallel as possible.
        if len(batch_tallies) > 1:
            tally = ray.get(ray_tally_ballots(batch_tallies, 10, progressbar))
        else:
            tally = ray.get(batch_tallies[0])
    finally:
        # Close the progress bar on failure as well as on success.
        if progressbar:
            progressbar.close()

    assert tally is not None, "tally failed!"

    log_and_print("Tally decryption.")
    decrypted_tally: DECRYPT_TALLY_OUTPUT_TYPE = ray_decrypt_tally(
        tally, r_cec, r_keypair, seed_hash
    )

    log_and_print("Validating tally.")

    # Sanity-checking logic: make sure we don't have any unexpected keys, and that the decrypted totals
    # match up with the columns in the original plaintext data.
    tally_keys = set(decrypted_tally.keys())
    expected_keys = set(id_map.keys())

    assert tally_keys.issubset(
        expected_keys
    ), f"bad tally keys (actual keys: {sorted(tally_keys)}, expected keys: {sorted(expected_keys)})"

    for obj_id in decrypted_tally.keys():
        cvr_sum = int(cvrs.data[id_map[obj_id]].sum())
        decryption, _proof = decrypted_tally[obj_id]
        assert cvr_sum == decryption, f"decryption failed for {obj_id}"

    final_manifest: Optional[Manifest] = None

    if root_dir is not None:
        final_manifest = ray.get(r_manifest_aggregator.result.remote())
        assert isinstance(
            final_manifest, Manifest
        ), "type error: bad result from manifest aggregation"

    # Assemble the data structure that we're returning. Having nonces in the ciphertext makes these
    # structures sensitive for writing out to disk, but otherwise they're ready to go.
    log_and_print("Constructing results.")
    reported_tally: Dict[str, SelectionInfo] = {
        k: SelectionInfo(
            object_id=k,
            encrypted_tally=tally[k],
            # we need to forcibly convert mpz to int here to make serialization work properly
            decrypted_tally=int(decrypted_tally[k][0]),
            proof=decrypted_tally[k][1],
        )
        for k in tally.keys()
    }

    tabulate_time = timer()

    log_and_print(
        f"Encryption and tabulation: {rows} ballots, {rows / (tabulate_time - start_time): .3f} ballot/sec",
        verbose,
    )

    return RayTallyEverythingResults(
        metadata=cvrs.metadata,
        cvr_metadata=cvrs.dataframe_without_selections(),
        election_description=ed,
        num_ballots=rows,
        manifest=final_manifest,
        tally=SelectionTally(reported_tally),
        context=cec,
    )
def test_elgamal_keypair_from_secret_requires_key_greater_than_one(self):
    """`elgamal_keypair_from_secret` must return None for secrets of zero and one."""
    # assertIsNone checks identity with None rather than relying on `==`.
    self.assertIsNone(elgamal_keypair_from_secret(ZERO_MOD_Q))
    self.assertIsNone(elgamal_keypair_from_secret(ONE_MOD_Q))
def run_bench(filename: str, pool: Pool, file_dir: Optional[str]) -> None:
    """
    Reads a Dominion CSV file, tallies it once with `fast_tally_everything` (using
    `pool`) and once with `ray_tally_everything`, and prints comparative timings.

    :param filename: path of the Dominion CSV file to benchmark
    :param pool: process pool handed to the fast tally and to the equivalence check
    :param file_dir: optional path prefix; when truthy, results are written to
        `file_dir + "_fast"` and `file_dir + "_ray"`, the two tallies are compared,
        and both directories are removed at the end

    Terminates the process with exit status 1 if the CSV cannot be read.
    """
    # Local import keeps this block self-contained. `sys.exit` is used instead of
    # the `exit` helper, which is only installed by the `site` module.
    import sys

    start_time = timer()
    print(f"Benchmarking: {filename}")
    log_info(f"Benchmarking: {filename}")
    cvrs = read_dominion_csv(filename)
    if cvrs is None:
        print(f"Failed to read {filename}, terminating.")
        sys.exit(1)

    rows, _ = cvrs.data.shape

    parse_time = timer()
    print(f" Parse time: {parse_time - start_time: .3f} sec")

    assert rows > 0, "can't have zero ballots!"

    # doesn't matter what the key is, so long as it's consistent for both runs
    keypair = get_optional(elgamal_keypair_from_secret(int_to_q_unchecked(31337)))

    tally_start = timer()
    tally = fast_tally_everything(
        cvrs, pool, verbose=True, secret_key=keypair.secret_key
    )

    if file_dir:
        write_fast_tally(tally, file_dir + "_fast")

    tally_end = timer()
    assert tally.all_proofs_valid(verbose=True), "proof failure!"

    print(f"\nstarting ray.io parallelism")
    rtally_start = timer()
    rtally = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        root_dir=file_dir + "_ray" if file_dir else None,
    )
    rtally_end = timer()

    if file_dir:
        rtally_as_fast = rtally.to_fast_tally()
        assert rtally_as_fast.all_proofs_valid(verbose=True), "proof failure!"
        assert tally.equivalent(
            rtally_as_fast, keypair, pool
        ), "tallies aren't equivalent!"

        # Note: tally.equivalent() isn't quite as stringent as asserting that absolutely
        # everything is identical, but it's a pretty good sanity check for our purposes.
        # In tests/test_ray_tally.py, test_ray_and_multiprocessing_agree goes the extra
        # distance to create identical tallies from each system and assert their equality.

    print(f"\nOVERALL PERFORMANCE")
    print(f" Pool time: {tally_end - tally_start: .3f} sec")
    print(f" Pool rate: {rows / (tally_end - tally_start): .3f} ballots/sec")
    print(f" Ray time: {rtally_end - rtally_start : .3f} sec")
    print(f" Ray rate: {rows / (rtally_end - rtally_start): .3f} ballots/sec")

    print(
        f" Ray speedup: {(tally_end - tally_start) / (rtally_end - rtally_start) : .3f} (>1.0 = ray is faster, <1.0 = ray is slower)"
    )

    # Same truthiness guard as the code that created these directories: with an
    # empty-string prefix nothing was written, so nothing must be deleted.
    if file_dir:
        shutil.rmtree(file_dir + "_ray", ignore_errors=True)
        shutil.rmtree(file_dir + "_fast", ignore_errors=True)
def fast_tally_everything(
    cvrs: DominionCSV,
    pool: Optional[Pool] = None,
    verbose: bool = True,
    date: Optional[datetime] = None,
    seed_hash: Optional[ElementModQ] = None,
    master_nonce: Optional[ElementModQ] = None,
    secret_key: Optional[ElementModQ] = None,
    use_progressbar: bool = True,
) -> FastTallyEverythingResults:
    """
    This top-level function takes a collection of Dominion CVRs and produces everything that
    we might want for arlo-e2e: a list of encrypted ballots, their encrypted and decrypted tally,
    and proofs of the correctness of the whole thing. The election `secret_key` is an optional
    parameter. If absent, a random keypair is generated and used. Similarly, if a `seed_hash` or
    `master_nonce` is not provided, random ones are generated and used.

    For parallelism, a `multiprocessing.pool.Pool` may be provided, and should result in significant
    speedups on multicore computers. If absent, the computation will proceed sequentially.

    `verbose` controls the timing log lines; `use_progressbar` is forwarded to the ballot
    encryption step independently of `verbose`. If `date` is `None`, `datetime.now()` is used.
    """
    rows, cols = cvrs.data.shape

    if date is None:
        date = datetime.now()

    parse_time = timer()
    log_and_print(f"Rows: {rows}, cols: {cols}", verbose)

    ed, ballots, id_map = cvrs.to_election_description(date=date)
    assert len(ballots) > 0, "can't have zero ballots!"

    keypair = (
        elgamal_keypair_random()
        if secret_key is None
        else elgamal_keypair_from_secret(secret_key)
    )
    assert keypair is not None, "unexpected failure with keypair computation"
    secret_key, public_key = keypair

    # This computation exists only to cause side-effects in the DLog engine, so the lame nonce is not an issue.
    # It is done outside the assert so that it still runs when assertions are disabled (python -O).
    dlog_check = get_optional(
        elgamal_encrypt(
            m=len(ballots), nonce=int_to_q_unchecked(3), public_key=public_key
        )
    ).decrypt(secret_key)
    assert len(ballots) == dlog_check, "got wrong ElGamal decryption!"

    dlog_prime_time = timer()
    log_and_print(
        f"DLog prime time (n={len(ballots)}): {dlog_prime_time - parse_time: .3f} sec",
        verbose,
    )

    cec = make_ciphertext_election_context(
        number_of_guardians=1,
        quorum=1,
        elgamal_public_key=public_key,
        description_hash=ed.crypto_hash(),
    )

    ied = InternalElectionDescription(ed)

    # REVIEW THIS: is this cryptographically sound? Is the seed_hash properly a secret? Should
    # it go in the output? The nonces are clearly secret. If you know them, you can decrypt.
    if seed_hash is None:
        seed_hash = rand_q()
    if master_nonce is None:
        master_nonce = rand_q()
    nonces: List[ElementModQ] = Nonces(master_nonce)[0 : len(ballots)]

    # even if verbose is false, we still want to see the progress bar for the encryption
    cballots = fast_encrypt_ballots(
        ballots, ied, cec, seed_hash, nonces, pool, use_progressbar=use_progressbar
    )
    eg_encrypt_time = timer()

    log_and_print(
        f"Encryption time: {eg_encrypt_time - dlog_prime_time: .3f} sec", verbose
    )
    log_and_print(
        f"Encryption rate: {rows / (eg_encrypt_time - dlog_prime_time): .3f} ballot/sec",
        verbose,
    )

    tally: TALLY_TYPE = fast_tally_ballots(cballots, pool)
    eg_tabulate_time = timer()

    log_and_print(
        f"Tabulation time: {eg_tabulate_time - eg_encrypt_time: .3f} sec", verbose
    )
    log_and_print(
        f"Tabulation rate: {rows / (eg_tabulate_time - eg_encrypt_time): .3f} ballot/sec",
        verbose,
    )
    log_and_print(
        f"Encryption and tabulation: {rows} ballots / {eg_tabulate_time - dlog_prime_time: .3f} sec = {rows / (eg_tabulate_time - dlog_prime_time): .3f} ballot/sec",
        verbose,
    )

    assert tally is not None, "tally failed!"

    if verbose:  # pragma: no cover
        print("Decryption & Proofs: ")
    decrypted_tally: DECRYPT_TALLY_OUTPUT_TYPE = fast_decrypt_tally(
        tally, cec, keypair, seed_hash, pool, verbose
    )
    eg_decryption_time = timer()
    log_and_print(
        f"Decryption time: {eg_decryption_time - eg_tabulate_time: .3f} sec", verbose
    )
    log_and_print(
        f"Decryption rate: {len(decrypted_tally.keys()) / (eg_decryption_time - eg_tabulate_time): .3f} selection/sec",
        verbose,
    )

    # Sanity-checking logic: make sure we don't have any unexpected keys, and that the decrypted totals
    # match up with the columns in the original plaintext data.
    for obj_id in decrypted_tally.keys():
        assert obj_id in id_map, "object_id in results that we don't know about!"
        cvr_sum = int(cvrs.data[id_map[obj_id]].sum())
        decryption, _proof = decrypted_tally[obj_id]
        assert cvr_sum == decryption, f"decryption failed for {obj_id}"

    # Assemble the data structure that we're returning. Having nonces in the ciphertext makes these
    # structures sensitive for writing out to disk, but otherwise they're ready to go.
    reported_tally: Dict[str, SelectionInfo] = {
        k: SelectionInfo(
            object_id=k,
            encrypted_tally=tally[k],
            # we need to forcibly convert mpz to int here to make serialization work properly
            decrypted_tally=int(decrypted_tally[k][0]),
            proof=decrypted_tally[k][1],
        )
        for k in tally.keys()
    }

    # strips the ballots of their nonces, which is important because those could allow for decryption
    accepted_ballots = [ciphertext_ballot_to_accepted(x) for x in cballots]

    return FastTallyEverythingResults(
        metadata=cvrs.metadata,
        cvr_metadata=cvrs.dataframe_without_selections(),
        election_description=ed,
        encrypted_ballot_memos={
            ballot.object_id: make_memo_value(ballot) for ballot in accepted_ballots
        },
        tally=SelectionTally(reported_tally),
        context=cec,
    )
def test_ballot_store(self):
    """Exercise BallotStore set/get/exists for cast, spoiled, unknown-state and removed ballots."""
    # Arrange
    keypair = elgamal_keypair_from_secret(int_to_q(2))
    metadata, context = election_factory.get_fake_ciphertext_election(
        election_factory.get_fake_election(), keypair.public_key
    )

    # get an encrypted fake ballot to work with
    fake_ballot = election_factory.get_fake_ballot(metadata)
    encrypted_ballot = encrypt_ballot(fake_ballot, metadata, context, SEED_HASH)

    # Constructor arguments shared by every accepted ballot built below.
    accepted_fields = (
        encrypted_ballot.object_id,
        encrypted_ballot.ballot_style,
        encrypted_ballot.description_hash,
        encrypted_ballot.previous_tracking_hash,
        encrypted_ballot.contests,
        encrypted_ballot.tracking_hash,
        encrypted_ballot.timestamp,
    )

    # Set up the ballot store
    subject = BallotStore()

    data_cast = CiphertextAcceptedBallot(*accepted_fields)
    data_cast.state = BallotBoxState.CAST

    data_spoiled = CiphertextAcceptedBallot(*accepted_fields)
    data_spoiled.state = BallotBoxState.SPOILED

    for missing_key in ("cast", "spoiled"):
        self.assertIsNone(subject.get(missing_key))

    # try to set a ballot with an unknown state
    stateless_ballot = CiphertextAcceptedBallot(*accepted_fields)
    self.assertFalse(subject.set("unknown", stateless_ballot))

    # Act
    self.assertTrue(subject.set("cast", data_cast))
    self.assertTrue(subject.set("spoiled", data_spoiled))

    self.assertEqual(subject.get("cast"), data_cast)
    self.assertEqual(subject.get("spoiled"), data_spoiled)

    self.assertEqual(subject.exists("cast"), (True, data_cast))
    self.assertEqual(subject.exists("spoiled"), (True, data_spoiled))

    # test mutate state
    data_cast.state = BallotBoxState.UNKNOWN
    self.assertEqual(subject.exists("cast"), (False, data_cast))

    # test remove
    self.assertTrue(subject.set("cast", None))
    self.assertEqual(subject.exists("cast"), (False, None))
speedup: Dict[int, float] = {} print(f"CPUs detected: {cpu_count()}, launching thread pool") pool = Pool(cpu_count()) # warm up the pool to help get consistent measurements results = pool.map(identity, range(1, 30000)) assert results == list(range(1, 30000)) bench_start = timer() for size in problem_sizes: print("Benchmarking on problem size: ", size) seeds = rands[0:size] inputs = [ BenchInput( get_optional(elgamal_keypair_from_secret(a)), rands[size], rands[size + 1], ) for a in seeds ] start_all_scalar = timer() timing_data = [chaum_pedersen_bench(i) for i in inputs] end_all_scalar = timer() print(f" Creating Chaum-Pedersen proofs ({size} iterations)") avg_proof_scalar = average([t[0] for t in timing_data]) std_proof_scalar = std([t[0] for t in timing_data]) print(f" Avg = {avg_proof_scalar:.6f} sec") print(f" Stddev = {std_proof_scalar:.6f} sec") print(f" Validating Chaum-Pedersen proofs ({size} iterations)")