Example #1
0
    def test_encrypt_ballot_with_stateful_composer_succeeds(self):
        """Encrypt a fake ballot through an `EncryptionMediator` and verify the result validates."""
        # Arrange: a fake election keyed from a fixed secret
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        internal_manifest, context = election_factory.get_fake_ciphertext_election(
            election_factory.get_fake_manifest(), keypair.public_key)

        plaintext_ballot = election_factory.get_fake_ballot(internal_manifest)
        first_style_id = internal_manifest.ballot_styles[0].object_id
        self.assertTrue(plaintext_ballot.is_valid(first_style_id))

        mediator = EncryptionMediator(
            internal_manifest, context,
            election_factory.get_encryption_device())

        # Act
        encrypted = mediator.encrypt(plaintext_ballot)

        # Assert: something came back, and it checks out against the election
        self.assertIsNotNone(encrypted)
        encryption_ok = encrypted.is_valid_encryption(
            internal_manifest.manifest_hash,
            keypair.public_key,
            context.crypto_extended_base_hash,
        )
        self.assertTrue(encryption_ok)
Example #2
0
    def test_encrypt_ballot_simple_succeeds(self):
        """Encrypt one fake ballot with and without an explicit nonce seed; both must validate."""

        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        internal_manifest, context = election_factory.get_fake_ciphertext_election(
            election_factory.get_fake_manifest(), keypair.public_key)

        # TODO: Ballot Factory
        ballot = election_factory.get_fake_ballot(internal_manifest)
        style_id = internal_manifest.ballot_styles[0].object_id
        self.assertTrue(ballot.is_valid(style_id))

        # Act: once with the default nonce handling, once with an explicit nonce seed
        default_result = encrypt_ballot(ballot, internal_manifest, context,
                                        SEED)
        seeded_result = encrypt_ballot(ballot, internal_manifest, context,
                                       SEED, TWO_MOD_Q)

        # Assert
        self.assertIsNotNone(default_result)
        self.assertIsNotNone(default_result.code)
        self.assertIsNotNone(seeded_result)
        for encrypted in (default_result, seeded_result):
            self.assertTrue(
                encrypted.is_valid_encryption(
                    internal_manifest.manifest_hash,
                    keypair.public_key,
                    context.crypto_extended_base_hash,
                ))
Example #3
0
    def test_gmpy2_parallelism_is_safe(self):
        """
        Computes many ElGamal keypairs through a worker pool and checks every
        result against a keypair recomputed sequentially from the same secret.

        The pool is closed in a `finally` clause so that a failing `map` call
        or a failing assertion does not leave the pool open.
        """
        cpus = cpu_count()
        problem_size = 1000
        secret_keys = Nonces(int_to_q_unchecked(3))[
            0:problem_size]  # list of 1000 might-as-well-be-random Q's
        log_info(
            f"testing GMPY2 powmod parallelism safety (cpus = {cpus}, problem_size = {problem_size})"
        )

        start = timer()
        p = Pool(cpus)
        try:
            # compute in parallel
            keypairs = p.map(elgamal_keypair_from_secret, secret_keys)
            end1 = timer()

            # verify scalar
            for keypair in keypairs:
                self.assertEqual(
                    keypair.public_key,
                    elgamal_keypair_from_secret(
                        keypair.secret_key).public_key,
                )
            end2 = timer()
        finally:
            # apparently necessary to avoid warnings from the Pool system;
            # done here so it also happens when the test fails
            p.close()
        log_info(f"Parallelism speedup: {(end2 - end1) / (end1 - start):.3f}")
Example #4
0
    def test_simple_elgamal_encryption_decryption(self):
        """Round-trip an encryption of zero using the fixed nonce 1 and secret key 2."""
        nonce = ONE_MOD_Q
        secret_key = TWO_MOD_Q
        keypair = get_optional(elgamal_keypair_from_secret(secret_key))
        public_key = keypair.public_key

        self.assertLess(public_key.to_int(), P)
        # g^0 == 1
        self.assertEqual(g_pow_p(ZERO_MOD_Q), ONE_MOD_P)

        ciphertext = get_optional(
            elgamal_encrypt(0, nonce, keypair.public_key))
        alpha = ciphertext.alpha.to_int()
        self.assertEqual(G, alpha)

        # alpha^secret and public_key^nonce must agree
        self.assertEqual(
            pow(alpha, secret_key.to_int(), P),
            pow(public_key.to_int(), nonce.to_int(), P),
        )
        # for a plaintext of zero, beta equals public_key^nonce as well
        self.assertEqual(
            ciphertext.beta.to_int(),
            pow(public_key.to_int(), nonce.to_int(), P),
        )

        decrypted = ciphertext.decrypt(keypair.secret_key)
        self.assertEqual(0, decrypted)
Example #5
0
    def test_encrypt_simple_ballot_from_files_succeeds(self):
        """Encrypt a ballot loaded from file against an election loaded from file."""
        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        metadata, context = election_factory.get_fake_ciphertext_election(
            election_factory.get_simple_election_from_file(),
            keypair.public_key,
        )

        ballot = ballot_factory.get_simple_ballot_from_file()
        style_id = metadata.ballot_styles[0].object_id
        self.assertTrue(ballot.is_valid(style_id))

        mediator = EncryptionMediator(
            metadata, context, EncryptionDevice("Location")
        )

        # Act
        encrypted = mediator.encrypt(ballot)

        # Assert: the ciphertext keeps the ballot's id and validates
        self.assertIsNotNone(encrypted)
        self.assertEqual(ballot.object_id, encrypted.object_id)
        encryption_ok = encrypted.is_valid_encryption(
            metadata.description_hash,
            keypair.public_key,
            context.crypto_extended_base_hash,
        )
        self.assertTrue(encryption_ok)
    def test_cast_ballot(self):
        """Cast an encrypted ballot into a store, then check it cannot be accepted again."""
        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        metadata, context = election_factory.get_fake_ciphertext_election(
            election_factory.get_fake_election(), keypair.public_key
        )
        store = BallotStore()
        plaintext = election_factory.get_fake_ballot(metadata)
        style_id = metadata.ballot_styles[0].object_id
        self.assertTrue(plaintext.is_valid(style_id))

        # Act
        encrypted = encrypt_ballot(plaintext, metadata, context, SEED_HASH)
        accepted = accept_ballot(
            encrypted, BallotBoxState.CAST, metadata, context, store
        )

        # Assert: the stored ballot and the returned ballot agree and are CAST
        stored = store.get(plaintext.object_id)
        self.assertEqual(stored.state, BallotBoxState.CAST)
        self.assertEqual(accepted.state, BallotBoxState.CAST)
        self.assertEqual(stored.object_id, accepted.object_id)

        # Test failure modes: a ballot already cast can be neither cast
        # again nor spoiled
        for rejected_state in (BallotBoxState.CAST, BallotBoxState.SPOILED):
            self.assertIsNone(
                accept_ballot(encrypted, rejected_state, metadata, context, store)
            )
    def test_encrypt_simple_selection_malformed_data_fails(self):
        """Tampered copies of an encrypted selection must fail `is_valid_encryption`."""

        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        nonce = randbelow(Q)
        description = SelectionDescription("some-selection-object-id",
                                           "some-candidate-id", 1)
        expected_hash = description.crypto_hash()

        selection = selection_from(description)
        self.assertTrue(selection.is_valid(description.object_id))

        # Act
        encrypted = encrypt_selection(selection, description,
                                      keypair.public_key, nonce)

        # one copy with its description_hash overwritten
        wrong_hash_copy = deepcopy(encrypted)
        wrong_hash_copy.description_hash = TWO_MOD_Q

        # one copy with its proof removed
        proofless_copy = deepcopy(encrypted)
        proofless_copy.proof = None

        # Assert: neither tampered copy validates
        for tampered in (wrong_hash_copy, proofless_copy):
            self.assertFalse(
                tampered.is_valid_encryption(expected_hash,
                                             keypair.public_key))
Example #8
0
    def test_djcp_proofs_simple(self):
        """
        For an encryption of 0 and of 1, the matching disjunctive Chaum-Pedersen
        proof maker yields a valid proof and the mismatched maker an invalid one.
        """
        # doesn't get any simpler than this
        keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
        nonce = ONE_MOD_Q
        seed = TWO_MOD_Q

        # (plaintext, proof maker that matches it, proof maker that does not)
        cases = (
            (0, make_disjunctive_chaum_pedersen_zero,
             make_disjunctive_chaum_pedersen_one),
            (1, make_disjunctive_chaum_pedersen_one,
             make_disjunctive_chaum_pedersen_zero),
        )
        for plaintext, make_matching, make_mismatched in cases:
            message = get_optional(
                elgamal_encrypt(plaintext, nonce, keypair.public_key))
            good_proof = make_matching(message, nonce, keypair.public_key,
                                       ONE_MOD_Q, seed)
            bad_proof = make_mismatched(message, nonce, keypair.public_key,
                                        ONE_MOD_Q, seed)
            self.assertTrue(
                good_proof.is_valid(message, keypair.public_key, ONE_MOD_Q))
            self.assertFalse(
                bad_proof.is_valid(message, keypair.public_key, ONE_MOD_Q))
    def test_encrypt_ballot_simple_succeeds(self):
        """Encrypt one fake ballot with and without an explicit nonce seed; both must validate."""

        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        metadata, context = election_factory.get_fake_ciphertext_election(
            election_factory.get_fake_election(), keypair.public_key)

        # TODO: Ballot Factory
        ballot = election_factory.get_fake_ballot(metadata)
        style_id = metadata.ballot_styles[0].object_id
        self.assertTrue(ballot.is_valid(style_id))

        # Act
        default_result = encrypt_ballot(ballot, metadata, context, SEED_HASH)
        tracker_code = default_result.get_tracker_code()
        seeded_result = encrypt_ballot(ballot, metadata, context, SEED_HASH,
                                       TWO_MOD_Q)

        # Assert
        self.assertIsNotNone(default_result)
        self.assertIsNotNone(default_result.tracking_id)
        self.assertIsNotNone(tracker_code)
        self.assertIsNotNone(seeded_result)
        for encrypted in (default_result, seeded_result):
            self.assertTrue(
                encrypted.is_valid_encryption(
                    context.crypto_extended_base_hash, keypair.public_key))
Example #10
0
    def test_gmpy2_parallelism_is_safe(self):
        """
        Ensures running lots of parallel exponentiations still yields the correct answer.
        This verifies that nothing incorrect is happening in the GMPY2 library

        The scheduler is closed in a `finally` clause so that a scheduling error
        or a failing assertion does not leave it open.
        """

        # Arrange
        scheduler = Scheduler()
        try:
            problem_size = 1000
            random_secret_keys = Nonces(
                int_to_q_unchecked(3))[0:problem_size]
            log_info(
                f"testing GMPY2 powmod parallelism safety (cpus = {scheduler.cpu_count}, problem_size = {problem_size})"
            )

            # Act: each task receives its arguments as a one-element list
            start = timer()
            keypairs = scheduler.schedule(
                elgamal_keypair_from_secret,
                [[secret_key] for secret_key in random_secret_keys],
            )
            end1 = timer()

            # Assert: recompute every keypair sequentially and compare
            for keypair in keypairs:
                self.assertEqual(
                    keypair.public_key,
                    elgamal_keypair_from_secret(
                        keypair.secret_key).public_key,
                )
            end2 = timer()
        finally:
            scheduler.close()
        log_info(f"Parallelism speedup: {(end2 - end1) / (end1 - start):.3f}")
Example #11
0
    def test_ballot_box_spoil_ballot(self):
        """Spoil an encrypted ballot through a `BallotBox`, then check it cannot be reused."""
        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        internal_manifest, context = election_factory.get_fake_ciphertext_election(
            election_factory.get_fake_manifest(), keypair.public_key)
        store = DataStore()
        plaintext = election_factory.get_fake_ballot(internal_manifest)
        style_id = internal_manifest.ballot_styles[0].object_id
        self.assertTrue(plaintext.is_valid(style_id))

        # Act
        encrypted = encrypt_ballot(plaintext, internal_manifest, context,
                                   SEED)
        ballot_box = BallotBox(internal_manifest, context, store)
        spoiled = ballot_box.spoil(encrypted)

        # Assert: the stored ballot and the returned ballot agree and are SPOILED
        stored = store.get(plaintext.object_id)
        self.assertEqual(stored.state, BallotBoxState.SPOILED)
        self.assertEqual(spoiled.state, BallotBoxState.SPOILED)
        self.assertEqual(stored.object_id, spoiled.object_id)

        # Test failure modes
        # cannot spoil again
        self.assertIsNone(ballot_box.spoil(encrypted))
        # cannot cast a ballot already spoiled
        self.assertIsNone(ballot_box.cast(encrypted))
Example #12
0
def elgamal_keypairs(draw: _DrawType):
    """
    Generates an arbitrary ElGamal secret/public keypair.

    A drawn secret equal to `ONE_MOD_Q` is replaced by `TWO_MOD_Q` before the
    keypair is derived.

    :param draw: Hidden argument, used by Hypothesis.
    """
    secret = draw(elements_mod_q_no_zero())
    if secret != ONE_MOD_Q:
        return elgamal_keypair_from_secret(secret)
    return elgamal_keypair_from_secret(TWO_MOD_Q)
 def test_ccp_proofs_simple_encryption_of_one(self):
     """A constant Chaum-Pedersen proof for an encryption of 1 validates only when built for the constant 1."""
     keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
     public_key = keypair.public_key
     nonce = ONE_MOD_Q
     seed = TWO_MOD_Q
     message = get_optional(elgamal_encrypt(1, nonce, public_key))

     # proof built for the constant that was actually encrypted
     matching_proof = make_constant_chaum_pedersen(message, 1, nonce,
                                                   public_key, seed)
     # proof built for a constant that was not encrypted
     mismatched_proof = make_constant_chaum_pedersen(message, 0, nonce,
                                                     public_key, seed)

     self.assertTrue(matching_proof.is_valid(message, public_key))
     self.assertFalse(mismatched_proof.is_valid(message, public_key))
Example #14
0
    def test_encrypt_simple_contest_referendum_succeeds(self):
        """Encrypt a contest built from a hand-made two-selection referendum description."""
        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        nonce = randbelow(Q)

        affirmative = SelectionDescription(
            "some-object-id-affirmative", "some-candidate-id-affirmative", 0
        )
        negative = SelectionDescription(
            "some-object-id-negative", "some-candidate-id-negative", 1
        )
        placeholder = SelectionDescription(
            "some-object-id-placeholder", "some-candidate-id-placeholder", 2
        )
        description = ContestDescriptionWithPlaceholders(
            "some-contest-object-id",
            "some-electoral-district-id",
            0,
            VoteVariationType.one_of_m,
            1,
            1,
            "some-referendum-contest-name",
            [affirmative, negative],
            None,
            None,
            [placeholder],
        )
        expected_hash = description.crypto_hash()

        contest = contest_from(description)
        contest_ok = contest.is_valid(
            description.object_id,
            len(description.ballot_selections),
            description.number_elected,
            description.votes_allowed,
        )
        self.assertTrue(contest_ok)

        # Act
        encrypted = encrypt_contest(
            contest, description, keypair.public_key, ONE_MOD_Q, nonce
        )

        # Assert
        self.assertIsNotNone(encrypted)
        encryption_ok = encrypted.is_valid_encryption(
            expected_hash, keypair.public_key, ONE_MOD_Q
        )
        self.assertTrue(encryption_ok)
Example #15
0
    def test_encrypt_contest_duplicate_selection_object_ids_fails(self):
        """
        Encrypting a contest whose description is malformed must fail.

        The description holds two selections that share one `object_id`;
        `encrypt_contest` is expected to return `None` for it.
        """
        # Seed for the random contest generator. Previously this local was
        # assigned but never used, and the literal 0 was repeated below.
        random_seed = 0

        # Arrange
        description = ContestDescription(
            object_id="[email protected]",
            electoral_district_id="[email protected]",
            sequence_order=1,
            vote_variation=VoteVariationType.n_of_m,
            number_elected=1,
            votes_allowed=1,
            name="",
            ballot_selections=[
                SelectionDescription(
                    object_id="[email protected]",
                    candidate_id="*****@*****.**",
                    sequence_order=0,
                ),
                # Note the selection description is the same as the first sequence element
                SelectionDescription(
                    object_id="[email protected]",
                    candidate_id="*****@*****.**",
                    sequence_order=1,
                ),
            ],
        )

        keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
        seed = ONE_MOD_Q

        # Bypass checking the validity of the description
        data = ballot_factory.get_random_contest_from(
            description, Random(random_seed), suppress_validity_check=True
        )

        placeholders = generate_placeholder_selections_from(
            description, description.number_elected
        )
        description_with_placeholders = contest_description_with_placeholders_from(
            description, placeholders
        )

        # Act
        subject = encrypt_contest(
            data, description_with_placeholders, keypair.public_key, ONE_MOD_Q, seed
        )

        # Assert
        self.assertIsNone(subject)
 def test_cp_proofs_simple(self):
     """A Chaum-Pedersen proof built from the real partial decryption validates; one built from a wrong value does not."""
     keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
     nonce = ONE_MOD_Q
     seed = TWO_MOD_Q
     message = get_optional(elgamal_encrypt(0, nonce, keypair.public_key))
     partial = message.partial_decrypt(keypair.secret_key)

     # proof over the actual partial decryption
     honest_proof = make_chaum_pedersen(message, keypair.secret_key, partial,
                                        seed, ONE_MOD_Q)
     # proof built with TWO_MOD_Q in place of the partial decryption
     forged_proof = make_chaum_pedersen(message, keypair.secret_key,
                                        TWO_MOD_Q, seed, ONE_MOD_Q)

     # both proofs are checked against the actual partial decryption
     honest_ok = honest_proof.is_valid(message, keypair.public_key, partial,
                                       ONE_MOD_Q)
     self.assertTrue(honest_ok)
     forged_ok = forged_proof.is_valid(message, keypair.public_key, partial,
                                       ONE_MOD_Q)
     self.assertFalse(forged_ok)
Example #17
0
 def test_djcp_proof_invalid_inputs(self):
     """`make_disjunctive_chaum_pedersen` must raise when given 3 as its final argument."""
     # this is here to push up our coverage
     keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
     nonce = ONE_MOD_Q
     seed = TWO_MOD_Q
     message0 = get_optional(elgamal_encrypt(0, nonce, keypair.public_key))
     with self.assertRaises(Exception):
         make_disjunctive_chaum_pedersen(
             message0,
             nonce,
             keypair.public_key,
             seed,
             3,
         )
Example #18
0
    def test_encrypt_contest_manually_formed_contest_description_valid_succeeds(self):
        """Encrypt a random contest drawn from a hand-written description, with proof verification on."""
        # Arrange
        # NOTE(review): the identifier literals below look like e-mail-obfuscation
        # placeholders and are identical for both selections; confirm them
        # against the upstream test data.
        first_selection = SelectionDescription(
            object_id="[email protected]",
            candidate_id="*****@*****.**",
            sequence_order=0,
        )
        second_selection = SelectionDescription(
            object_id="[email protected]",
            candidate_id="*****@*****.**",
            sequence_order=1,
        )
        description = ContestDescription(
            object_id="[email protected]",
            electoral_district_id="[email protected]",
            sequence_order=1,
            vote_variation=VoteVariationType.n_of_m,
            number_elected=1,
            votes_allowed=1,
            name="",
            ballot_selections=[first_selection, second_selection],
            ballot_title=None,
            ballot_subtitle=None,
        )

        keypair = elgamal_keypair_from_secret(TWO_MOD_Q)
        seed = ONE_MOD_Q

        contest = ballot_factory.get_random_contest_from(description, Random(0))

        # one placeholder selection per seat to be elected
        description_with_placeholders = contest_description_with_placeholders_from(
            description,
            generate_placeholder_selections_from(
                description, description.number_elected
            ),
        )

        # Act
        encrypted = encrypt_contest(
            contest,
            description_with_placeholders,
            keypair.public_key,
            ONE_MOD_Q,
            seed,
            should_verify_proofs=True,
        )

        # Assert
        self.assertIsNotNone(encrypted)
Example #19
0
def run_bench(filename: str, output_dir: Optional[str],
              use_progressbar: bool) -> None:
    """
    Reads a Dominion CSV file, runs the Ray tally over it, and prints timings.

    :param filename: path of the CSV file handed to `read_dominion_csv`
    :param output_dir: passed to `ray_tally_everything` as `root_dir`; when
        truthy, the tally's proofs are also validated after the run
    :param use_progressbar: forwarded to the tally and to the proof validation
    :raises SystemExit: with status 1 when the CSV file cannot be read
    :raises AssertionError: when there are zero ballots, or when proof
        validation fails
    """
    # Local import keeps this function self-contained; `sys.exit` is used
    # instead of the `exit` helper, which only exists when `site` is loaded.
    import sys

    start_time = timer()
    print(f"Benchmarking: {filename}")
    cvrs = read_dominion_csv(filename)
    if cvrs is None:
        print(f"Failed to read {filename}, terminating.")
        sys.exit(1)
    rows, _cols = cvrs.data.shape

    parse_time = timer()
    print(
        f"    Parse time: {parse_time - start_time: .3f} sec, {rows / (parse_time - start_time):.3f} ballots/sec"
    )

    assert rows > 0, "can't have zero ballots!"

    # doesn't matter what the key is, so long as it's consistent for both runs
    keypair = get_optional(
        elgamal_keypair_from_secret(int_to_q_unchecked(31337)))

    rtally_start = timer()
    rtally = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        verbose=True,
        root_dir=output_dir,
        use_progressbar=use_progressbar,
    )
    rtally_end = timer()

    print("\nOVERALL PERFORMANCE")
    print(f"    Ray time:    {rtally_end - rtally_start : .3f} sec")
    print(
        f"    Ray rate:    {rows / (rtally_end - rtally_start): .3f} ballots/sec"
    )

    if output_dir:
        print("\nSANITY CHECK")
        # Run the validation outside of an `assert` statement so that it is
        # not skipped when Python runs with -O; the exception type is kept.
        proofs_valid = rtally.all_proofs_valid(
            verbose=True,
            recheck_ballots_and_tallies=False,
            use_progressbar=use_progressbar,
        )
        if not proofs_valid:
            raise AssertionError("proof failure!")
Example #20
0
    def setUp(self) -> None:
        """Build a fake election plus one plaintext, one encrypted and one submitted (CAST) ballot."""
        # Election setup
        factory = ElectionFactory()
        public_key = elgamal_keypair_from_secret(int_to_q(2)).public_key
        election = factory.get_fake_ciphertext_election(
            factory.get_fake_manifest(), public_key)
        self.internal_manifest, self.context = election
        device_hash = ElectionFactory.get_encryption_device().get_hash()

        # Arrange ballots
        self.plaintext_ballot = factory.get_fake_ballot(
            self.internal_manifest)
        encrypted = encrypt_ballot(
            self.plaintext_ballot,
            self.internal_manifest,
            self.context,
            device_hash,
        )
        # keep the nonce of the encrypted ballot for later use by the tests
        self.ballot_nonce = encrypted.nonce
        self.submitted_ballot = from_ciphertext_ballot(
            encrypted, BallotBoxState.CAST)
    def test_encrypt_simple_selection_succeeds(self):
        """Encrypt a selection built from a hand-made description and verify it validates."""

        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        nonce = randbelow(Q)
        description = SelectionDescription("some-selection-object-id",
                                           "some-candidate-id", 1)
        expected_hash = description.crypto_hash()

        selection = selection_from(description)
        self.assertTrue(selection.is_valid(description.object_id))

        # Act
        encrypted = encrypt_selection(selection, description,
                                      keypair.public_key, nonce)

        # Assert
        self.assertIsNotNone(encrypted)
        self.assertIsNotNone(encrypted.message)
        encryption_ok = encrypted.is_valid_encryption(expected_hash,
                                                      keypair.public_key)
        self.assertTrue(encryption_ok)
Example #22
0
 def test_schnorr_proofs_simple(self) -> None:
     """A Schnorr proof made from the keypair for secret 2 and nonce 1 must validate."""
     # doesn't get any simpler than this
     keypair = get_optional(elgamal_keypair_from_secret(TWO_MOD_Q))
     schnorr_proof = make_schnorr_proof(keypair, ONE_MOD_Q)
     self.assertTrue(schnorr_proof.is_valid())
Example #23
0
def ray_tally_everything(
    cvrs: DominionCSV,
    verbose: bool = True,
    use_progressbar: bool = True,
    date: Optional[datetime] = None,
    seed_hash: Optional[ElementModQ] = None,
    master_nonce: Optional[ElementModQ] = None,
    secret_key: Optional[ElementModQ] = None,
    root_dir: Optional[str] = None,
) -> "RayTallyEverythingResults":
    """
    This top-level function takes a collection of Dominion CVRs and produces everything that
    we might want for arlo-e2e: a list of encrypted ballots, their encrypted and decrypted tally,
    and proofs of the correctness of the whole thing. The election `secret_key` is an optional
    parameter. If absent, a random keypair is generated and used. Similarly, if a `seed_hash` or
    `master_nonce` is not provided, random ones are generated and used.

    For parallelism, Ray is used. Make sure you've called `ray.init()` or `ray_localhost_init()`
    before calling this.

    If `root_dir` is specified, then the tally is written out to the specified directory, and
    the resulting `RayTallyEverythingResults` object will support the methods that allow those
    ballots to be read back in again. Conversely, if `root_dir` is `None`, then nothing is
    written to disk, and the result will not have access to individual ballots.

    :param cvrs: the parsed CVR data; its `data`, `metadata`, `to_election_description_ray()`
        and `dataframe_without_selections()` members are used here
    :param verbose: only forwarded to the final "Encryption and tabulation" log call
    :param use_progressbar: when true, a `ProgressBar` is created and handed to the remote tasks
    :param date: passed to `to_election_description_ray`; defaults to `datetime.now()`
    :param seed_hash: defaults to `rand_q()` when absent
    :param master_nonce: root of the `Nonces` sequence; defaults to `rand_q()` when absent
    :param secret_key: election secret key; a random keypair is generated when absent
    :param root_dir: output directory, or `None` to write nothing
    :raises AssertionError: on zero ballots, a failed keypair computation, a failed tally,
        unexpected tally keys, a decrypted total that differs from the plaintext column sum,
        or a manifest aggregation result of the wrong type
    """

    # NOTE(review): `cols` is never read in this function.
    rows, cols = cvrs.data.shape

    # presumably waits until at least two Ray workers are up; see ray_wait_for_workers
    ray_wait_for_workers(min_workers=2)

    if date is None:
        date = datetime.now()

    # The manifest aggregator actor only exists when results are written to disk.
    if root_dir is not None:
        mkdir_helper(root_dir, num_retries=NUM_WRITE_RETRIES)
        r_manifest_aggregator = ManifestAggregatorActor.remote(
            root_dir)  # type: ignore
    else:
        r_manifest_aggregator = None

    r_root_dir = ray.put(root_dir)

    start_time = timer()

    # Performance note: by using to_election_description_ray rather than to_election_description, we're
    # only getting back a list of dictionaries rather than a list of PlaintextBallots. We're pushing that
    # work out into the nodes, where it will run in parallel. The BallotPlaintextFactory wraps up all
    # the (immutable) state necessary to convert from these dicts to PlaintextBallots and is meant to
    # be sent to every node in the cluster.

    ed, bpf, ballot_dicts, id_map = cvrs.to_election_description_ray(date=date)
    setup_time = timer()
    num_ballots = len(ballot_dicts)
    assert num_ballots > 0, "can't have zero ballots!"
    log_and_print(
        f"ElectionGuard setup time: {setup_time - start_time: .3f} sec, {num_ballots / (setup_time - start_time):.3f} ballots/sec"
    )

    keypair = (elgamal_keypair_random() if secret_key is None else
               elgamal_keypair_from_secret(secret_key))
    assert keypair is not None, "unexpected failure with keypair computation"
    # The keypair unpacks as (secret, public); this rebinds the `secret_key` parameter.
    secret_key, public_key = keypair

    cec = make_ciphertext_election_context(
        number_of_guardians=1,
        quorum=1,
        elgamal_public_key=public_key,
        description_hash=ed.crypto_hash(),
    )
    r_cec = ray.put(cec)

    ied = InternalElectionDescription(ed)
    r_ied = ray.put(ied)

    if seed_hash is None:
        seed_hash = rand_q()
    r_seed_hash = ray.put(seed_hash)
    r_keypair = ray.put(keypair)

    r_ballot_plaintext_factory = ray.put(bpf)

    if master_nonce is None:
        master_nonce = rand_q()

    # Each ballot is paired with its index; the index is sent alongside the shared
    # `Nonces` object to the remote encryption tasks.
    nonces = Nonces(master_nonce)
    r_nonces = ray.put(nonces)
    nonce_indices = range(num_ballots)

    inputs = list(zip(ballot_dicts, nonce_indices))

    batches = shard_list_uniform(inputs, BATCH_SIZE)
    num_batches = len(batches)
    log_and_print(
        f"Launching Ray.io remote encryption! (number of batches: {num_batches})"
    )

    # The timer is restarted here, so the rate reported at the end excludes the setup above.
    start_time = timer()

    progressbar = (ProgressBar({
        "Ballots": num_ballots,
        "Tallies": num_ballots,
        "Iterations": 0,
        "Batch": 0,
    }) if use_progressbar else None)
    progressbar_actor = progressbar.actor if progressbar is not None else None

    # One entry is appended per batch: whatever ray_tally_ballots returns for that batch.
    batch_tallies: List[ObjectRef] = []
    for batch in batches:
        if progressbar_actor:
            progressbar_actor.update_completed.remote("Batch", 1)

        # NOTE(review): `num_ballots_in_batch` and `num_shards` are never read.
        num_ballots_in_batch = len(batch)
        sharded_inputs = shard_list_uniform(batch, BALLOTS_PER_SHARD)
        num_shards = len(sharded_inputs)

        # One remote encryption task per shard. The shard's nonce indices travel as a
        # single list argument; its ballot dicts are splatted as trailing arguments.
        partial_tally_refs = [
            r_encrypt_and_write.remote(
                r_ied,
                r_cec,
                r_seed_hash,
                r_root_dir,
                r_manifest_aggregator,
                progressbar_actor,
                r_ballot_plaintext_factory,
                r_nonces,
                right_tuple_list(shard),
                *(left_tuple_list(shard)),
            ) for shard in sharded_inputs
        ]

        # log_and_print("Remote tallying.")
        btally = ray_tally_ballots(partial_tally_refs, BALLOTS_PER_SHARD,
                                   progressbar)
        batch_tallies.append(btally)

    # Each batch ultimately yields one partial tally; we add these up here at the
    # very end. If we have a million ballots and have batches of 10k ballots, this
    # would mean we'd have only 100 partial tallies. So, what's here works just fine.
    # If we wanted, we could certainly burn some scalar time and keep a running,
    # singular, partial tally. It's probably more important to push onward to the
    # next batch, so we can do as much work in parallel as possible.

    # NOTE(review): the literal 10 sits where BALLOTS_PER_SHARD was passed above;
    # its exact meaning is defined by ray_tally_ballots.
    if len(batch_tallies) > 1:
        tally = ray.get(ray_tally_ballots(batch_tallies, 10, progressbar))
    else:
        tally = ray.get(batch_tallies[0])

    if progressbar:
        progressbar.close()

    assert tally is not None, "tally failed!"

    log_and_print("Tally decryption.")
    decrypted_tally: DECRYPT_TALLY_OUTPUT_TYPE = ray_decrypt_tally(
        tally, r_cec, r_keypair, seed_hash)

    log_and_print("Validating tally.")

    # Sanity-checking logic: make sure we don't have any unexpected keys, and that the decrypted totals
    # match up with the columns in the original plaintext data.
    tally_keys = set(decrypted_tally.keys())
    expected_keys = set(id_map.keys())

    assert tally_keys.issubset(
        expected_keys
    ), f"bad tally keys (actual keys: {sorted(tally_keys)}, expected keys: {sorted(expected_keys)})"

    # `id_map` maps each selection's object id to its column in the plaintext CVR data.
    for obj_id in decrypted_tally.keys():
        cvr_sum = int(cvrs.data[id_map[obj_id]].sum())
        decryption, proof = decrypted_tally[obj_id]
        assert cvr_sum == decryption, f"decryption failed for {obj_id}"

    final_manifest: Optional[Manifest] = None

    # The aggregator was only created when `root_dir` was given (see above).
    if root_dir is not None:
        final_manifest = ray.get(r_manifest_aggregator.result.remote())
        assert isinstance(
            final_manifest,
            Manifest), "type error: bad result from manifest aggregation"

    # Assemble the data structure that we're returning. Having nonces in the ciphertext makes these
    # structures sensitive for writing out to disk, but otherwise they're ready to go.
    log_and_print("Constructing results.")
    reported_tally: Dict[str, SelectionInfo] = {
        k: SelectionInfo(
            object_id=k,
            encrypted_tally=tally[k],
            # we need to forcibly convert mpz to int here to make serialization work properly
            decrypted_tally=int(decrypted_tally[k][0]),
            proof=decrypted_tally[k][1],
        )
        for k in tally.keys()
    }

    tabulate_time = timer()

    log_and_print(
        f"Encryption and tabulation: {rows} ballots, {rows / (tabulate_time - start_time): .3f} ballot/sec",
        verbose,
    )

    return RayTallyEverythingResults(
        metadata=cvrs.metadata,
        cvr_metadata=cvrs.dataframe_without_selections(),
        election_description=ed,
        num_ballots=rows,
        manifest=final_manifest,
        tally=SelectionTally(reported_tally),
        context=cec,
    )
Example #24
0
 def test_elgamal_keypair_from_secret_requires_key_greater_than_one(self):
     """Secrets of zero and one must both be rejected with a `None` result."""
     for rejected_secret in (ZERO_MOD_Q, ONE_MOD_Q):
         self.assertEqual(None, elgamal_keypair_from_secret(rejected_secret))
Example #25
0
def run_bench(filename: str, pool: Pool, file_dir: Optional[str]) -> None:
    """
    Benchmarks the multiprocessing-pool tally against the Ray tally for a single
    Dominion CSV file and prints timing results for both.

    Args:
        filename: path of the Dominion CSV file to read with `read_dominion_csv`.
        pool: the pool handed to `fast_tally_everything` (and to `tally.equivalent`).
        file_dir: optional prefix for on-disk output. When it is a non-empty string,
            the pool tally is written to `file_dir + "_fast"`, the Ray tally uses
            `file_dir + "_ray"` as its `root_dir`, the two tallies are checked for
            equivalence, and both directories are removed at the end. When it is
            `None` or empty, nothing is written, compared, or removed.

    Raises:
        SystemExit: (exit status 1) if the CSV file cannot be read.
        AssertionError: if there are zero ballots, a proof fails to validate, or
            the two tallies are not equivalent.
    """
    # Local import so this block does not depend on a file-level `import sys`.
    import sys

    start_time = timer()
    print(f"Benchmarking: {filename}")
    log_info(f"Benchmarking: {filename}")
    cvrs = read_dominion_csv(filename)
    if cvrs is None:
        print(f"Failed to read {filename}, terminating.")
        # sys.exit rather than the site-provided exit(), which is not guaranteed
        # to exist (e.g. `python -S` or frozen interpreters).
        sys.exit(1)
    rows, _ = cvrs.data.shape

    parse_time = timer()
    print(f"    Parse time: {parse_time - start_time: .3f} sec")

    assert rows > 0, "can't have zero ballots!"

    # Decide once where (and whether) output goes, so that writing and cleanup can
    # never disagree: an empty file_dir must not cause "_ray" / "_fast" in the
    # current directory to be deleted.
    fast_dir: Optional[str] = file_dir + "_fast" if file_dir else None
    ray_dir: Optional[str] = file_dir + "_ray" if file_dir else None

    # doesn't matter what the key is, so long as it's consistent for both runs
    keypair = get_optional(elgamal_keypair_from_secret(int_to_q_unchecked(31337)))

    tally_start = timer()
    tally = fast_tally_everything(
        cvrs, pool, verbose=True, secret_key=keypair.secret_key
    )

    # Note: this write happens before tally_end is sampled, so the reported pool
    # time includes writing the tally to disk when output is enabled.
    if fast_dir is not None:
        write_fast_tally(tally, fast_dir)

    tally_end = timer()
    assert tally.all_proofs_valid(verbose=True), "proof failure!"

    print("\nstarting ray.io parallelism")
    rtally_start = timer()
    rtally = ray_tally_everything(
        cvrs,
        secret_key=keypair.secret_key,
        root_dir=ray_dir,
    )
    rtally_end = timer()

    if ray_dir is not None:
        rtally_as_fast = rtally.to_fast_tally()
        assert rtally_as_fast.all_proofs_valid(verbose=True), "proof failure!"
        assert tally.equivalent(
            rtally_as_fast, keypair, pool
        ), "tallies aren't equivalent!"

        # Note: tally.equivalent() isn't quite as stringent as asserting that absolutely
        # everything is identical, but it's a pretty good sanity check for our purposes.
        # In tests/test_ray_tally.py, test_ray_and_multiprocessing_agree goes the extra
        # distance to create identical tallies from each system and assert their equality.

    pool_elapsed = tally_end - tally_start
    ray_elapsed = rtally_end - rtally_start

    print("\nOVERALL PERFORMANCE")
    print(f"    Pool time:   {pool_elapsed: .3f} sec")
    print(f"    Pool rate:   {rows / pool_elapsed: .3f} ballots/sec")
    print(f"    Ray time:    {ray_elapsed: .3f} sec")
    print(f"    Ray rate:    {rows / ray_elapsed: .3f} ballots/sec")

    print(
        f"    Ray speedup: {pool_elapsed / ray_elapsed: .3f} (>1.0 = ray is faster, <1.0 = ray is slower)"
    )

    # Remove only the directories this run was configured to create.
    for out_dir in (ray_dir, fast_dir):
        if out_dir is not None:
            shutil.rmtree(out_dir, ignore_errors=True)
Example #26
0
def fast_tally_everything(
    cvrs: DominionCSV,
    pool: Optional[Pool] = None,
    verbose: bool = True,
    date: Optional[datetime] = None,
    seed_hash: Optional[ElementModQ] = None,
    master_nonce: Optional[ElementModQ] = None,
    secret_key: Optional[ElementModQ] = None,
    use_progressbar: bool = True,
) -> FastTallyEverythingResults:
    """
    This top-level function takes a collection of Dominion CVRs and produces everything that
    we might want for arlo-e2e: a list of encrypted ballots, their encrypted and decrypted tally,
    and proofs of the correctness of the whole thing. The election `secret_key` is an optional
    parameter. If absent, a random keypair is generated and used. Similarly, if a `seed_hash` or
    `master_nonce` is not provided, random ones are generated and used.

    For parallelism, a `multiprocessing.pool.Pool` may be provided, and should result in significant
    speedups on multicore computers. If absent, the computation will proceed sequentially.

    Args:
        cvrs: the parsed Dominion CVR data to encrypt and tally.
        pool: optional pool forwarded to the encryption, tallying, and decryption helpers.
        verbose: forwarded to `log_and_print` and `fast_decrypt_tally`; also gates one
            progress `print`.
        date: election date passed to `cvrs.to_election_description`; defaults to
            `datetime.now()`.
        seed_hash: seed used for ballot encryption and tally decryption; random if absent.
        master_nonce: root of the per-ballot nonce sequence; random if absent.
        secret_key: election secret key; a random keypair is generated if absent.
        use_progressbar: forwarded to `fast_encrypt_ballots`.

    Returns:
        A `FastTallyEverythingResults` carrying the CVR metadata, the election description,
        memos for the accepted (nonce-stripped) encrypted ballots, the per-selection tally
        with proofs, and the ciphertext election context.

    Raises:
        AssertionError: if there are no ballots, the keypair cannot be computed, tallying
            fails, or a decrypted total disagrees with the plaintext CVR data.
    """
    rows, cols = cvrs.data.shape

    if date is None:
        date = datetime.now()

    parse_time = timer()
    log_and_print(f"Rows: {rows}, cols: {cols}", verbose)

    ed, ballots, id_map = cvrs.to_election_description(date=date)
    num_ballots = len(ballots)
    assert num_ballots > 0, "can't have zero ballots!"

    keypair = (
        elgamal_keypair_random()
        if secret_key is None
        else elgamal_keypair_from_secret(secret_key)
    )
    assert keypair is not None, "unexpected failure with keypair computation"
    secret_key, public_key = keypair

    # This computation exists only to cause side-effects in the DLog engine, so the lame nonce is not an issue.
    # It is deliberately performed OUTSIDE of the assert statement: asserts are removed entirely
    # under `python -O`, which would otherwise silently skip the priming as well.
    priming_ciphertext = get_optional(
        elgamal_encrypt(
            m=num_ballots, nonce=int_to_q_unchecked(3), public_key=public_key
        )
    )
    priming_plaintext = priming_ciphertext.decrypt(secret_key)
    assert num_ballots == priming_plaintext, "got wrong ElGamal decryption!"

    dlog_prime_time = timer()
    log_and_print(
        f"DLog prime time (n={num_ballots}): {dlog_prime_time - parse_time: .3f} sec",
        verbose,
    )

    cec = make_ciphertext_election_context(
        number_of_guardians=1,
        quorum=1,
        elgamal_public_key=public_key,
        description_hash=ed.crypto_hash(),
    )

    ied = InternalElectionDescription(ed)

    # REVIEW THIS: is this cryptographically sound? Is the seed_hash properly a secret? Should
    # it go in the output? The nonces are clearly secret. If you know them, you can decrypt.
    if seed_hash is None:
        seed_hash = rand_q()
    if master_nonce is None:
        master_nonce = rand_q()
    # one nonce per ballot, all derived from the master nonce
    nonces: List[ElementModQ] = Nonces(master_nonce)[0:num_ballots]

    # even if verbose is false, we still want to see the progress bar for the encryption
    cballots = fast_encrypt_ballots(
        ballots, ied, cec, seed_hash, nonces, pool, use_progressbar=use_progressbar
    )
    eg_encrypt_time = timer()

    log_and_print(
        f"Encryption time: {eg_encrypt_time - dlog_prime_time: .3f} sec", verbose
    )
    log_and_print(
        f"Encryption rate: {rows / (eg_encrypt_time - dlog_prime_time): .3f} ballot/sec",
        verbose,
    )

    tally: TALLY_TYPE = fast_tally_ballots(cballots, pool)
    eg_tabulate_time = timer()

    log_and_print(
        f"Tabulation time: {eg_tabulate_time - eg_encrypt_time: .3f} sec", verbose
    )
    log_and_print(
        f"Tabulation rate: {rows / (eg_tabulate_time - eg_encrypt_time): .3f} ballot/sec",
        verbose,
    )
    log_and_print(
        f"Encryption and tabulation: {rows} ballots / {eg_tabulate_time - dlog_prime_time: .3f} sec = {rows / (eg_tabulate_time - dlog_prime_time): .3f} ballot/sec",
        verbose,
    )

    assert tally is not None, "tally failed!"

    if verbose:  # pragma: no cover
        print("Decryption & Proofs: ")
    decrypted_tally: DECRYPT_TALLY_OUTPUT_TYPE = fast_decrypt_tally(
        tally, cec, keypair, seed_hash, pool, verbose
    )
    eg_decryption_time = timer()
    log_and_print(
        f"Decryption time: {eg_decryption_time - eg_tabulate_time: .3f} sec", verbose
    )
    log_and_print(
        f"Decryption rate: {len(decrypted_tally.keys()) / (eg_decryption_time - eg_tabulate_time): .3f} selection/sec",
        verbose,
    )

    # Sanity-checking logic: make sure we don't have any unexpected keys, and that the decrypted totals
    # match up with the columns in the original plaintext data.
    for obj_id in decrypted_tally.keys():
        assert obj_id in id_map, "object_id in results that we don't know about!"
        cvr_sum = int(cvrs.data[id_map[obj_id]].sum())
        # each entry is a (decryption, proof) pair; only the decryption is checked here
        decryption, _ = decrypted_tally[obj_id]
        assert cvr_sum == decryption, f"decryption failed for {obj_id}"

    # Assemble the data structure that we're returning. Having nonces in the ciphertext makes these
    # structures sensitive for writing out to disk, but otherwise they're ready to go.
    reported_tally: Dict[str, SelectionInfo] = {
        k: SelectionInfo(
            object_id=k,
            encrypted_tally=tally[k],
            # we need to forcibly convert mpz to int here to make serialization work properly
            decrypted_tally=int(decrypted_tally[k][0]),
            proof=decrypted_tally[k][1],
        )
        for k in tally.keys()
    }

    # strips the ballots of their nonces, which is important because those could allow for decryption
    accepted_ballots = [ciphertext_ballot_to_accepted(x) for x in cballots]

    return FastTallyEverythingResults(
        metadata=cvrs.metadata,
        cvr_metadata=cvrs.dataframe_without_selections(),
        election_description=ed,
        encrypted_ballot_memos={
            ballot.object_id: make_memo_value(ballot) for ballot in accepted_ballots
        },
        tally=SelectionTally(reported_tally),
        context=cec,
    )
Example #27
0
    def test_ballot_store(self):
        # Exercises BallotStore.set / get / exists with cast, spoiled, and
        # unknown-state ballots, plus state mutation and removal.

        # Arrange
        keypair = elgamal_keypair_from_secret(int_to_q(2))
        election = election_factory.get_fake_election()
        metadata, context = election_factory.get_fake_ciphertext_election(
            election, keypair.public_key)

        # get an encrypted fake ballot to work with
        plaintext_ballot = election_factory.get_fake_ballot(metadata)
        encrypted_ballot = encrypt_ballot(plaintext_ballot, metadata, context,
                                          SEED_HASH)

        def new_accepted_ballot():
            # Each call builds a fresh accepted ballot from the same encrypted
            # ballot, so every stored object is a distinct instance.
            return CiphertextAcceptedBallot(
                encrypted_ballot.object_id,
                encrypted_ballot.ballot_style,
                encrypted_ballot.description_hash,
                encrypted_ballot.previous_tracking_hash,
                encrypted_ballot.contests,
                encrypted_ballot.tracking_hash,
                encrypted_ballot.timestamp,
            )

        # Set up the ballot store
        subject = BallotStore()

        data_cast = new_accepted_ballot()
        data_cast.state = BallotBoxState.CAST

        data_spoiled = new_accepted_ballot()
        data_spoiled.state = BallotBoxState.SPOILED

        # nothing has been stored yet
        for missing_key in ("cast", "spoiled"):
            self.assertIsNone(subject.get(missing_key))

        # try to set a ballot with an unknown state
        self.assertFalse(subject.set("unknown", new_accepted_ballot()))

        # Act
        stored = (("cast", data_cast), ("spoiled", data_spoiled))

        for key, ballot in stored:
            self.assertTrue(subject.set(key, ballot))

        for key, ballot in stored:
            self.assertEqual(subject.get(key), ballot)

        for key, ballot in stored:
            self.assertEqual(subject.exists(key), (True, ballot))

        # test mutate state
        data_cast.state = BallotBoxState.UNKNOWN
        self.assertEqual(subject.exists("cast"), (False, data_cast))

        # test remove
        self.assertTrue(subject.set("cast", None))
        self.assertEqual(subject.exists("cast"), (False, None))
    speedup: Dict[int, float] = {}
    print(f"CPUs detected: {cpu_count()}, launching thread pool")
    pool = Pool(cpu_count())

    # warm up the pool to help get consistent measurements
    results = pool.map(identity, range(1, 30000))
    assert results == list(range(1, 30000))

    bench_start = timer()

    for size in problem_sizes:
        print("Benchmarking on problem size: ", size)
        seeds = rands[0:size]
        inputs = [
            BenchInput(
                get_optional(elgamal_keypair_from_secret(a)),
                rands[size],
                rands[size + 1],
            ) for a in seeds
        ]
        start_all_scalar = timer()
        timing_data = [chaum_pedersen_bench(i) for i in inputs]
        end_all_scalar = timer()

        print(f"  Creating Chaum-Pedersen proofs ({size} iterations)")
        avg_proof_scalar = average([t[0] for t in timing_data])
        std_proof_scalar = std([t[0] for t in timing_data])
        print(f"    Avg    = {avg_proof_scalar:.6f} sec")
        print(f"    Stddev = {std_proof_scalar:.6f} sec")

        print(f"  Validating Chaum-Pedersen proofs ({size} iterations)")