Example #1
0
def test_draw_more_samples():
    """Draw two consecutive 10-ballot samples and compare each to its fixture.

    The first draw is made with 0 ballots previously sampled and is checked
    against ``expected_first_sample``; the second is made with
    ``num_sampled=10`` and is checked against ``expected_second_sample``.
    """
    manifest = {
        "pct 1": 25,
        "pct 2": 25,
        "pct 3": 25,
        "pct 4": 25,
    }

    samp_size = 10

    # First draw: nothing has been sampled yet.
    sample = sampler.draw_sample(SEED, manifest, samp_size, 0)
    # The message reports the size actually received first, then the size
    # that was requested.
    assert samp_size == len(sample), "Received sample of size {}, expected {}".format(
        len(sample), samp_size
    )

    for i, item in enumerate(sample):
        expected = expected_first_sample[i]
        assert item == expected, "Draw sample failed: got {}, expected {}".format(
            item, expected
        )

    # Second draw: tell the sampler that the first `samp_size` draws were
    # already taken.
    sample = sampler.draw_sample(SEED, manifest, samp_size, num_sampled=samp_size)
    assert samp_size == len(sample), "Received sample of size {}, expected {}".format(
        len(sample), samp_size
    )
    for i, item in enumerate(sample):
        expected = expected_second_sample[i]
        assert item == expected, "Draw sample failed: got {}, expected {}".format(
            item, expected
        )
Example #2
0
def sample_ballots(election: Election, round: Round, sample_size: int):
    """Draw ``sample_size`` ballots for ``round`` and stage them in ``db.session``.

    The pool of ballots is built from every batch of every jurisdiction in the
    election's targeted contest. Each sampled real-world ballot gets one
    ``SampledBallot`` row, and every individual draw of it gets its own
    ``SampledBallotDraw`` row tied to ``round``. Nothing is committed here.
    """
    # Only a single targeted contest is supported for now.
    targeted_contest = next(
        contest for contest in election.contests if contest.is_targeted
    )

    # Total number of draws (SampledBallotDraws, not SampledBallots) made in
    # all rounds of this election so far.
    prior_draws = SampledBallotDraw.query.join(Round).filter_by(
        election_id=election.id
    )
    num_previously_sampled = prior_draws.count()

    # Build the manifest (the pool of ballots to sample from) together with a
    # lookup from manifest key back to the batch's database id.
    # Audits have to be deterministic and repeatable for the same real-world
    # inputs, so the sampler is keyed on jurisdiction and batch *names*
    # (stable real-world identifiers) rather than on their ids (uuids that are
    # generated afresh for each audit).
    manifest = {}
    batch_key_to_id = {}
    for jurisdiction in targeted_contest.jurisdictions:
        for batch in jurisdiction.batches:
            key = (jurisdiction.name, batch.name)
            manifest[key] = batch.num_ballots
            batch_key_to_id[key] = batch.id

    # Compute the actual sample.
    sample = sampler.draw_sample(
        election.random_seed,
        manifest,
        sample_size,
        num_previously_sampled,
    )

    # Persist the result. The same ballot can be drawn several times, within
    # one round or across rounds. Only its first draw creates a SampledBallot;
    # later draws look the existing row up, so an already-audited ballot never
    # has to be pulled again. Every draw is recorded as a SampledBallotDraw.
    for ticket_number, (batch_key, ballot_position), times_sampled in sample:
        batch_id = batch_key_to_id[batch_key]

        if times_sampled != 1:
            sampled_ballot = SampledBallot.query.filter_by(
                batch_id=batch_id,
                ballot_position=ballot_position,
            ).one()
        else:
            sampled_ballot = SampledBallot(
                id=str(uuid.uuid4()),
                batch_id=batch_id,
                ballot_position=ballot_position,
                status=BallotStatus.NOT_AUDITED,
            )
            db.session.add(sampled_ballot)

        db.session.add(
            SampledBallotDraw(
                ballot_id=sampled_ballot.id,
                round_id=round.id,
                ticket_number=ticket_number,
            )
        )
Example #3
0
def test_draw_sample():
    """Draw 20 ballots from four 25-ballot precincts and compare to the fixture.

    Every drawn item must equal the entry at the same index of
    ``expected_sample``.
    """
    # Four precincts of equal size.
    manifest = dict.fromkeys(("pct 1", "pct 2", "pct 3", "pct 4"), 25)

    drawn = sampler.draw_sample(SEED, manifest, 20, 0)

    for position, actual in enumerate(drawn):
        wanted = expected_sample[position]
        failure = "Draw sample failed: got {}, expected {}"
        assert actual == wanted, failure.format(actual, wanted)
Example #4
0
def test_ballot_labels():
    """Check that sampled ballot numbers fall inside their batch's size.

    Repeats 100 times with a fresh ``random_manifest()``: draws 100 ballots
    and asserts each ballot number is between 1 and the size the manifest
    records for its batch, inclusive.
    """
    trials = 100
    for _trial in range(trials):
        manifest = random_manifest()
        drawn = sampler.draw_sample(SEED, manifest, 100, 0)
        # Each entry is a 3-tuple; only the (batch, ballot number) pair in the
        # middle matters here.
        for _first, (batch, ballot_number), _last in drawn:
            assert 1 <= ballot_number <= manifest[batch]
Example #5
0
def sample_ballots(session: Session, election: Election, round: Round):
    """Draw the round's ballot sample, assign it to audit boards, and commit.

    Uses the first contest of ``round`` and the first jurisdiction of
    ``election``. The sample size comes from that round contest; the manifest
    is built from the jurisdiction's batches. Sampled batches are spread over
    the jurisdiction's audit boards via ``Bucket``/``BalancedBucketList``, and
    one ``SampledBallotDraw`` is stored per draw. A ``SampledBallot`` is
    created only for draws whose sample number is 1; other draws reuse the
    existing row. All reads and writes go through ``session``, which is
    committed at the end.

    Raises whatever ``Query.one()`` raises when a draw with a sample number
    other than 1 has no (or more than one) matching ``SampledBallot``.
    """
    # assume only one contest
    round_contest = round.round_contests[0]
    jurisdiction = election.jurisdictions[0]

    # Number of draws already recorded for this jurisdiction. count() always
    # yields an integer, so no None/zero normalisation is required.
    num_sampled = (
        session.query(SampledBallotDraw)
        .join(SampledBallot)
        .join(SampledBallot.batch)
        .filter_by(jurisdiction_id=jurisdiction.id)
        .count()
    )

    chosen_sample_size = round_contest.sample_size

    # The sampler needs the same inputs for the same manifest, so it is keyed
    # on the batch name rather than the batch id (the id is an internally
    # generated uuid that changes from one run to the next).
    manifest = {}
    batch_id_from_name = {}
    for batch in jurisdiction.batches:
        manifest[batch.name] = batch.num_ballots
        batch_id_from_name[batch.name] = batch.id

    sample = sampler.draw_sample(
        election.random_seed,
        manifest,
        chosen_sample_size,
        num_sampled=num_sampled,
    )

    audit_boards = jurisdiction.audit_boards

    # Per batch: how many ballots it contributes, and the list of
    # (ballot_position, ticket_number, sample_number) draws that landed in it.
    batch_sizes: Dict[str, int] = {}
    batches_to_ballots: Dict[str, List[Tuple[int, str, int]]] = {}
    for ticket_number, (batch_name, ballot_position), sample_number in sample:
        draw = (ballot_position, ticket_number, sample_number)
        if batch_name not in batch_sizes:
            # NOTE(review): the first draw seen for a batch always counts
            # toward its size, even when sample_number != 1 -- confirm this
            # is intended.
            batch_sizes[batch_name] = 1
            batches_to_ballots[batch_name] = [draw]
            continue
        # A repeat draw of an already-seen ballot doesn't affect batch size.
        if sample_number == 1:
            batch_sizes[batch_name] += 1
        batches_to_ballots[batch_name].append(draw)

    # Create the buckets and initially assign batches round-robin.
    buckets = [Bucket(audit_board.name) for audit_board in audit_boards]
    for i, (batch_name, size) in enumerate(batch_sizes.items()):
        buckets[i % len(audit_boards)].add_batch(batch_name, size)

    # Now assign batches fairly
    bucket_list = BalancedBucketList(buckets)

    # Read audit board and batch info back out and persist it.
    for audit_board_num, bucket in enumerate(bucket_list.buckets):
        audit_board = audit_boards[audit_board_num]
        for batch_name in bucket.batches:
            batch_id = batch_id_from_name[batch_name]

            for ballot_position, ticket_number, sample_number in batches_to_ballots[
                batch_name
            ]:
                if sample_number == 1:
                    sampled_ballot = SampledBallot(
                        id=str(uuid.uuid4()),
                        batch_id=batch_id,
                        ballot_position=ballot_position,
                        audit_board_id=audit_board.id,
                        status=BallotStatus.NOT_AUDITED,
                    )
                    session.add(sampled_ballot)
                else:
                    # Look the ballot up through the same session used for
                    # the writes, so rows added earlier in this call (and not
                    # yet committed) are visible to the query.
                    sampled_ballot = (
                        session.query(SampledBallot)
                        .filter_by(
                            batch_id=batch_id,
                            ballot_position=ballot_position,
                        )
                        .one()
                    )

                session.add(
                    SampledBallotDraw(
                        ballot_id=sampled_ballot.id,
                        round_id=round.id,
                        ticket_number=ticket_number,
                    )
                )

    session.commit()