def test_draw_more_samples():
    """Draw two samples of ten and compare each against its fixture.

    The first draw passes 0 as the number already sampled and is checked
    against ``expected_first_sample``; the second passes ``num_sampled=10``
    and is checked against ``expected_second_sample``.
    """
    manifest = {
        "pct 1": 25,
        "pct 2": 25,
        "pct 3": 25,
        "pct 4": 25,
    }
    samp_size = 10

    # First draw: nothing has been sampled yet.
    sample = sampler.draw_sample(SEED, manifest, samp_size, 0)
    # The message reports the received size first, then the expected size.
    assert samp_size == len(sample), "Received sample of size {}, expected {}".format(
        len(sample), samp_size
    )
    for i, item in enumerate(sample):
        expected = expected_first_sample[i]
        assert item == expected, "Draw sample failed: got {}, expected {}".format(
            item, expected
        )

    # Second draw: same seed and manifest, with ten tickets already sampled.
    sample = sampler.draw_sample(SEED, manifest, samp_size, num_sampled=10)
    assert samp_size == len(sample), "Received sample of size {}, expected {}".format(
        len(sample), samp_size
    )
    for i, item in enumerate(sample):
        expected = expected_second_sample[i]
        assert item == expected, "Draw sample failed: got {}, expected {}".format(
            item, expected
        )
def sample_ballots(election: Election, round: Round, sample_size: int):
    """Draw ``sample_size`` ballots for the targeted contest and record them.

    The sample is drawn from the combined manifests of every jurisdiction in
    the targeted contest's universe. Each sampled real-world ballot gets one
    SampledBallot, and every time it is drawn gets its own SampledBallotDraw
    attached to ``round``. Rows are added to ``db.session``; this function
    does not commit.
    """
    # Only a single targeted contest is supported for now.
    targeted_contest = next(
        contest for contest in election.contests if contest.is_targeted
    )

    # Total draws made in all earlier rounds of this election. This counts
    # SampledBallotDraws, not SampledBallots, because one ballot may be drawn
    # several times.
    draws_so_far = (
        SampledBallotDraw.query.join(Round)
        .filter_by(election_id=election.id)
        .count()
    )

    # Build the pool of ballots to sample from (the manifest).
    # Audits must be deterministic and repeatable for the same real-world
    # inputs, so the sampler is keyed on jurisdiction and batch *names*
    # (stable real-world identifiers) rather than on ids (uuids generated
    # afresh for each audit). A side table maps each key back to the batch id.
    manifest = {}
    batch_key_to_id = {}
    for jurisdiction in targeted_contest.jurisdictions:
        for batch in jurisdiction.batches:
            key = (jurisdiction.name, batch.name)
            manifest[key] = batch.num_ballots
            batch_key_to_id[key] = batch.id

    # Do the math: compute the actual sample.
    sample = sampler.draw_sample(
        election.random_seed, manifest, sample_size, draws_so_far
    )

    # Persist the result. A ballot can be sampled more than once, within a
    # round or across rounds. Reusing its SampledBallot means a real-world
    # ballot that was already audited never has to be looked at again.
    for ticket_number, (batch_key, ballot_position), times_sampled in sample:
        batch_id = batch_key_to_id[batch_key]

        if times_sampled != 1:
            # Repeat draw: the ballot row already exists, so look it up.
            sampled_ballot = SampledBallot.query.filter_by(
                batch_id=batch_id, ballot_position=ballot_position
            ).one()
        else:
            # First draw of this ballot: create its row.
            sampled_ballot = SampledBallot(
                id=str(uuid.uuid4()),
                batch_id=batch_id,
                ballot_position=ballot_position,
                status=BallotStatus.NOT_AUDITED,
            )
            db.session.add(sampled_ballot)

        db.session.add(
            SampledBallotDraw(
                ballot_id=sampled_ballot.id,
                round_id=round.id,
                ticket_number=ticket_number,
            )
        )
def test_draw_sample():
    """Draw a sample of twenty and compare it item by item to ``expected_sample``."""
    manifest = {
        "pct 1": 25,
        "pct 2": 25,
        "pct 3": 25,
        "pct 4": 25,
    }
    sample_size = 20
    sample = sampler.draw_sample(SEED, manifest, sample_size, 0)

    # Without this check an empty or truncated sample would make the
    # comparison loop below pass vacuously.
    assert sample_size == len(sample), "Received sample of size {}, expected {}".format(
        len(sample), sample_size
    )

    for i, item in enumerate(sample):
        expected = expected_sample[i]
        assert item == expected, "Draw sample failed: got {}, expected {}".format(
            item, expected
        )
def test_ballot_labels():
    """Check that every sampled ballot number is valid for its batch.

    For 100 manifests produced by ``random_manifest()``, draws 100 tickets and
    asserts each ballot number lies between 1 and the batch's manifest entry,
    inclusive.
    """
    for _trial in range(100):
        manifest = random_manifest()
        drawn = sampler.draw_sample(SEED, manifest, 100, 0)
        for _ticket, (batch, ballot_number), _times_sampled in drawn:
            assert ballot_number >= 1 and ballot_number <= manifest[batch]
def sample_ballots(session: Session, election: Election, round: Round):
    """Draw this round's ballot sample and persist it, split across audit boards.

    Only the round's first contest and the election's first jurisdiction are
    considered. Sampled batches are distributed over the jurisdiction's audit
    boards. A SampledBallot is created for each newly sampled ballot and a
    SampledBallotDraw for every draw.

    Args:
        session: Database session used for all reads and writes; it is
            committed before returning.
        election: Supplies the random seed and the jurisdiction whose batches
            form the manifest.
        round: Supplies the sample size (via its first round contest) and the
            id recorded on every draw.
    """
    # assume only one contest
    round_contest = round.round_contests[0]
    jurisdiction = election.jurisdictions[0]

    # Number of draws already recorded for this jurisdiction. Query.count()
    # always yields an integer, so no None/zero normalisation is needed.
    num_sampled = (
        session.query(SampledBallotDraw)
        .join(SampledBallot)
        .join(SampledBallot.batch)
        .filter_by(jurisdiction_id=jurisdiction.id)
        .count()
    )

    chosen_sample_size = round_contest.sample_size

    # The sampler needs the same inputs given the same manifest, so it is
    # keyed on the batch name rather than the batch id (the id is an
    # internally generated uuid that changes from one run to the next).
    manifest = {}
    batch_id_from_name = {}
    for batch in jurisdiction.batches:
        manifest[batch.name] = batch.num_ballots
        batch_id_from_name[batch.name] = batch.id

    sample = sampler.draw_sample(
        election.random_seed,
        manifest,
        chosen_sample_size,
        num_sampled=num_sampled,
    )

    audit_boards = jurisdiction.audit_boards

    batch_sizes: Dict[str, int] = {}
    batches_to_ballots: Dict[str, List[Tuple[int, str, int]]] = {}

    # Build the batch -> size map and group the draws by batch.
    for ticket_number, (batch_name, ballot_position), sample_number in sample:
        draw = (ballot_position, ticket_number, sample_number)
        if batch_name not in batch_sizes:
            batch_sizes[batch_name] = 1
            batches_to_ballots[batch_name] = [draw]
            continue
        # A ballot we've already seen doesn't affect the batch size.
        if sample_number == 1:
            batch_sizes[batch_name] += 1
        batches_to_ballots[batch_name].append(draw)

    # Create one bucket per audit board and deal the batches out round-robin
    # as an initial assignment.
    buckets = [Bucket(audit_board.name) for audit_board in audit_boards]
    for i, (batch_name, batch_size) in enumerate(batch_sizes.items()):
        buckets[i % len(audit_boards)].add_batch(batch_name, batch_size)

    # Now assign batches fairly.
    # NOTE(review): presumably BalancedBucketList rebalances the buckets while
    # keeping them in audit-board order -- confirm, since the loop below pairs
    # buckets with audit boards by index.
    bucket_list = BalancedBucketList(buckets)

    # Read the audit board and batch assignments back out and persist them.
    for audit_board_num, bucket in enumerate(bucket_list.buckets):
        audit_board = audit_boards[audit_board_num]
        for batch_name in bucket.batches:
            batch_id = batch_id_from_name[batch_name]
            for ballot_position, ticket_number, sample_number in batches_to_ballots[
                batch_name
            ]:
                if sample_number == 1:
                    sampled_ballot = SampledBallot(
                        id=str(uuid.uuid4()),
                        batch_id=batch_id,
                        ballot_position=ballot_position,
                        audit_board_id=audit_board.id,
                        status=BallotStatus.NOT_AUDITED,
                    )
                    session.add(sampled_ballot)
                else:
                    # Look the ballot up through the same session that holds
                    # the pending inserts, so a ballot first drawn earlier in
                    # this call is visible here.
                    sampled_ballot = (
                        session.query(SampledBallot)
                        .filter_by(
                            batch_id=batch_id,
                            ballot_position=ballot_position,
                        )
                        .one()
                    )

                sampled_ballot_draw = SampledBallotDraw(
                    ballot_id=sampled_ballot.id,
                    round_id=round.id,
                    ticket_number=ticket_number,
                )
                session.add(sampled_ballot_draw)

    session.commit()