def assign_sampled_ballots(
    jurisdiction: Jurisdiction, round: Round, audit_boards: List[AuditBoard],
):
    """Assign this round's sampled ballots to audit boards, batch by batch.

    Loads every SampledBallot that belongs to a batch of ``jurisdiction`` and
    has a draw in ``round``, groups the ballots by batch name, balances the
    batches across one bucket per audit board, and writes the resulting
    bucket name (constructed from the audit board's id) to each ballot's
    ``audit_board_id``.

    Ballots are added to ``db.session``; this function does not commit.
    """
    # Physical ballots sampled for this jurisdiction in this round, with
    # their batch eagerly populated from the join.
    ballot_query = (
        SampledBallot.query.join(Batch)
        .filter_by(jurisdiction_id=jurisdiction.id)
        .join(SampledBallot.draws)
        .filter_by(round_id=round.id)
        .order_by(Batch.name)  # group_by prefers a sorted list
        .options(contains_eager(SampledBallot.batch))
    )
    ballots_by_batch = group_by(
        ballot_query.all(), key=lambda ballot: ballot.batch.name
    )

    # One bucket per audit board. BalancedBucketList doesn't care which
    # bucket a batch starts in, so everything is loaded into the first
    # bucket and the balancing step spreads it out.
    buckets = [Bucket(board.id) for board in audit_boards]
    for batch_name, batch_ballots in ballots_by_batch.items():
        buckets[0].add_batch(batch_name, len(batch_ballots))
    balanced = BalancedBucketList(buckets)

    # Stamp every ballot with the name of the bucket its batch ended up in.
    for bucket in balanced.buckets:
        for batch_name in bucket.batches:
            for ballot in ballots_by_batch[batch_name]:
                ballot.audit_board_id = bucket.name
                db.session.add(ballot)
def sample_ballots(session: Session, election: Election, round: Round):
    """Draw the ballot sample for ``round`` and persist it via ``session``.

    Steps:
      1. Count the draws already stored for the jurisdiction and pass that
         count to the sampler as ``num_sampled``.
      2. Draw ``sample_size`` tickets from the batch manifest.
      3. Spread the sampled batches across the jurisdiction's audit boards
         with ``BalancedBucketList``.
      4. Create one ``SampledBallot`` per first-time draw
         (``sample_number == 1``) and one ``SampledBallotDraw`` per ticket,
         then commit.

    Only the first contest of the round and the first jurisdiction of the
    election are considered.

    Args:
        session: SQLAlchemy session used for every query, insert and the
            final commit.
        election: Supplies the random seed and the jurisdiction.
        round: Supplies the round contest (sample size) and the round id
            stored on each draw.
    """
    # assume only one contest
    round_contest = round.round_contests[0]
    jurisdiction = election.jurisdictions[0]

    # Draws already recorded for this jurisdiction. Query.count() always
    # returns an int, so no None/zero normalisation is needed.
    num_sampled = (
        session.query(SampledBallotDraw)
        .join(SampledBallot)
        .join(SampledBallot.batch)
        .filter_by(jurisdiction_id=jurisdiction.id)
        .count()
    )
    chosen_sample_size = round_contest.sample_size

    # The sampler needs to have the same inputs given the same manifest,
    # so we key it by batch name rather than batch id (the batch id is an
    # internally generated uuid that changes from one run to the next).
    manifest = {}
    batch_id_from_name = {}
    for batch in jurisdiction.batches:
        manifest[batch.name] = batch.num_ballots
        batch_id_from_name[batch.name] = batch.id

    sample = sampler.draw_sample(
        election.random_seed,
        manifest,
        chosen_sample_size,
        num_sampled=num_sampled,
    )

    audit_boards = jurisdiction.audit_boards

    # batch_sizes: number of distinct new ballots to pull per batch.
    # batches_to_ballots: every draw per batch, including repeat draws.
    batch_sizes: Dict[str, int] = {}
    batches_to_ballots: Dict[str, List[Tuple[int, str, int]]] = {}
    for (ticket_number, (batch_name, ballot_position), sample_number) in sample:
        if batch_name in batch_sizes:
            # A repeat draw of a ballot doesn't affect the batch size.
            if sample_number == 1:
                batch_sizes[batch_name] += 1
            batches_to_ballots[batch_name].append(
                (ballot_position, ticket_number, sample_number)
            )
        else:
            batch_sizes[batch_name] = 1
            batches_to_ballots[batch_name] = [
                (ballot_position, ticket_number, sample_number)
            ]

    # Create the buckets and initially assign batches round-robin.
    buckets = [Bucket(audit_board.name) for audit_board in audit_boards]
    for i, batch_name in enumerate(batch_sizes):
        buckets[i % len(audit_boards)].add_batch(batch_name, batch_sizes[batch_name])

    # Now assign batches fairly.
    bucket_list = BalancedBucketList(buckets)

    # Read audit board and batch info back out. Buckets are matched to
    # audit boards by position.
    # NOTE(review): assumes BalancedBucketList keeps the bucket order it was
    # given -- confirm.
    for audit_board_num, bucket in enumerate(bucket_list.buckets):
        audit_board = audit_boards[audit_board_num]
        for batch_name in bucket.batches:
            batch_id = batch_id_from_name[batch_name]
            for (ballot_position, ticket_number, sample_number) in batches_to_ballots[
                batch_name
            ]:
                if sample_number == 1:
                    sampled_ballot = SampledBallot(
                        id=str(uuid.uuid4()),
                        batch_id=batch_id,
                        ballot_position=ballot_position,
                        audit_board_id=audit_board.id,
                        status=BallotStatus.NOT_AUDITED,
                    )
                    session.add(sampled_ballot)
                else:
                    # Repeat draw: reuse the existing ballot row. The lookup
                    # goes through the caller's session (not the model-level
                    # SampledBallot.query) so it runs in the same session as
                    # the inserts above. one() requires exactly one match.
                    sampled_ballot = (
                        session.query(SampledBallot)
                        .filter_by(
                            batch_id=batch_id, ballot_position=ballot_position,
                        )
                        .one()
                    )

                sampled_ballot_draw = SampledBallotDraw(
                    ballot_id=sampled_ballot.id,
                    round_id=round.id,
                    ticket_number=ticket_number,
                )
                session.add(sampled_ballot_draw)

    session.commit()
def sample_ballots(election, round):
    """Draw the ballot sample for ``round`` and store it as SampledBallot rows.

    Asks the election's sampler for ``sample_size`` ballots, continuing after
    the ballots already sampled for the jurisdiction. The sampled batches are
    balanced across the jurisdiction's audit boards, and one ``SampledBallot``
    is created per distinct (batch, position). A ballot drawn more than once
    gets a single row whose ``times_sampled`` counts the draws. Everything is
    committed on ``db.session`` at the end.

    Only the first contest of the round and the first jurisdiction of the
    election are considered.
    """
    # assume only one contest
    round_contest = round.round_contests[0]
    jurisdiction = election.jurisdictions[0]

    num_sampled = (
        db.session.query(db.func.sum(SampledBallot.times_sampled))
        .filter_by(jurisdiction_id=jurisdiction.id)
        .one()[0]
    )
    if not num_sampled:
        # The aggregate is falsy (e.g. None) when nothing has been sampled.
        num_sampled = 0

    chosen_sample_size = round_contest.sample_size
    sampler = get_sampler(election)

    # The sampler needs to have the same inputs given the same manifest,
    # so we key it by batch name rather than batch id (the batch id is an
    # internally generated uuid that changes from one run to the next).
    manifest = {}
    batch_id_from_name = {}
    for batch in jurisdiction.batches:
        manifest[batch.name] = batch.num_ballots
        batch_id_from_name[batch.name] = batch.id

    sample = sampler.draw_sample(manifest, chosen_sample_size, num_sampled=num_sampled)

    audit_boards = jurisdiction.audit_boards

    # batch_sizes: distinct ballots per batch (a ballot is only pulled once).
    # batches_to_ballots: every drawn position per batch, repeats included.
    batch_sizes = {}
    batches_to_ballots = {}
    seen_ballot_positions = set()
    for batch_name, ballot_position in sample:
        batch_id = batch_id_from_name[batch_name]
        lookup = (batch_id, ballot_position)
        if lookup not in seen_ballot_positions:
            seen_ballot_positions.add(lookup)
            batch_sizes[batch_id] = batch_sizes.get(batch_id, 0) + 1
        batches_to_ballots.setdefault(batch_id, []).append(ballot_position)

    # Create the buckets and initially assign batches round-robin.
    buckets = [Bucket(audit_board.name) for audit_board in audit_boards]
    for i, batch_id in enumerate(batch_sizes):
        buckets[i % len(audit_boards)].add_batch(batch_id, batch_sizes[batch_id])

    # Now assign batches fairly.
    bl = BalancedBucketList(buckets)

    # Ballots created so far, keyed by (batch_id, ballot_position), so a
    # repeat draw bumps times_sampled on the existing row no matter where in
    # the batch's draw list the repeat appears.
    created_ballots = {}

    # Read audit board and batch info back out. Buckets are matched to
    # audit boards by position.
    # NOTE(review): assumes BalancedBucketList keeps the bucket order it was
    # given -- confirm.
    for audit_board_num, bucket in enumerate(bl.buckets):
        audit_board = audit_boards[audit_board_num]
        for batch_id in bucket.batches:
            for ballot_position in batches_to_ballots[batch_id]:
                key = (batch_id, ballot_position)
                existing = created_ballots.get(key)
                if existing is not None:
                    existing.times_sampled += 1
                    continue

                sampled_ballot = SampledBallot(
                    round_id=round.id,
                    jurisdiction_id=jurisdiction.id,
                    batch_id=batch_id,
                    # sampler is 0-indexed, we're 1-indexing here
                    ballot_position=ballot_position + 1,
                    times_sampled=1,
                    audit_board_id=audit_board.id,
                )
                created_ballots[key] = sampled_ballot
                db.session.add(sampled_ballot)

    db.session.commit()
def bucketlist():
    """Yield a BucketList of four buckets ('1'..'4'), two batches each.

    NOTE(review): shaped like a pytest fixture generator; the decorator is
    not visible here -- confirm.
    """
    # (bucket name, [(batch name, batch size), ...]) in creation order.
    layout = [
        ('1', [('1', 100), ('2', 50)]),
        ('2', [('3', 100), ('4', 150)]),
        ('3', [('5', 50), ('6', 50)]),
        ('4', [('7', 100), ('8', 200)]),
    ]

    buckets = []
    for bucket_name, batches in layout:
        current = Bucket(bucket_name)
        for batch_name, batch_size in batches:
            current.add_batch(batch_name, batch_size)
        buckets.append(current)

    yield BucketList(buckets)
def bucket():
    """Yield a freshly constructed Bucket named '1'; no batches are added here."""
    single_bucket = Bucket('1')
    yield single_bucket