def test_apply_rules(param_dict):
    """Apply the parametrized rule set to the input marks and compare all three views."""
    given = param_dict['input']
    ballot = BallotMarks(given['marks'])
    ballot.apply_rules(**given['rules'])

    expected = param_dict['expected']
    assert ballot.marks == expected['marks']
    assert ballot.unique_marks == expected['unique_marks']
    assert ballot.unique_candidates == expected['unique_candidates']
def test_combine_writein_marks(param_dict):
    """Check the marks and unique sets of the object returned by combine_writein_marks."""
    result = BallotMarks.combine_writein_marks(BallotMarks(param_dict['input']))

    expected = param_dict['expected']
    assert result.marks == expected['marks']
    assert result.unique_marks == expected['unique_marks']
    assert result.unique_candidates == expected['unique_candidates']
def test_remove_duplicate_marks(param_dict):
    """Check the marks and unique sets of the object returned by remove_duplicate_candidate_marks."""
    result = BallotMarks.remove_duplicate_candidate_marks(BallotMarks(param_dict['input']))

    expected = param_dict['expected']
    assert result.marks == expected['marks']
    assert result.unique_marks == expected['unique_marks']
    assert result.unique_candidates == expected['unique_candidates']
def test_update_marks():
    """update_marks on a BallotMarks built with no arguments should set marks and both unique sets."""
    candidates = {'A', 'B'}
    new_marks = ['A', 'B', 'B', BallotMarks.OVERVOTE, BallotMarks.SKIPPED]

    ballot = BallotMarks()
    ballot.update_marks(new_marks)

    assert ballot.marks == new_marks
    # unique marks are the candidates plus the two special marks that were passed in
    assert ballot.unique_marks == candidates | {BallotMarks.OVERVOTE, BallotMarks.SKIPPED}
    assert ballot.unique_candidates == candidates
def _make_candidate_set(self, rule_set_name: str) -> None:
    """
    Build the candidate set for a registered rule set and store it in self._candidate_sets.

    The union of every ballot's unique_candidates is wrapped in a BallotMarks object, and only the
    write-in related rules (combine / exclude) of the rule set are applied to it.

    Raises RuntimeError if rule_set_name is not a key of self._rule_sets.
    """
    if rule_set_name not in self._rule_sets:
        raise RuntimeError(f'rule set {rule_set_name} has not yet been added using add_rule_set().')

    parsed = self._parsed_cvr

    # only the write-in rules are relevant to the candidate set
    rules = self._rule_sets[rule_set_name]
    combine_flag = rules['combine_writein_marks']
    exclude_flag = rules['exclude_writein_marks']

    every_candidate = set.union(*(ballot.unique_candidates for ballot in parsed['ballot_marks']))
    candidate_marks = BallotMarks(every_candidate)
    candidate_marks.apply_rules(combine_writein_marks=combine_flag,
                                exclude_writein_marks=exclude_flag)

    self._candidate_sets[rule_set_name] = candidate_marks
def _clean_round(self) -> None:
    """
    Remove any newly inactivated candidates from the ballot ranks. But only
    remove previous round winners from specified ballots.

    Newly inactivated candidates that are not in self._round_winners are removed from every ballot
    in self._contest_cvr_ld. Round winners are removed only from the ballots flagged by
    self._removal_ballots(winner, default_as_true=True). Every newly inactivated candidate is
    appended to self._removed_candidates so it is processed once.
    """
    winners = []
    for inactive_cand in self._inactive_candidates:
        if inactive_cand in self._removed_candidates:
            continue

        if inactive_cand in self._round_winners:
            # winners are handled after all other candidates have been removed (see below)
            winners.append(inactive_cand)
        else:
            self._contest_cvr_ld = [{
                'ballot_marks': BallotMarks.remove_mark(b['ballot_marks'], [inactive_cand]),
                'weight': b['weight'],
                'weight_distrib': b['weight_distrib']
            } for b in self._contest_cvr_ld]
        self._removed_candidates.append(inactive_cand)

    # remove all other candidates before winners. This mostly matters in the first round when all
    # zero-vote candidates are removed. That is the only time a loser and a winner might both be
    # inactivated in the same round. It may also happen in the last round, but transfer calculations
    # at that point have no impact.
    # All removal masks are computed up front, before any winner is stripped from the ballots.
    remove_bool_lists = [self._removal_ballots(winner, default_as_true=True) for winner in winners]

    for winner, to_remove in zip(winners, remove_bool_lists):
        self._contest_cvr_ld = [
            {
                # remove the winner itself; the previous code removed the stale loop variable
                # `inactive_cand` left over from the loop above, i.e. possibly a different candidate
                'ballot_marks': BallotMarks.remove_mark(b['ballot_marks'], [winner]),
                'weight': b['weight'],
                'weight_distrib': b['weight_distrib']
            } if is_remove else b
            for b, is_remove in zip(self._contest_cvr_ld, to_remove)
        ]
def __init__(self,
             jurisdiction: str = "",
             state: str = "",
             year: str = "",
             date: str = "",
             office: str = "",
             notes: str = "",
             parser_func: Optional[Callable] = None,
             parser_args: Optional[Dict] = None,
             parsed_cvr: Optional[Dict] = None,
             split_fields: Optional[List] = None) -> None:
    """
    Store the identifying info, prepare the parsed CVR, register the default rule set
    and compute the CVR statistics tables.

    The CVR data comes either from calling parser_func(**parser_args) or from an already
    parsed parsed_cvr; see _prepare_parsed_cvr for the validation applied.
    """
    # ID INFO
    self.jurisdiction = jurisdiction
    self.state = state
    self.date = date
    self.year = year
    self.office = office
    self.notes = notes
    self.split_fields = split_fields
    self.unique_id = self._unique_id()

    # one-row frame holding the identifying attributes, in this column order
    id_columns = ('jurisdiction', 'state', 'date', 'year', 'office', 'notes', 'unique_id')
    self._id_df = pd.DataFrame({column: [getattr(self, column)] for column in id_columns})

    # CVR DATA
    self._parsed_cvr = self._prepare_parsed_cvr(parser_func=parser_func,
                                                parser_args=parser_args,
                                                parsed_cvr=parsed_cvr)
    self._modified_cvrs = {}
    self._candidate_sets = {}
    self._rule_sets = {}

    # make a default rule set that is just the parsed cvr
    self._default_rule_set_name = '__cvr'
    default_rules = BallotMarks.new_rule_set()
    self.add_rule_set(self._default_rule_set_name, default_rules)

    # STAT INFO
    self._cvr_stat_table = None
    self._compute_cvr_stat_table()

    self._summary_cvr_stat_table = None
    self._compute_summary_cvr_stat_table()

    self._split_filter_dict = {}
    self._summary_cvr_split_stat_table = None
def _clean_round(self) -> None:
    """
    Remove any newly inactivated candidates from the ballot ranks.

    Each inactive candidate not yet listed in self._removed_candidates is removed from every
    ballot in self._contest_cvr_ld and then recorded as removed.
    """
    for candidate in self._inactive_candidates:
        if candidate in self._removed_candidates:
            # already stripped from the ballots in an earlier call
            continue

        stripped_ballots = []
        for ballot in self._contest_cvr_ld:
            stripped_ballots.append({
                'ballot_marks': BallotMarks.remove_mark(ballot['ballot_marks'], [candidate]),
                'weight': ballot['weight'],
                'weight_distrib': ballot['weight_distrib']
            })

        self._contest_cvr_ld = stripped_ballots
        self._removed_candidates.append(candidate)
def _prepare_parsed_cvr(self, parser_func: Optional[Callable] = None, parser_args: Optional[Dict] = None, parsed_cvr: Optional[Dict[str, List]] = None) -> Dict[str, List]: if parser_func and parser_args: parsed_cvr = parser_func(**parser_args) if not parsed_cvr: raise ValueError('if no parser_func and parser_args are passed, a parsed_cvr must be passed.') # if parser returns a list, assume it is rank list of lists if isinstance(parsed_cvr, list): parsed_cvr = { 'ranks': parsed_cvr, 'weight': [decimal.Decimal('1')] * len(parsed_cvr) } if 'ranks' not in parsed_cvr: raise RuntimeError('Parsed CVR does not contain field "ranks"') if len(parsed_cvr['ranks']) == 0: raise RuntimeError('parsed ranks list is empty.') if 'weight' not in parsed_cvr: parsed_cvr['weight'] = [decimal.Decimal('1') for _ in parsed_cvr['ranks']] if not isinstance(parsed_cvr['weight'][0], decimal.Decimal): parsed_cvr['weight'] = [decimal.Decimal(str(i)) for i in parsed_cvr['weight']] parsed_cvr['ballot_marks'] = [BallotMarks(ranks) for ranks in parsed_cvr['ranks']] del parsed_cvr['ranks'] ballot_lengths = collections.Counter(len(b.marks) for b in parsed_cvr['ballot_marks']) if len(ballot_lengths) > 1: raise RuntimeError(f'Parsed CVR contains ballots with unequal length rank lists. {str(ballot_lengths)}') field_lengths = {k: len(parsed_cvr[k]) for k in parsed_cvr} if len(set(field_lengths.values())) > 1: raise RuntimeError(f'Parsed CVR contains fields of unequal length. {str(field_lengths)}') return parsed_cvr
def test_remove_duplicate_marks_errors(error_type, input):
    """remove_duplicate_candidate_marks should raise error_type for the parametrized input."""
    expect_failure = pytest.raises(error_type)
    with expect_failure:
        BallotMarks.remove_duplicate_candidate_marks(input)
def test_combine_writein_marks_errors(error_type, input):
    """combine_writein_marks should raise error_type for the parametrized input."""
    expect_failure = pytest.raises(error_type)
    with expect_failure:
        BallotMarks.combine_writein_marks(input)
def test_check_writein_match(param_dict):
    """check_writein_match should return the expected value for the parametrized input."""
    actual = BallotMarks.check_writein_match(param_dict['input'])
    assert param_dict['expected'] == actual
assert computed.marks == param_dict['expected']['marks'] assert computed.unique_marks == param_dict['expected']['unique_marks'] assert computed.unique_candidates == param_dict['expected'][ 'unique_candidates'] param_dicts = [ ({ 'input': { 'marks': [ 'A', 'B', 'B', 'writein10', 'Tuwi', BallotMarks.WRITEIN, 'uwi', BallotMarks.OVERVOTE ], 'rules': BallotMarks.new_rule_set() }, 'expected': { 'marks': [ 'A', 'B', 'B', 'writein10', 'Tuwi', BallotMarks.WRITEIN, 'uwi', BallotMarks.OVERVOTE ], 'unique_marks': { 'A', 'B', 'writein10', 'Tuwi', BallotMarks.WRITEIN, 'uwi', BallotMarks.OVERVOTE }, 'unique_candidates': {'A', 'B', 'writein10', 'Tuwi', 'uwi', BallotMarks.WRITEIN} } }), ({
def _compute_cvr_stat_table(self) -> None:
    """
    Compute one row of statistics per ballot and store the frame in self._cvr_stat_table.

    Columns, in order: weight, valid_ranks_used, ranks_used_times_weight, used_last_rank,
    undervote, ranked_single, ranked_multiple, ranked_3_or_more, first_round,
    first_round_overvote, contains_overvote, contains_skip, contains_duplicate, irregular,
    fully_ranked.
    """
    cvr = self.get_cvr_dict()
    candidates = self.get_candidates()
    ballots = cvr['ballot_marks']

    df = pd.DataFrame()
    df['weight'] = cvr['weight']

    # rank usage
    df['valid_ranks_used'] = [len(ballot.unique_candidates) for ballot in ballots]
    df['ranks_used_times_weight'] = df['valid_ranks_used'] * df['weight']
    df['used_last_rank'] = [ballot.marks[-1] != BallotMarks.SKIPPED for ballot in ballots]
    df['undervote'] = [ballot.unique_marks == {BallotMarks.SKIPPED} for ballot in ballots]
    df['ranked_single'] = df['valid_ranks_used'] == 1
    df['ranked_multiple'] = df['valid_ranks_used'] > 1
    df['ranked_3_or_more'] = df['valid_ranks_used'] > 2

    # first_round: first mark left once skipped ranks are removed, 'NA' if nothing is left
    skips_removed = [BallotMarks.remove_mark(ballot, [BallotMarks.SKIPPED]) for ballot in ballots]
    first_marks = [ballot.marks[0] if ballot.marks else 'NA' for ballot in skips_removed]
    df['first_round'] = pd.Series(first_marks, dtype='category')
    df['first_round_overvote'] = df['first_round'].eq(BallotMarks.OVERVOTE)

    df['contains_overvote'] = [BallotMarks.OVERVOTE in ballot.unique_marks for ballot in ballots]

    # contains_skip
    # For each pair of consecutive marks (x, y), {SKIPPED} & ({x} - {y}) is non-empty only when
    # x is SKIPPED and y is a different mark, i.e. a skipped rank that is directly followed by
    # something other than another skipped rank.
    df['contains_skip'] = [
        any({BallotMarks.SKIPPED} & ({x} - {y}) for x, y in zip(ballot.marks, ballot.marks[1:]))
        for ballot in ballots
    ]

    # contains_duplicate
    # ignore skipped ranks and overvotes, then check whether any remaining mark occurs twice
    candidates_only = [
        BallotMarks.remove_mark(ballot, [BallotMarks.SKIPPED, BallotMarks.OVERVOTE])
        for ballot in ballots
    ]
    mark_counts = [collections.Counter(ballot.marks) for ballot in candidates_only]
    df['contains_duplicate'] = [any(n > 1 for n in counts.values()) for counts in mark_counts]

    irregular_columns = ['contains_overvote', 'contains_skip', 'contains_duplicate']
    df['irregular'] = df[irregular_columns].any(axis='columns')

    # fully_ranked
    writeins_combined = BallotMarks.combine_writein_marks(candidates)
    writeins_removed = BallotMarks.remove_mark(writeins_combined, [BallotMarks.WRITEIN])
    candidate_set = writeins_removed.unique_candidates

    cleaned = [
        BallotMarks.remove_mark(ballot, [BallotMarks.OVERVOTE, BallotMarks.SKIPPED])
        for ballot in ballots
    ]
    cleaned = [BallotMarks.remove_duplicate_candidate_marks(ballot) for ballot in cleaned]

    df['fully_ranked'] = [
        # voters ranked every possible candidate
        candidate_set.issubset(clean.marks)
        # or did not, had no skipped ranks, overvotes, or duplicates
        or len(raw.marks) == len(clean.marks)
        for raw, clean in zip(ballots, cleaned)
    ]

    self._cvr_stat_table = df
def _compute_summary_cvr_stat_table(self) -> None:
    """
    Aggregate self._cvr_stat_table into a one-row summary frame stored in
    self._summary_cvr_stat_table.

    All ballot counts are weighted: they are sums of the 'weight' column over the ballots
    for which the corresponding per-ballot flag is set.
    """
    cvr = self.get_cvr_dict()
    candidates = self.get_candidates()
    table = self._cvr_stat_table

    def weight_where(flag_column):
        # total weight of the ballots whose boolean flag_column is set
        return table.loc[table[flag_column], 'weight'].sum()

    s = pd.Series(dtype=object)

    writeins_combined = BallotMarks.combine_writein_marks(candidates)
    candidates_no_writeins = BallotMarks.remove_mark(writeins_combined, [BallotMarks.WRITEIN])
    n_candidates = len(candidates_no_writeins.marks)
    rank_limit = len(cvr['ballot_marks'][0].marks)

    s['n_candidates'] = n_candidates
    s['rank_limit'] = rank_limit
    s['restrictive_rank_limit'] = rank_limit < (n_candidates - 1)

    # first_round_overvote
    # The number of ballots with an overvote before any valid ranking. (weighted)
    # Note that this is not the same as "exhausted by overvote". This is because
    # some jurisdictions (Maine) discard any ballot beginning with two
    # skipped rankings, and call this ballot as exhausted by skipped rankings, even if the
    # skipped rankings are followed by an overvote.
    # Other jurisdictions (Minneapolis) simply skip over overvotes in a ballot.
    s['first_round_overvote'] = weight_where('first_round_overvote')

    # The number of voters that validly used only a single ranking. (weighted)
    s['ranked_single'] = weight_where('ranked_single')

    # The number of voters that validly used 3 or more rankings. (weighted)
    s['ranked_3_or_more'] = weight_where('ranked_3_or_more')

    # The number of voters that validly use more than one ranking. (weighted)
    s['ranked_multiple'] = weight_where('ranked_multiple')

    # The number of voters that have validly used all available rankings on the
    # ballot, or that have validly ranked all non-write-in candidates. (weighted)
    s['total_fully_ranked'] = weight_where('fully_ranked')

    # The number of ballots that rank the same candidate more than once. (weighted)
    s['includes_duplicate_ranking'] = weight_where('contains_duplicate')

    # The number of ballots that have a skipped ranking followed by any other marked ranking. (weighted)
    s['includes_skipped_ranking'] = weight_where('contains_skip')

    # This includes ballots with no marks. (weighted)
    s['total_ballots'] = table['weight'].sum()

    # Number of ballots that either had a multiple ranking, overvote,
    # or a skipped ranking (only those followed by a mark). This includes ballots even where the
    # irregularity was not the cause of exhaustion. (weighted)
    s['total_irregular'] = weight_where('irregular')

    # Number of ballots with at least one overvote. Not necessarily cause of exhaustion. (weighted)
    s['includes_overvote_ranking'] = weight_where('contains_overvote')

    # Ballots completely made up of skipped rankings (no marks). (weighted)
    s['total_undervote'] = weight_where('undervote')

    # the mean and median below only consider ballots that are not undervotes
    not_undervote = ~table['undervote']

    # Mean number of validly used rankings across all non-undervote ballots. (weighted)
    weighted_rank_total = table.loc[not_undervote, 'ranks_used_times_weight'].sum()
    s['mean_rankings_used'] = weighted_rank_total / table.loc[not_undervote, 'weight'].sum()

    # Median number of validly used rankings across all non-undervote ballots. (weighted)
    ranks_used = table.loc[not_undervote, 'valid_ranks_used'].tolist()
    weights_float = [float(w) for w in table.loc[not_undervote, 'weight'].tolist()]
    s['median_rankings_used'] = weightedstats.weighted_median(ranks_used, weights=weights_float)

    self._summary_cvr_stat_table = s.to_frame().transpose()
def __init__(self,
             exhaust_on_duplicate_candidate_marks: bool = False,
             exhaust_on_overvote_marks: bool = False,
             exhaust_on_repeated_skipped_marks: bool = False,
             treat_combined_writeins_as_exhaustable_duplicates: bool = True,
             combine_writein_marks: bool = True,
             exclude_writein_marks: bool = False,
             n_winners: Optional[int] = None,
             multi_winner_rounds: Optional[bool] = None,
             *args,
             **kwargs) -> None:
    """
    Initialize the CVR (remaining arguments are forwarded to the parent constructor), register
    the contest rule set, run the contest and compute the contest statistics tables.

    The contest rule set always excludes duplicate candidate marks, overvote marks and skipped
    marks; the remaining rule values are taken from the constructor arguments.
    """
    # INIT CVR
    super().__init__(*args, **kwargs)

    # APPLY CONTEST RULES
    self._contest_rule_set_name = '__contest'
    contest_rules = BallotMarks.new_rule_set(
        combine_writein_marks=combine_writein_marks,
        exclude_writein_marks=exclude_writein_marks,
        exclude_duplicate_candidate_marks=True,
        exclude_overvote_marks=True,
        exclude_skipped_marks=True,
        treat_combined_writeins_as_exhaustable_duplicates=treat_combined_writeins_as_exhaustable_duplicates,
        exhaust_on_duplicate_candidate_marks=exhaust_on_duplicate_candidate_marks,
        exhaust_on_overvote_marks=exhaust_on_overvote_marks,
        exhaust_on_repeated_skipped_marks=exhaust_on_repeated_skipped_marks,
    )
    self.add_rule_set(self._contest_rule_set_name, contest_rules)

    # CONTEST INPUTS
    self._n_winners = n_winners
    self._multi_winner_rounds = multi_winner_rounds
    self._contest_candidates = self.get_candidates(self._contest_rule_set_name)
    self._contest_cvr_ld = None
    self._reset_ballots()

    # INIT STATE INFO

    # contest-level
    self._tab_num = 0
    self._tabulations = []

    # tabulation-level
    self._inactive_candidates = []
    self._removed_candidates = []

    # round-level
    self._round_num = 0
    self._round_winners = []
    self._round_loser = None

    # RUN
    self._run_contest()

    # CONTEST STATS
    self._contest_stat_table = None
    self._compute_contest_stat_table()

    self._summary_contest_stat_tables = None
    self._compute_summary_contest_stat_tables()

    self._summary_contest_split_stat_tables = None
def test_constructor_errors(error_type, input):
    """Constructing BallotMarks from the parametrized input should raise error_type."""
    expect_failure = pytest.raises(error_type)
    with expect_failure:
        BallotMarks(input)
def test_apply_rules_errors():
    """Calling apply_rules a second time on the same BallotMarks should raise RuntimeError."""
    # Setup stays outside the raises block: a RuntimeError from the constructor or from the
    # first apply_rules() call must fail this test instead of satisfying it.
    b = BallotMarks(['A', 'B', 'C'])
    b.apply_rules()

    with pytest.raises(RuntimeError):
        b.apply_rules()
def test_remove_mark_errors(error_type, input1, input2):
    """remove_mark should raise error_type for the parametrized pair of arguments."""
    expect_failure = pytest.raises(error_type)
    with expect_failure:
        BallotMarks.remove_mark(input1, input2)