def test_remove_duplicate_marks(param_dict):
    """Check the result of ``BallotMarks.remove_duplicate_candidate_marks``.

    ``param_dict`` must provide an ``'input'`` entry, used to construct the
    ``BallotMarks`` under test, and an ``'expected'`` entry holding the
    expected ``marks``, ``unique_marks`` and ``unique_candidates`` values.
    (Presumably supplied by a parametrization defined outside this view.)
    """
    ballot = BallotMarks(param_dict['input'])
    deduplicated = BallotMarks.remove_duplicate_candidate_marks(ballot)

    expected = param_dict['expected']
    # Compare the attributes in the same order every time so that the first
    # mismatch reported is stable.
    for attribute in ('marks', 'unique_marks', 'unique_candidates'):
        assert getattr(deduplicated, attribute) == expected[attribute]
Example #2
0
    def _compute_cvr_stat_table(self) -> None:
        """Build the per-ballot statistics table and store it in ``self._cvr_stat_table``.

        The table is a ``pandas.DataFrame`` derived from the ``'weight'`` and
        ``'ballot_marks'`` entries of ``self.get_cvr_dict()`` and from
        ``self.get_candidates()``. Columns, in creation order:

        * ``weight`` - copied from the CVR dictionary.
        * ``valid_ranks_used`` / ``ranks_used_times_weight`` - number of unique
          candidates on the ballot, and that number multiplied by the weight.
        * ``used_last_rank`` - the final mark is not a skip.
        * ``undervote`` - the only distinct mark on the ballot is a skip.
        * ``ranked_single`` / ``ranked_multiple`` / ``ranked_3_or_more`` -
          thresholds on ``valid_ranks_used``.
        * ``first_round`` - first mark left once skips are removed, or ``'NA'``.
        * ``first_round_overvote`` - ``first_round`` is an overvote.
        * ``contains_overvote`` / ``contains_skip`` / ``contains_duplicate`` -
          irregularity flags; ``irregular`` is true when any of them is.
        * ``fully_ranked`` - see the comments at the end of the method.
        """
        cvr = self.get_cvr_dict()
        candidates = self.get_candidates()

        ballots = cvr['ballot_marks']
        skipped = BallotMarks.SKIPPED
        overvote = BallotMarks.OVERVOTE

        stats = pd.DataFrame()
        stats['weight'] = cvr['weight']

        # How many distinct candidates each ballot ranks, raw and weighted.
        stats['valid_ranks_used'] = [len(ballot.unique_candidates) for ballot in ballots]
        stats['ranks_used_times_weight'] = stats['valid_ranks_used'] * stats['weight']

        # The last rank counts as used whenever it holds anything but a skip.
        stats['used_last_rank'] = [ballot.marks[-1] != skipped for ballot in ballots]

        # A ballot whose only distinct mark is a skip is an undervote.
        stats['undervote'] = [ballot.unique_marks == {skipped} for ballot in ballots]
        stats['ranked_single'] = stats['valid_ranks_used'].eq(1)
        stats['ranked_multiple'] = stats['valid_ranks_used'].gt(1)
        stats['ranked_3_or_more'] = stats['valid_ranks_used'].gt(2)

        # First remaining mark after skips are dropped; 'NA' if nothing remains.
        without_skips = [BallotMarks.remove_mark(ballot, [skipped]) for ballot in ballots]
        first_marks = []
        for ballot in without_skips:
            first_marks.append(ballot.marks[0] if ballot.marks else 'NA')
        stats['first_round'] = pd.Series(first_marks, dtype='category')

        stats['first_round_overvote'] = stats['first_round'].eq(overvote)

        stats['contains_overvote'] = [overvote in ballot.unique_marks for ballot in ballots]

        def _has_skip_before_mark(marks):
            # A skip only counts when the rank right after it is not also a
            # skip, i.e. the ballot keeps going after the skipped rank.
            return any(current == skipped and following != skipped
                       for current, following in zip(marks, marks[1:]))

        stats['contains_skip'] = [_has_skip_before_mark(ballot.marks) for ballot in ballots]

        # Duplicates: ignore skips and overvotes, tally what is left, and flag
        # ballots on which any remaining mark appears more than once.
        ranked_only = [BallotMarks.remove_mark(ballot, [skipped, overvote]) for ballot in ballots]
        tallies = [collections.Counter(ballot.marks) for ballot in ranked_only]
        stats['contains_duplicate'] = [any(count > 1 for count in tally.values()) for tally in tallies]

        irregular_columns = ['contains_overvote', 'contains_skip', 'contains_duplicate']
        stats['irregular'] = stats[irregular_columns].any(axis='columns')

        # Fully ranked: the reference set is the candidates with write-ins
        # combined and then removed.
        merged_writeins = BallotMarks.combine_writein_marks(candidates)
        without_writeins = BallotMarks.remove_mark(merged_writeins, [BallotMarks.WRITEIN])
        candidate_set = without_writeins.unique_candidates

        cleaned_ballots = [BallotMarks.remove_mark(ballot, [overvote, skipped]) for ballot in ballots]
        cleaned_ballots = [BallotMarks.remove_duplicate_candidate_marks(ballot) for ballot in cleaned_ballots]

        fully_ranked = []
        for original, cleaned in zip(ballots, cleaned_ballots):
            # Either the voter ranked every candidate in the reference set ...
            ranked_everyone = (set(cleaned.marks) & candidate_set) == candidate_set
            # ... or cleaning removed nothing, meaning the ballot had no
            # skipped ranks, overvotes, or duplicates.
            fully_ranked.append(ranked_everyone or len(original.marks) == len(cleaned.marks))
        stats['fully_ranked'] = fully_ranked

        self._cvr_stat_table = stats
def test_remove_duplicate_marks_errors(error_type, input):
    """Check that removing duplicate candidate marks from ``input`` raises ``error_type``.

    Both arguments are presumably supplied by a parametrization defined
    outside this view.
    """
    expect_failure = pytest.raises(error_type)
    with expect_failure:
        BallotMarks.remove_duplicate_candidate_marks(input)