def read_reported_cvrs(e):
    """
    Read the reported cast vote records of every collection of election e.

    For each pbcid in e.pbcids, the file picked by utils.greatest_name
    among 2-reported/22-reported-cvrs/reported-cvrs-<PBCID>.csv is read.
    Each row's selections are put into canonical (sorted tuple) form and
    recorded in e.rv_cpb[cid][pbcid][bid]; the vote is also registered
    under e.votes_c[cid].
    """

    election_dir = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname)
    cvr_dir = os.path.join(election_dir, "2-reported", "22-reported-cvrs")
    columns = ["Collection", "Scanner", "Ballot id", "Contest", "Selections"]

    for collection_id in e.pbcids:
        prefix = "reported-cvrs-" + ids.filename_safe(collection_id)
        cvr_filename = utils.greatest_name(cvr_dir, prefix, ".csv")
        cvr_rows = csv_readers.read_csv_file(
            os.path.join(cvr_dir, cvr_filename), columns, varlen=True)

        for record in cvr_rows:
            # The collection id stored with the vote is the one given in
            # the row, not the one the filename was derived from.
            pbcid = record["Collection"]
            scanner = record["Scanner"]  # read, but currently unused
            bid = record["Ballot id"]
            cid = record["Contest"]
            # put vote selids into canonical order
            vote = tuple(sorted(record["Selections"]))
            utils.nested_set(e.rv_cpb, [cid, pbcid, bid], vote)
            utils.nested_set(e.votes_c, [cid, vote], True)
def generate_rv_cpb(e, synpar):
    """
    Generate the reported selection for each contest and ballot.

    Populates e.rv_cpb[cid][pbcid][bid] (resetting e.rv_cpb first) by
    drawing from e.selids_c[cid] for every contest cid listed in
    synpar.cids_b[bid], for every ballot bid in e.bids_p[pbcid].

    Every generated vote is a tuple, so that it is hashable and can be
    used as a key (votes are used as keys under e.votes_c elsewhere):
      * plurality contests get a 1-tuple holding a single selid;
      * any other contest type gets a random permutation of all selids.
    """

    e.rv_cpb = {}
    for pbcid in e.pbcids:
        for bid in e.bids_p[pbcid]:
            for cid in synpar.cids_b[bid]:
                selids = list(e.selids_c[cid])
                if e.contest_type_c[cid] == 'plurality':
                    # give min(selids) an "edge" (expected margin) for winning
                    if synpar.RandomState.uniform() <= synpar.margin:
                        selection = min(selids)
                    else:
                        selection = synpar.RandomState.choice(selids)
                    rv = (selection, )
                else:
                    # assume otherwise that vote is permutation of selids
                    # (This will need refinement later presumably.)
                    permutation = list(selids)
                    synpar.RandomState.shuffle(permutation)  # in-place
                    # Store a tuple, not a list: a list is unhashable and
                    # cannot later serve as a dict key for this vote.
                    rv = tuple(permutation)
                utils.nested_set(e.rv_cpb, [cid, pbcid, bid], rv)
def read_reported_outcomes(e):
    """
    Read the reported outcome of each contest into e.ro_c.

    The file is the one picked by utils.greatest_name among
    2-reported/23-reported-outcomes*.csv of the election directory.
    Each row gives a "Contest" and its "Winner(s)" (variable-length last
    field); the winners are stored as e.ro_c[cid].
    """

    election_dir = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname)
    reported_dir = os.path.join(election_dir, "2-reported")
    outcomes_filename = utils.greatest_name(reported_dir,
                                            "23-reported-outcomes",
                                            ".csv")
    outcome_rows = csv_readers.read_csv_file(
        os.path.join(reported_dir, outcomes_filename),
        ["Contest", "Winner(s)"],
        varlen=True)

    for outcome in outcome_rows:
        contest_id = outcome["Contest"]
        reported_winners = outcome["Winner(s)"]
        utils.nested_set(e.ro_c, [contest_id], reported_winners)
def generate_audited_votes(e, synpar):
    """
    Generate synthetic audited votes e.av_cpb from reported votes e.rv_cpb.

    e.av_cpb is reset, then for every (cid, pbcid, bid) in e.rv_cpb the
    audited vote defaults to the reported vote.  When a draw of
    synpar.RandomState.uniform() is <= synpar.error_rate, an error is
    simulated: the audited vote becomes a 1-tuple holding a selid of
    e.selids_c[cid] chosen at random, preferring selids that do not
    appear in the reported vote so that the audited vote really differs.
    """

    e.av_cpb = {}
    for cid in e.rv_cpb:
        for pbcid in e.rv_cpb[cid]:
            for bid in e.rv_cpb[cid][pbcid]:
                rv = e.rv_cpb[cid][pbcid][bid]
                av = rv  # default no error
                if synpar.RandomState.uniform() <= synpar.error_rate:
                    selids = list(e.selids_c[cid])
                    # rv is a vote (a tuple of selids), so it must be
                    # compared element-wise: testing "rv in selids" is
                    # never true for a list of selid strings.
                    alternatives = [s for s in selids if s not in rv]
                    # Fall back to the full list when the reported vote
                    # already covers every selid (e.g. a single-selid
                    # contest); leave av unchanged if there are no selids.
                    pool = alternatives or selids
                    if pool:
                        av = (synpar.RandomState.choice(pool), )
                utils.nested_set(e.av_cpb, [cid, pbcid, bid], av)
def finish_election_spec_contest_groups(e):
    """
    Derive required/possible contest tables from contest-group ids.

    After groups.expand_contest_group_defs(e) has run, this fills, for
    every collection pbcid and contest cid:
      e.required_cid_p[pbcid][cid] / e.required_pbcid_c[cid][pbcid]
      e.possible_cid_p[pbcid][cid] / e.possible_pbcid_c[cid][pbcid]
    with the string "True" for each (pbcid, cid) pair that applies.

    A required group id of "" means nothing is required; a possible
    group id of "" means everything is possible.  Every required contest
    is also recorded as possible.
    """

    groups.expand_contest_group_defs(e)

    for pbcid in e.pbcids:
        e.possible_cid_p[pbcid] = {}
        e.required_cid_p[pbcid] = {}

    for cid in e.cids:
        e.possible_pbcid_c[cid] = {}
        e.required_pbcid_c[cid] = {}

    for pbcid in e.pbcids:
        req_gid = e.required_gid_p[pbcid]
        poss_gid = e.possible_gid_p[pbcid]
        for cid in e.cids:
            # "" req_gid means nothing is required, so e.cids_g must not
            # be indexed with it (in either test below).
            is_required = req_gid != "" and cid in e.cids_g[req_gid]
            if is_required:
                utils.nested_set(e.required_cid_p, [pbcid, cid], "True")
                utils.nested_set(e.required_pbcid_c, [cid, pbcid], "True")
            # enforce that possible contests includes all required contests
            # "" poss_gid means everything is possible
            if (poss_gid == ""
                    or cid in e.cids_g[poss_gid]
                    or is_required):
                utils.nested_set(e.possible_cid_p, [pbcid, cid], "True")
                utils.nested_set(e.possible_pbcid_c, [cid, pbcid], "True")
def check_reported_selids(e):
    """
    Make the contest tables consistent with the reported votes.

    For every contest cid, every collection in e.possible_pbcid_c[cid]
    and every ballot in that collection, the reported vote (or
    ("-NoSuchContest",) when the ballot has no reported vote for the
    contest) is registered under e.votes_c[cid], and every write-in or
    error selid it contains is added to e.selids_c[cid].
    """

    for cid in e.cids:
        for pbcid in e.possible_pbcid_c[cid]:
            for bid in e.bids_p[pbcid]:
                reported_here = e.rv_cpb[cid][pbcid]
                rv = (reported_here[bid]
                      if bid in reported_here
                      else ("-NoSuchContest", ))
                utils.nested_set(e.votes_c, [cid, rv], True)
                for selid in rv:
                    special = ids.is_writein(selid) or ids.is_error_selid(selid)
                    if special:
                        e.selids_c[cid][selid] = True
def generate_election_spec_contests(e, synpar):
    """
    Generate the contest part of a synthetic election specification.

    Creates synpar.n_cids plurality contests "con1", "con2", ... in
    e.cids (a set), with no extra parameters and no write-ins; picks
    synpar.n_cids_wrong of them at random as synpar.cids_wrong; gives
    each contest a number of selids "sel1", "sel2", ... obtained from
    syn.geospace_choice with synpar.min_n_selids_per_cid and
    synpar.max_n_selids_per_cid; and registers each single-selid vote
    under e.votes_c[cid].

    All random draws are made while walking the contests in sorted
    order.  Iterating the set directly would make the result depend on
    string hash randomisation, so the same random seed could produce
    different elections in different interpreter runs.
    """

    # check number of contests
    assert isinstance(synpar.n_cids, int) and synpar.n_cids >= 1

    # make cid for each contest
    e.cids = set("con{}".format(i + 1) for i in range(synpar.n_cids))
    ordered_cids = sorted(e.cids)  # fixed order for reproducible draws

    # generate contest types as plurality and additional parameters
    # no write-ins
    for cid in ordered_cids:
        e.contest_type_c[cid] = "plurality"
        e.params_c[cid] = ""
        e.write_ins_c[cid] = "no"

    # check number of cids with wrong reported outcome
    assert isinstance(synpar.n_cids_wrong, int)
    assert 0 <= synpar.n_cids_wrong <= synpar.n_cids

    # determine which, if any, cids have wrong reported outcome
    cids_list = list(ordered_cids)
    synpar.RandomState.shuffle(cids_list)  # in-place
    synpar.cids_wrong = cids_list[:synpar.n_cids_wrong]

    # generate selids for each cid
    e.n_selids_c = {}
    e.selids_c = {}
    for cid in ordered_cids:
        e.n_selids_c[cid] = syn.geospace_choice(e,
                                                synpar,
                                                synpar.min_n_selids_per_cid,
                                                synpar.max_n_selids_per_cid)
        e.selids_c[cid] = {
            "sel{}".format(i): True
            for i in range(1, e.n_selids_c[cid] + 1)
        }

    # generate possible votes for each cid
    for cid in ordered_cids:
        if e.contest_type_c[cid] == "plurality":
            for selid in e.selids_c[cid]:
                utils.nested_set(e.votes_c, [cid, (selid, )], True)
        else:
            utils.myerror(("Contest {} is not plurality---"
                           "Can't generate votes for it.").format(cid))
def vmsg2dict(vmsg):
    """
    Converts fixed VMSG to dict.

    Every line is split at its first ":" into a key and a value.
    "BEGIN:<name>" opens a section and "END:<name>" closes it; any other
    line is stored with nested_set under the path made of the currently
    open section names followed by the key.

    :param vmsg: multiline (str) with fixed VMSG
    :return: (dict)
    """
    result = {}
    open_sections = []
    for raw_line in vmsg.splitlines():
        pieces = raw_line.split(":", 1)
        key, value = pieces[0], pieces[1]
        if key == "BEGIN":
            open_sections.append(value)
        elif key == "END":
            open_sections.remove(value)
        else:
            nested_set(result, open_sections + [key], value)
    return result
def read_audited_votes(e):
    """
    Read audited votes from 3-audit/33-audited-votes/audited-votes-PBCID.csv

    For each pbcid in e.pbcids the file picked by utils.greatest_name is
    read, and every row's selections are stored, as read, in
    e.av_cpb[cid][pbcid][bid] (using the collection id given in the row).
    """

    election_dir = os.path.join(multi.ELECTIONS_ROOT, e.election_dirname)
    audited_dir = os.path.join(election_dir, "3-audit", "33-audited-votes")

    for collection_id in e.pbcids:
        prefix = "audited-votes-" + ids.filename_safe(collection_id)
        audited_filename = utils.greatest_name(audited_dir, prefix, ".csv")
        audited_rows = csv_readers.read_csv_file(
            os.path.join(audited_dir, audited_filename),
            ["Collection", "Ballot id", "Contest", "Selections"],
            varlen=True)

        for record in audited_rows:
            pbcid = record["Collection"]
            bid = record["Ballot id"]
            cid = record["Contest"]
            audited_vote = record["Selections"]
            utils.nested_set(e.av_cpb, [cid, pbcid, bid], audited_vote)
def reported_number_cpr(filename):
    """
    Count reported votes per contest, collection and selection.

    The file is read with csv.reader using "|" as delimiter; the first
    row (header) is skipped and blank lines are ignored.  The first
    field of each remaining row is split on "," into:
        collection id, <field 1>, <field 2>, contest id, selection ids...
    (fields 1 and 2 are not used here).

    The collection id and contest id are taken by position, so an empty
    intermediate field does not shift them; empty strings are dropped
    only from the selection ids.

    Returns a nested dict rn_cpr[cid][pbcid][selection] -> count, where
    selection is a tuple of selection ids (possibly empty).
    """
    rn_cpr = dict()
    with open(filename, 'r', encoding="utf8") as f:
        reader = csv.reader(f, delimiter='|')
        next(reader, None)  # skip the header row, if any
        for row in reader:
            if not row:
                # csv.reader yields [] for a blank line
                continue
            fields = row[0].split(',')
            pbcid = fields[0]
            cid = fields[3]
            selection = tuple(selid for selid in fields[4:] if selid != '')
            counts = rn_cpr.setdefault(cid, {}).setdefault(pbcid, {})
            counts[selection] = counts.get(selection, 0) + 1
    return rn_cpr
def process_spec(e, synpar, meta, actual, reported):
    """
    Initialize Election e from three lists: meta, actual and reported.

    synpar is accepted but not used by this function.

    Each item of meta has the form
        (cid, pbcid, ro_c, audit_rate, contestants)
    where
        cid = contest id
        pbcid = paper ballot collection id
        ro_c = reported outcome, stored as e.ro_c[cid] the first time
               cid is seen
        audit_rate = stored as int in e.max_audit_rate_p[pbcid] the
                     first time pbcid is seen
        contestants = names; for each name, the selection ids
                      "1-name", "2-name", ..., "k-name"
                      (k = len(contestants)) are added to e.selids_c[cid]
    Contests are recorded with type "irv" and get one Bayes measurement.

    Each item of actual has the form (cid, pbcid, num, av) and creates
    num new ballots in collection pbcid with actual vote av for cid.

    Each item of reported has the form (cid, pbcid, num, rv) and assigns
    reported vote rv for cid to the next num ballot ids of collection
    pbcid, counting from "bid1".  The selection ids in rv are added to
    e.selids_c[cid].

    Asserts at the end that, for every collection, the number of
    reported votes equals the number of ballots created from actual.
    """

    # Store the election meta data
    for cid, pbcid, ro_c, audit_rate, contestants in meta:
        logger.info("Meta: %s %s %s %s", cid, pbcid, ro_c, audit_rate)

        # Record contest
        if cid not in e.cids:
            e.cids.append(cid)
            e.contest_type_c[cid] = "irv"
            e.params_c[cid] = ""
            e.write_ins_c[cid] = "no"
            e.selids_c[cid] = {}
            e.ro_c[cid] = ro_c  # reported outcome comes from meta
            mid = "M{}-{}".format(len(e.cids), cid)
            e.mids.append(mid)
            e.cid_m[mid] = cid
            e.risk_method_m[mid] = "Bayes"
            e.risk_limit_m[mid] = 0.025
            e.risk_upset_m[mid] = 0.975
            e.sampling_mode_m[mid] = "Active"
            e.initial_status_m[mid] = "Open"
            e.risk_measurement_parameters_m[mid] = ("", "")

        # Record collection identifiers
        if pbcid not in e.pbcids:
            e.pbcids.append(pbcid)
            e.manager_p[pbcid] = "Nobody"
            e.cvr_type_p[pbcid] = "CVR"
            e.required_gid_p[pbcid] = ""
            e.possible_gid_p[pbcid] = ""
            e.bids_p[pbcid] = []
            e.boxid_pb[pbcid] = {}
            e.position_pb[pbcid] = {}
            e.stamp_pb[pbcid] = {}
            e.max_audit_rate_p[pbcid] = int(audit_rate)
            e.comments_pb[pbcid] = {}

        # Add all combinations of selections to the selection pool
        for contestant in contestants:
            selids = [
                str(i) + "-" + contestant
                for i in range(1, len(contestants) + 1)
            ]
            for selid in selids:
                if selid not in e.selids_c[cid]:
                    e.selids_c[cid][selid] = True

    for (cid, pbcid, num, av) in actual:
        logger.info("actual %s %s %s %s", cid, pbcid, av, num)

        # Selection ids that appear only in av are not added to
        # e.selids_c here (unlike those of reported votes below).

        # Record votes
        for pos in range(1, int(num) + 1):
            bid = "bid{}".format(1 + len(e.bids_p[pbcid]))
            utils.nested_set(e.av_cpb, [cid, pbcid, bid], av)
            e.bids_p[pbcid].append(bid)
            e.boxid_pb[pbcid][bid] = "box1"
            e.position_pb[pbcid][bid] = pos
            e.stamp_pb[pbcid][bid] = ""
            e.comments_pb[pbcid][bid] = ""

    # Start the counter for reported vote from 1
    rv_map = {pbcid: 1 for pbcid in e.bids_p}

    # Update reported votes
    for (cid, pbcid, num, rv) in reported:
        # Label fixed: this loop handles reported votes, not actual ones.
        logger.info("reported %s %s %s %s", cid, pbcid, rv, num)

        # When a row is not given specifying contest and winner
        # record the selection id for that vote
        for selid in rv:
            if selid not in e.selids_c[cid]:
                e.selids_c[cid][selid] = True

        for pos in range(1, int(num) + 1):
            bid = "bid{}".format(rv_map[pbcid])
            rv_map[pbcid] += 1
            utils.nested_set(e.rv_cpb, [cid, pbcid, bid], rv)

    # The number of reported vote should be the same as actual vote
    assert all(
        len(e.bids_p[pbcid]) == rv_map[pbcid] - 1 for pbcid in e.bids_p)
def process_spec(e, synpar, L):
    """
    Initialize Election e according to spec in list L.

    Here e is of type multi.Election
    Here synpar is of type syn.Syn_Parameters (not used by this function)

    Each item in L has the form:
        (cid, pbcid, rv, av, num)
    where
        cid = contest id
        pbcid = paper ballot collection id
        rv = reported vote
             (may be ("-noCVR",) if pbcid is noCVR type)
        av = actual vote
        num = number of ballots of this type
    Either or both of rv and av may be
        ("-NoSuchContest",)
        ("-Invalid",)
    or other such votes with selection ids starting with "-",
    signifying that they can't win the contest.
    The votes rv and av are arbitrary tuples, and may contain
    0, 1, 2, or more selection ids.

    A contest seen for the first time is recorded as a plurality contest
    with one Bayes measurement, and a collection seen for the first time
    as a "CVR" collection.  Every item then appends num new ballots to
    its collection, each carrying rv and av for the contest.
    """

    for (cid, pbcid, rv, av, num) in L:
        print(" ", cid, pbcid, rv, av, num)

        # First sighting of this contest: register it and its measurement.
        if cid not in e.cids:
            e.cids.append(cid)
            e.contest_type_c[cid] = "plurality"
            e.params_c[cid] = ""
            e.write_ins_c[cid] = "no"
            e.selids_c[cid] = {}
            e.ro_c[cid] = ("Alice", )  # FIX
            mid = "M{}-{}".format(len(e.cids), cid)
            e.mids.append(mid)
            e.cid_m[mid] = cid
            e.risk_method_m[mid] = "Bayes"
            e.risk_limit_m[mid] = 0.05
            e.risk_upset_m[mid] = 0.98
            e.sampling_mode_m[mid] = "Active"
            e.initial_status_m[mid] = "Open"
            e.risk_measurement_parameters_m[mid] = ("", "")

        # Every selection id seen in either vote belongs to the contest.
        for vote in (rv, av):
            for selid in vote:
                e.selids_c[cid].setdefault(selid, True)

        # First sighting of this collection: register it.
        if pbcid not in e.pbcids:
            e.pbcids.append(pbcid)
            e.manager_p[pbcid] = "Nobody"
            e.cvr_type_p[pbcid] = "CVR"
            e.required_gid_p[pbcid] = ""
            e.possible_gid_p[pbcid] = ""
            e.bids_p[pbcid] = []
            e.boxid_pb[pbcid] = {}
            e.position_pb[pbcid] = {}
            e.stamp_pb[pbcid] = {}
            e.max_audit_rate_p[pbcid] = 40
            e.comments_pb[pbcid] = {}

        # Append the ballots of this item to the collection.
        n_ballots = int(num)
        for offset in range(n_ballots):
            ballots = e.bids_p[pbcid]
            bid = "bid{}".format(len(ballots) + 1)
            utils.nested_set(e.rv_cpb, [cid, pbcid, bid], rv)
            utils.nested_set(e.av_cpb, [cid, pbcid, bid], av)
            ballots.append(bid)
            e.boxid_pb[pbcid][bid] = "box1"
            e.position_pb[pbcid][bid] = offset + 1
            e.stamp_pb[pbcid][bid] = ""
            e.comments_pb[pbcid][bid] = ""
def generate_reported_ballot_manifests(e, synpar):
    """
    Generate synthetic ballot manifest data.

    This procedure must be run *after* generate_reported.

    For the i-th ballot (counting from 0) of each collection:
      * box id is "box<k>" with k = 1 + i // synpar.box_size, so each box
        holds synpar.box_size consecutive ballots;
      * position is 1 + i % synpar.box_size, i.e. it restarts at 1
        exactly when a new box starts;
      * stamp is "stmp" followed by (i + 1) * 17 as six digits;
      * required/possible contest group ids and comments are "".
    """

    for pbcid in e.pbcids:
        for i, bid in enumerate(e.bids_p[pbcid]):
            # Use i (not i + 1) so the box number changes on the same
            # ballot where the position wraps back to 1.
            boxid = 1 + (i // synpar.box_size)
            position = 1 + (i % synpar.box_size)
            stamp = "stmp" + "{:06d}".format((i + 1) * 17)
            utils.nested_set(e.boxid_pb, [pbcid, bid], "box{}".format(boxid))
            utils.nested_set(e.position_pb, [pbcid, bid], position)
            utils.nested_set(e.stamp_pb, [pbcid, bid], stamp)
            utils.nested_set(e.required_gid_pb, [pbcid, bid], "")
            utils.nested_set(e.possible_gid_pb, [pbcid, bid], "")
            utils.nested_set(e.comments_pb, [pbcid, bid], "")
def read_reported_ballot_manifests(e):
    """
    Read ballot manifest file 21-reported-ballot-manifests and expand rows
    if needed.

    For each pbcid in e.pbcids, the file picked by utils.greatest_name
    among 2-reported/21-reported-ballot-manifests/manifest-<PBCID>.csv
    is read.  A row with "Number of ballots" = num stands for num
    ballots: utils.count_on expands the row's ballot id, stamp and
    position into num values each, and the i-th expanded ballot is
    appended to e.bids_p[pbcid] and recorded in e.boxid_pb,
    e.position_pb, e.stamp_pb, e.required_gid_pb, e.possible_gid_pb and
    e.comments_pb.

    A non-integer "Number of ballots" is reported with utils.myerror; a
    non-positive one is reported with utils.mywarning and yields no
    ballots.
    """

    election_pathname = os.path.join(multi.ELECTIONS_ROOT,
                                     e.election_dirname)
    specification_pathname = os.path.join(election_pathname,
                                          "2-reported",
                                          "21-reported-ballot-manifests")
    fieldnames = [
        "Collection", "Box", "Position", "Stamp", "Ballot id",
        "Number of ballots", "Required Contests", "Possible Contests",
        "Comments"
    ]
    for pbcid in e.pbcids:
        safe_pbcid = ids.filename_safe(pbcid)
        filename = utils.greatest_name(specification_pathname,
                                       "manifest-" + safe_pbcid,
                                       ".csv")
        file_pathname = os.path.join(specification_pathname, filename)
        rows = csv_readers.read_csv_file(file_pathname,
                                         fieldnames,
                                         varlen=False)
        for row in rows:
            pbcid = row["Collection"]
            boxid = row["Box"]
            position = row["Position"]
            stamp = row["Stamp"]
            bid = row["Ballot id"]
            raw_num = row["Number of ballots"]
            try:
                num = int(raw_num)
            except ValueError:
                # Report the offending text itself; num is not bound
                # (or is stale from an earlier row) at this point.
                utils.myerror(
                    "Number {} of ballots not an integer.".format(raw_num))
                # Only reached if utils.myerror returns; presumably it
                # aborts -- either way this row cannot be expanded.
                continue
            if num <= 0:
                utils.mywarning(
                    "Number {} of ballots not positive.".format(num))
            req = row["Required Contests"]
            poss = row["Possible Contests"]
            comments = row["Comments"]

            bids = utils.count_on(bid, num)
            stamps = utils.count_on(stamp, num)
            positions = utils.count_on(position, num)
            for i in range(num):
                # utils.nested_set(e.bids_p, [pbcid, bids[i]], True)
                if pbcid not in e.bids_p:
                    e.bids_p[pbcid] = []
                e.bids_p[pbcid].append(bids[i])
                utils.nested_set(e.boxid_pb, [pbcid, bids[i]], boxid)
                # Use the expanded positions list, not the raw row value.
                utils.nested_set(e.position_pb, [pbcid, bids[i]],
                                 positions[i])
                utils.nested_set(e.stamp_pb, [pbcid, bids[i]], stamps[i])
                utils.nested_set(e.required_gid_pb, [pbcid, bids[i]], req)
                utils.nested_set(e.possible_gid_pb, [pbcid, bids[i]], poss)
                utils.nested_set(e.comments_pb, [pbcid, bids[i]], comments)