def try_n(n):
    """
    Find the expected combined P-value if the audit escalates to a total
    sample of n ballots.

    n is split between the two strata using ``n_ratio``. For the ballots
    beyond those already audited, discrepancy counts (CVR stratum) and
    winner/loser tallies (no-CVR stratum) are projected from the values
    observed so far and added to the observed values.

    Parameters
    ----------
    n : number
        candidate total sample size across both strata

    Returns
    -------
    float : maximum Fisher combined P-value expected at sample size n, or 1
        when n would give either stratum fewer ballots than already drawn
    """
    n1 = math.ceil(n_ratio * n)
    n2 = int(n - n1)

    # A stratum's sample cannot shrink below what was already drawn; report
    # a P-value of 1 for such sizes instead of evaluating them.
    if (n1 < n1_original) or (n2 < n2_original):
        return 1

    # Set up the p-value function for the CVR stratum
    if n1 == 0:
        def cvr_pvalue(alloc):
            return 1
    else:
        n1_new = n1 - n1_original
        # Projected overstatements are rounded up, understatements down.
        o1 = math.ceil(o1_rate * n1_new) + o1_obs
        o2 = math.ceil(o2_rate * n1_new) + o2_obs
        u1 = math.floor(u1_rate * n1_new) + u1_obs
        u2 = math.floor(u2_rate * n1_new) + u2_obs

        def cvr_pvalue(alloc):
            # Use the same gamma as the modulus below rather than a
            # hard-coded 1.03905, so both pieces agree for any gamma.
            return ballot_comparison_pvalue(
                n=n1, gamma=gamma,
                o1=o1, u1=u1, o2=o2, u2=u2,
                reported_margin=reported_margin, N=N1,
                null_lambda=alloc)

    # Set up the p-value function for the no-CVR stratum
    if n2 == 0:
        def nocvr_pvalue(alloc):
            return 1
        n_w2 = 0
        n_l2 = 0
    else:
        n2_new = n2 - n2_original
        # NOTE(review): divides by n2_original; a call with n2 > 0 while
        # n2_original == 0 would raise ZeroDivisionError - confirm that the
        # enclosing function rules this out.
        expected_new_sample = (
            [0] * math.ceil(n2_new * (n2l_obs / n2_original))
            + [1] * int(n2_new * (n2w_obs / n2_original)))
        totsample = observed_nocvr_sample + expected_new_sample
        # Pad with NaN so the sample has exactly n2 entries.
        if len(totsample) < n2:
            totsample += [np.nan] * (n2 - len(totsample))
        totsample = np.array(totsample)
        n_w2 = np.sum(totsample == 1)
        n_l2 = np.sum(totsample == 0)

        def nocvr_pvalue(alloc):
            return ballot_polling_sprt(
                sample=totsample,
                popsize=N2,
                alpha=risk_limit,
                Vw=N_w2, Vl=N_l2,
                null_margin=(N_w2 - N_l2) - alloc * reported_margin)['pvalue']

    # Compute combined p-value
    bounding_fun = create_modulus(n1=n1, n2=n2, n_w2=n_w2, n_l2=n_l2,
                                  N1=N1, V_wl=reported_margin, gamma=gamma)
    res = maximize_fisher_combined_pvalue(
        N_w1=N_w1, N_l1=N_l1, N1=N1,
        N_w2=N_w2, N_l2=N_l2, N2=N2,
        pvalue_funs=(cvr_pvalue, nocvr_pvalue),
        stepsize=stepsize,
        modulus=bounding_fun,
        alpha=risk_limit)
    expected_pvalue = res['max_pvalue']
    if verbose:
        print('...trying...', n, expected_pvalue)
    return expected_pvalue
def try_n(n):
    """
    Expected maximum Fisher combined P-value when a total of n ballots is
    sampled across both strata.

    The sample is divided with ``n_ratio``; discrepancy counts in the CVR
    stratum are projected from the assumed rates, and the no-CVR sample is
    built with winner/loser/other ballots in the reported proportions.
    """
    cvr_n = math.ceil(n_ratio * n)
    poll_n = int(n - cvr_n)

    # P-value function for the CVR stratum
    if cvr_n == 0:
        def cvr_pvalue(alloc):
            return 1
    else:
        # Overstatements rounded up, understatements rounded down
        over1 = math.ceil(o1_rate * cvr_n)
        over2 = math.ceil(o2_rate * cvr_n)
        under1 = math.floor(u1_rate * cvr_n)
        under2 = math.floor(u2_rate * cvr_n)

        def cvr_pvalue(alloc):
            return ballot_comparison_pvalue(
                n=cvr_n, gamma=gamma,
                o1=over1, u1=under1, o2=over2, u2=under2,
                reported_margin=reported_margin, N=N1,
                null_lambda=alloc)

    # P-value function for the no-CVR stratum
    if poll_n == 0:
        def nocvr_pvalue(alloc):
            return 1
    else:
        loser_part = [0] * int(poll_n * N_l2 / N2)
        winner_part = [1] * int(poll_n * N_w2 / N2)
        other_part = [np.nan] * int(poll_n * (N2 - N_l2 - N_w2) / N2)
        sample = loser_part + winner_part + other_part
        shortfall = poll_n - len(sample)
        if shortfall > 0:
            # Truncation above can leave the sample short; pad with NaN
            sample += [np.nan] * shortfall

        def nocvr_pvalue(alloc):
            shifted_margin = (N_w2 - N_l2) - alloc * reported_margin
            result = ballot_polling_sprt(
                sample=np.array(sample),
                popsize=N2,
                alpha=risk_limit,
                Vw=N_w2, Vl=N_l2,
                null_margin=shifted_margin)
            return result['pvalue']

    # Expected winner/loser counts in the no-CVR sample, for the modulus
    n_w2 = 0 if N2 == 0 else int(poll_n * N_w2 / N2)
    n_l2 = 0 if N2 == 0 else int(poll_n * N_l2 / N2)

    bounding_fun = create_modulus(
        n1=cvr_n, n2=poll_n, n_w2=n_w2, n_l2=n_l2,
        N1=N1, V_wl=reported_margin, gamma=gamma)
    expected_pvalue = maximize_fisher_combined_pvalue(
        N_w1=N_w1, N_l1=N_l1, N1=N1,
        N_w2=N_w2, N_l2=N_l2, N2=N2,
        pvalue_funs=(cvr_pvalue, nocvr_pvalue),
        stepsize=stepsize,
        modulus=bounding_fun,
        alpha=risk_limit)['max_pvalue']
    if verbose:
        print('...trying...', n, expected_pvalue)
    return expected_pvalue
def try_n(n):
    """
    Expected ballot-polling P-value for a sample of n ballots.

    The sample is built with loser (0) and winner (1) ballots in the
    reported proportions N_l2/N2 and N_w2/N2, padded with NaN up to n, and
    tested against a null margin of 0.
    """
    n = int(n)
    loser_count = math.ceil(n * N_l2 / N2)
    winner_count = int(n * N_w2 / N2)
    sample = [0] * loser_count + [1] * winner_count
    missing = n - len(sample)
    if missing > 0:
        # Remaining draws are ballots for neither candidate
        sample += [np.nan] * missing

    result = ballot_polling_sprt(
        sample=np.array(sample),
        popsize=N2,
        alpha=risk_limit,
        Vw=N_w2, Vl=N_l2,
        null_margin=0)
    expected_pvalue = result['pvalue']
    if verbose:
        print('...trying...', n, expected_pvalue)
    return expected_pvalue
def compute_power(Ntot, Vw, Vl, pi, alpha, reps=10**3):
    """
    Estimate by simulation how often ``ballot_polling_sprt`` yields a
    P-value of at most alpha.

    Each replication shuffles a population of Vw ones, Vl zeros and
    Ntot - Vw - Vl NaNs, draws a sample size from ``binom.rvs(Ntot, pi)``,
    and tests the first that-many entries of the shuffled population.

    Parameters
    ----------
    Ntot : int
        population size
    Vw : int
        number of 1 entries in the population
    Vl : int
        number of 0 entries in the population
    pi : float
        second argument passed to ``binom.rvs`` when drawing the sample size
    alpha : float
        threshold the P-value is compared to; also passed to the test
    reps : int
        number of replications. Default 1000

    Returns
    -------
    float : fraction of replications with P-value <= alpha
    """
    other_votes = Ntot - Vw - Vl
    population = np.array([1] * Vw + [0] * Vl + [np.nan] * other_votes)

    rejections = 0
    for _ in range(reps):
        # Shuffle first, then draw the size: keeps the random-call order
        np.random.shuffle(population)
        draw_size = binom.rvs(Ntot, pi)
        outcome = ballot_polling_sprt(sample=population[:draw_size],
                                      popsize=Ntot, alpha=alpha,
                                      Vw=Vw, Vl=Vl)
        rejections += int(outcome['pvalue'] <= alpha)
    return rejections / reps
def try_n(n):
    """
    Find the expected ballot-polling P-value if the sample is escalated to
    n ballots.

    The ballots beyond the ``n2_original`` already drawn are projected to
    contain losers (0) and winners (1) at the rates seen so far
    (``n2l_obs / n2_original`` and ``n2w_obs / n2_original``). They are
    appended to the observed sample, which is then padded with NaN up to n
    and tested against a null margin of 0.

    Parameters
    ----------
    n : number
        candidate total sample size; truncated to an int

    Returns
    -------
    float : P-value reported by ``ballot_polling_sprt`` for the projected
        sample
    """
    n = int(n)
    n_new = n - n2_original
    # NOTE(review): divides by n2_original; a zero value would raise
    # ZeroDivisionError - confirm the enclosing function rules this out.
    expected_new_sample = (
        [0] * math.ceil(n_new * (n2l_obs / n2_original))
        + [1] * int(n_new * (n2w_obs / n2_original)))
    totsample = observed_nocvr_sample + expected_new_sample
    # Pad with NaN so the sample has exactly n entries.
    if len(totsample) < n:
        totsample += [np.nan] * (n - len(totsample))
    totsample = np.array(totsample)

    # The winner/loser tallies of totsample were previously computed here
    # but never used; they are not needed for the polling-only P-value.
    expected_pvalue = ballot_polling_sprt(
        sample=totsample,
        popsize=N2,
        alpha=risk_limit,
        Vw=N_w2, Vl=N_l2,
        null_margin=0)['pvalue']
    if verbose:
        print('...trying...', n, expected_pvalue)
    return expected_pvalue
def simulate_fisher_combined_audit(N_w1, N_l1, N1, N_w2, N_l2, N2, n1, n2, alpha,
    reps=10000, verbose=False, feasible_lambda_range=None, gamma=1.03905):
    """
    Simulate the Fisher method of combining a ballot comparison audit
    and ballot polling audit, assuming the reported results are correct.
    Return the fraction of simulations where the audit successfully
    confirmed the election results.

    Parameters
    ----------
    N_w1 : int
        votes for the reported winner in the ballot comparison stratum
    N_l1 : int
        votes for the reported loser in the ballot comparison stratum
    N1 : int
        total number of votes in the ballot comparison stratum
    N_w2 : int
        votes for the reported winner in the ballot polling stratum
    N_l2 : int
        votes for the reported loser in the ballot polling stratum
    N2 : int
        total number of votes in the ballot polling stratum
    n1 : int
        sample size in the ballot comparison stratum
    n2 : int
        sample size in the ballot polling stratum
    alpha : float
        risk limit
    reps : int
        number of times to simulate the audit. Default 10,000
    verbose : bool
        Optional, print iteration number if True
    feasible_lambda_range : array-like
        lower and upper limits to search over lambda.
        Optional, but will speed up the search
    gamma : float
        gamma passed to the ballot comparison P-value and to the modulus.
        Default 1.03905

    Returns
    -------
    float : fraction of simulations where the audit successfully
        confirmed the election results
    """
    margin = (N_w1 + N_w2) - (N_l1 + N_l2)

    # N1 and N2 are used as passed in. They were previously overwritten with
    # N_w + N_l, which ignored the documented stratum totals and left the
    # NaN (neither-candidate) part of the population below always empty.
    # Built once as an array so it is not re-converted on every draw.
    pop2 = np.array([1] * N_w2 + [0] * N_l2 + [np.nan] * (N2 - N_w2 - N_l2))

    def cvr_pvalue(alloc):
        # Zero discrepancies: the reported results are assumed correct.
        return ballot_comparison_pvalue(
            n=n1, gamma=gamma,
            o1=0, u1=0, o2=0, u2=0,
            reported_margin=margin, N=N1, null_lambda=alloc)

    fisher_pvalues = np.zeros(reps)

    for i in range(reps):
        if verbose:
            print(i)
        sam = np.random.choice(pop2, n2, replace=False)
        nw2 = np.sum(sam == 1)
        nl2 = np.sum(sam == 0)
        mod = create_modulus(n1, n2, nw2, nl2, N1, margin, gamma)

        def nocvr_pvalue(alloc):
            # Closes over this iteration's sample; it is only called within
            # the maximization below, before sam is redrawn.
            return ballot_polling_sprt(
                sample=sam, popsize=N2, alpha=alpha,
                Vw=N_w2, Vl=N_l2,
                null_margin=(N_w2 - N_l2) - alloc * margin)['pvalue']

        # Forward alpha so the maximization works against the same risk
        # limit that the result is compared to below.
        fisher_pvalues[i] = maximize_fisher_combined_pvalue(
            N_w1, N_l1, N1, N_w2, N_l2, N2,
            pvalue_funs=[cvr_pvalue, nocvr_pvalue],
            modulus=mod,
            alpha=alpha,
            feasible_lambda_range=feasible_lambda_range)['max_pvalue']
    return np.mean(fisher_pvalues <= alpha)
def audit_contest(candidates, winners, losers, stratum_sizes,
                  n1, n2, o1_obs, o2_obs, u1_obs, u2_obs, observed_poll,
                  risk_limit, gamma, stepsize):
    """
    Compute the attained risk of every (winner, loser) pair with SUITE,
    from the samples observed in the CVR and no-CVR strata.

    Parameters
    ----------
    candidates : dict
        OrderedDict with candidate names as keys and
        [CVR votes, no-CVR votes, total votes] as values
    winners : list
        names of winners
    losers : list
        names of losers
    stratum_sizes : list
        list with total number of votes in the CVR and no-CVR strata
    n1 : int
        size of sample already drawn in the ballot comparison stratum
    n2 : int
        size of sample already drawn in the ballot polling stratum
    o1_obs : int
        observed number of ballots with 1-vote overstatements in the CVR
        stratum
    o2_obs : int
        observed number of ballots with 2-vote overstatements in the CVR
        stratum
    u1_obs : int
        observed number of ballots with 1-vote understatements in the CVR
        stratum
    u2_obs : int
        observed number of ballots with 2-vote understatements in the CVR
        stratum
    observed_poll : dict
        Dict with candidate names as keys and number of votes in the no-CVR
        stratum sample as values
    risk_limit : float
        risk limit
    gamma : float
        gamma from Lindeman and Stark (2012)
    stepsize : float
        stepsize for the discrete bounds on Fisher's combining function

    Returns
    -------
    dict : attained risk for each (winner, loser) pair in the contest
    """
    audit_pvalues = {}

    for winner, loser in product(winners, losers):
        N_w1, N_w2 = candidates[winner][0], candidates[winner][1]
        N_l1, N_l2 = candidates[loser][0], candidates[loser][1]
        reported_margin = (N_w1 + N_w2) - (N_l1 + N_l2)

        # CVR stratum: nothing sampled means a P-value of 1
        if n1 == 0:
            def cvr_pvalue(alloc):
                return 1
        else:
            def cvr_pvalue(alloc):
                return ballot_comparison_pvalue(
                    n=n1, gamma=gamma,
                    o1=o1_obs, u1=u1_obs, o2=o2_obs, u2=u2_obs,
                    reported_margin=reported_margin,
                    N=stratum_sizes[0],
                    null_lambda=alloc)

        # No-CVR stratum: rebuild the sample from the observed tallies
        n2w = observed_poll[winner]
        n2l = observed_poll[loser]
        if n2 == 0:
            def nocvr_pvalue(alloc):
                return 1
        else:
            other_count = n2 - n2w - n2l
            sam = np.array([0] * n2l + [1] * n2w + [np.nan] * other_count)

            def nocvr_pvalue(alloc):
                shifted_margin = (N_w2 - N_l2) - alloc * reported_margin
                outcome = ballot_polling_sprt(
                    sample=sam,
                    popsize=stratum_sizes[1],
                    alpha=risk_limit,
                    Vw=N_w2, Vl=N_l2,
                    null_margin=shifted_margin)
                return outcome['pvalue']

        bounding_fun = create_modulus(
            n1=n1, n2=n2, n_w2=n2w, n_l2=n2l,
            N1=stratum_sizes[0],
            V_wl=reported_margin, gamma=gamma)
        res = maximize_fisher_combined_pvalue(
            N_w1=N_w1, N_l1=N_l1, N1=stratum_sizes[0],
            N_w2=N_w2, N_l2=N_l2, N2=stratum_sizes[1],
            pvalue_funs=(cvr_pvalue, nocvr_pvalue),
            stepsize=stepsize,
            modulus=bounding_fun,
            alpha=risk_limit)
        audit_pvalues[(winner, loser)] = res['max_pvalue']

    return audit_pvalues