def try_n(n):
    """
    Find the expected combined P-value for a total sample size n, projecting
    the discrepancies and poll tallies observed so far onto the additional
    ballots that would be drawn.

    All configuration (n_ratio, n1_original, n2_original, the observed
    counts and rates, stratum tallies, gamma, stepsize, risk_limit, verbose)
    is read from the enclosing scope.

    Parameters
    ----------
    n : int
        candidate total sample size across both strata

    Returns
    -------
    number
        1 if the allocation implied by n is smaller than the sample already
        drawn in either stratum; otherwise the 'max_pvalue' entry returned
        by maximize_fisher_combined_pvalue
    """
    n1 = math.ceil(n_ratio * n)
    n2 = int(n - n1)

    # A candidate that would shrink an already-drawn sample is rejected
    # outright by reporting the largest possible p-value.
    if (n1 < n1_original) or (n2 < n2_original):
        return 1

    # Set up the p-value function for the CVR stratum
    if n1 == 0:
        cvr_pvalue = lambda alloc: 1
    else:
        extra_n1 = n1 - n1_original
        # Overstatements are rounded up and understatements rounded down.
        o1 = math.ceil(o1_rate * extra_n1) + o1_obs
        o2 = math.ceil(o2_rate * extra_n1) + o2_obs
        u1 = math.floor(u1_rate * extra_n1) + u1_obs
        u2 = math.floor(u2_rate * extra_n1) + u2_obs
        # Use the same gamma as the modulus below; the two previously
        # disagreed whenever gamma differed from 1.03905.
        cvr_pvalue = lambda alloc: ballot_comparison_pvalue(
            n=n1, gamma=gamma,
            o1=o1, u1=u1, o2=o2, u2=u2,
            reported_margin=reported_margin, N=N1,
            null_lambda=alloc)

    # Set up the p-value function for the no-CVR stratum
    if n2 == 0:
        nocvr_pvalue = lambda alloc: 1
        n_w2 = 0
        n_l2 = 0
    else:
        extra_n2 = n2 - n2_original
        # NOTE(review): divides by n2_original; presumably the enclosing
        # scope guarantees it is nonzero whenever n2 > 0 -- confirm.
        expected_new_sample = \
            [0] * math.ceil(extra_n2 * (n2l_obs / n2_original)) + \
            [1] * int(extra_n2 * (n2w_obs / n2_original))
        totsample = observed_nocvr_sample + expected_new_sample
        # Pad with NaN up to the full sample size.
        if len(totsample) < n2:
            totsample += [np.nan] * (n2 - len(totsample))
        totsample = np.array(totsample)
        n_w2 = np.sum(totsample == 1)
        n_l2 = np.sum(totsample == 0)
        nocvr_pvalue = lambda alloc: ballot_polling_sprt(
            sample=totsample,
            popsize=N2,
            alpha=risk_limit,
            Vw=N_w2, Vl=N_l2,
            null_margin=(N_w2 - N_l2) - alloc * reported_margin)['pvalue']

    # Compute combined p-value
    bounding_fun = create_modulus(n1=n1, n2=n2, n_w2=n_w2, n_l2=n_l2,
                                  N1=N1, V_wl=reported_margin, gamma=gamma)
    res = maximize_fisher_combined_pvalue(
        N_w1=N_w1, N_l1=N_l1, N1=N1,
        N_w2=N_w2, N_l2=N_l2, N2=N2,
        pvalue_funs=(cvr_pvalue, nocvr_pvalue),
        stepsize=stepsize,
        modulus=bounding_fun,
        alpha=risk_limit)
    expected_pvalue = res['max_pvalue']
    if verbose:
        print('...trying...', n, expected_pvalue)
    return expected_pvalue
def try_n(n):
    """
    Find expected combined P-value for a total sample size n.

    The sample in each stratum is projected from rates and reported tallies
    read from the enclosing scope (n_ratio, o1_rate, ..., N_w2, N_l2, N2,
    gamma, stepsize, risk_limit, verbose).

    Parameters
    ----------
    n : int
        candidate total sample size across both strata

    Returns
    -------
    number
        the 'max_pvalue' entry returned by maximize_fisher_combined_pvalue
    """
    n1 = math.ceil(n_ratio * n)
    n2 = int(n - n1)

    # Set up the p-value function for the CVR stratum
    if n1 == 0:
        cvr_pvalue = lambda alloc: 1
    else:
        # Overstatements are rounded up and understatements rounded down.
        o1 = math.ceil(o1_rate * n1)
        o2 = math.ceil(o2_rate * n1)
        u1 = math.floor(u1_rate * n1)
        u2 = math.floor(u2_rate * n1)
        cvr_pvalue = lambda alloc: ballot_comparison_pvalue(
            n=n1, gamma=gamma,
            o1=o1, u1=u1, o2=o2, u2=u2,
            reported_margin=reported_margin, N=N1,
            null_lambda=alloc)

    # Set up the p-value function for the no-CVR stratum
    if n2 == 0:
        nocvr_pvalue = lambda alloc: 1
    else:
        # Sample in proportion to the reported tallies: 0 = loser,
        # 1 = winner, NaN = anything else.
        sample = [0] * int(n2 * N_l2 / N2) + \
                 [1] * int(n2 * N_w2 / N2) + \
                 [np.nan] * int(n2 * (N2 - N_l2 - N_w2) / N2)
        # Truncation by int() can leave the sample short; pad with NaN.
        if len(sample) < n2:
            sample += [np.nan] * (n2 - len(sample))
        # Build the array once instead of on every p-value evaluation.
        sample = np.array(sample)
        nocvr_pvalue = lambda alloc: ballot_polling_sprt(
            sample=sample,
            popsize=N2,
            alpha=risk_limit,
            Vw=N_w2, Vl=N_l2,
            null_margin=(N_w2 - N_l2) - alloc * reported_margin)['pvalue']

    # Winner/loser counts in the polling sample, for the modulus
    if N2 == 0:
        n_w2 = 0
        n_l2 = 0
    else:
        n_w2 = int(n2 * N_w2 / N2)
        n_l2 = int(n2 * N_l2 / N2)

    bounding_fun = create_modulus(n1=n1, n2=n2, n_w2=n_w2, n_l2=n_l2,
                                  N1=N1, V_wl=reported_margin, gamma=gamma)
    res = maximize_fisher_combined_pvalue(
        N_w1=N_w1, N_l1=N_l1, N1=N1,
        N_w2=N_w2, N_l2=N_l2, N2=N2,
        pvalue_funs=(cvr_pvalue, nocvr_pvalue),
        stepsize=stepsize,
        modulus=bounding_fun,
        alpha=risk_limit)
    expected_pvalue = res['max_pvalue']
    if verbose:
        print('...trying...', n, expected_pvalue)
    return expected_pvalue
def compute_dist_over_pvalues(N_w1, N_l1, N_w2, N_l2, n1, n2, alpha, underlying=None):
    """
    Compute the combined p-value for every possible winner count in a
    polling sample of size n2, together with the binomial probability of
    each count.

    The comparison stratum is evaluated with zero discrepancies, and the
    polling stratum with minerva_pvalue_direct_count.

    Args:
        N_w1 (int): reported winner votes in the comparison stratum
        N_l1 (int): reported loser votes in the comparison stratum
        N_w2 (int): reported winner votes in the polling stratum
        N_l2 (int): reported loser votes in the polling stratum
        n1 (int): number of comparisons
        n2 (int): polling sample size
        alpha (float): risk limit
        underlying: unused

    Return {}:
        possible_winner_votes (range): 0..n2 winner votes in the sample
        dist_over_winner_votes: binomial pmf of each count, with success
            probability N_w2 / (N_w2 + N_l2)
        pvalues ([float]): combined p-value for each count
    """
    gamma = 1.03905
    N_1 = N_w1 + N_l1
    N_2 = N_w2 + N_l2
    margin = N_w1 + N_w2 - N_l1 - N_l2
    feasible_lambda_range = calculate_lambda_range(N_w1, N_l1, N_1, N_w2, N_l2, N_2)

    possible_winner_votes = range(0, n2 + 1)
    dist_over_winner_votes = binom.pmf(possible_winner_votes, n2, N_w2 / N_2)

    # The comparison p-value does not depend on k, so define it once.
    cvr_pvalue = lambda alloc: ballot_comparison_pvalue(
        n=n1, gamma=gamma, o1=0, u1=0, o2=0, u2=0,
        reported_margin=margin, N=N_1, null_lambda=alloc)

    pvalues = []
    for k in possible_winner_votes:
        mod = create_modulus(n1, n2, k, n2 - k, N_1, margin, gamma)
        # Called only within this iteration, so closing over k is safe.
        nocvr_pvalue = lambda alloc: minerva_pvalue_direct_count(
            winner_votes=k, n=n2, popsize=N_2, alpha=alpha,
            Vw=N_w2, Vl=N_l2,
            null_margin=(N_w2 - N_l2) - alloc * margin)
        # Forward alpha so the maximizer works against the caller's risk
        # limit rather than its own default.
        pvalue = maximize_fisher_combined_pvalue(
            N_w1, N_l1, N_1, N_w2, N_l2, N_2,
            pvalue_funs=[cvr_pvalue, nocvr_pvalue],
            modulus=mod, alpha=alpha,
            feasible_lambda_range=feasible_lambda_range)['max_pvalue']
        pvalues.append(pvalue)

    return {
        "possible_winner_votes": possible_winner_votes,
        "dist_over_winner_votes": dist_over_winner_votes,
        "pvalues": pvalues
    }
def find_sample_size_for_stopping_prob_efficiently_r2bravo(stopping_probability, N_w1, N_l1, N_w2, N_l2, n1, alpha, underlying=None, right=None):
    """
    Binary-search the polling round size that attains the requested
    stopping probability, computing a single combined p-value per step.

    For each candidate n2, the p-value is computed at kmax, the floor of
    the (1 - stopping_probability) binomial quantile of winner votes.
    The comparison stratum is evaluated with zero discrepancies.

    Args:
        stopping_probability (float): desired probability of stopping
        N_w1 (int): reported winner votes in the comparison stratum
        N_l1 (int): reported loser votes in the comparison stratum
        N_w2 (int): reported winner votes in the polling stratum
        N_l2 (int): reported loser votes in the polling stratum
        n1 (int): number of comparisons
        alpha (float): risk limit
        underlying: unused
        right: ignored; the search always starts from the upper bound
            N_w2 + N_l2

    Return {}:
        round_size (int): round size the search converged to
        combined_pvalue: combined p-value at that round size; it exceeds
            alpha when no size up to the stratum size meets the risk limit
        comparison_pvalue, polling_pvalue, alloc_lambda: the 'pvalue1',
            'pvalue2' and 'allocation lambda' entries of the maximizer result
    """
    gamma = 1.03905
    N_1 = N_w1 + N_l1
    N_2 = N_w2 + N_l2
    margin = N_w1 + N_w2 - N_l1 - N_l2
    feasible_lambda_range = calculate_lambda_range(N_w1, N_l1, N_1, N_w2, N_l2, N_2)

    # The comparison p-value does not depend on n2, so define it once.
    cvr_pvalue = lambda alloc: ballot_comparison_pvalue(
        n=n1, gamma=gamma, o1=0, u1=0, o2=0, u2=0,
        reported_margin=margin, N=N_1, null_lambda=alloc)

    left = 1
    right = N_2

    while True:
        n2 = math.ceil((left + right) / 2)

        # compute the 1 - stopping_probability quantile of the alt dist
        # kmax where pr[k >= kmax | alt] = stopping_probability
        # floor because we need to ensure at least a stopping_probability prob of stopping
        kmax = math.floor(binom.ppf(1 - stopping_probability, n2, N_w2 / N_2))

        # compute pvalue for this kmax
        mod = create_modulus(n1, n2, kmax, n2 - kmax, N_1, margin, gamma)
        nocvr_pvalue = lambda alloc: r2bravo_pvalue_direct_count(
            winner_votes=kmax, n=n2, popsize=N_2, alpha=alpha,
            Vw=N_w2, Vl=N_l2,
            null_margin=(N_w2 - N_l2) - alloc * margin)
        combination_results = maximize_fisher_combined_pvalue(
            N_w1, N_l1, N_1, N_w2, N_l2, N_2,
            pvalue_funs=[cvr_pvalue, nocvr_pvalue],
            modulus=mod, alpha=alpha,
            feasible_lambda_range=feasible_lambda_range)
        pvalue = combination_results['max_pvalue']

        # update binary search bounds
        risk_limit_met = pvalue <= alpha
        if risk_limit_met:
            right = n2
        else:
            left = n2

        # The search has converged once the candidate is the upper bound
        # and the bounds are adjacent or equal. The "equal" case is reached
        # when even n2 == N_2 misses the risk limit (or when N_2 == 1);
        # testing only for adjacency would loop forever there.
        if n2 == right and right - left <= 1:
            if not risk_limit_met:
                print("requried round size is greater than stratum size")
            return {
                "round_size": right,
                "combined_pvalue": pvalue,
                "comparison_pvalue": combination_results['pvalue1'],
                "polling_pvalue": combination_results['pvalue2'],
                "alloc_lambda": combination_results['allocation lambda']
            }
def compute_dist_over_pvalues(N_w1, N_l1, N_w2, N_l2, n1, n2, alpha, underlying=None):
    """
    Tabulate, for a 2-strata audit that uses Minerva in the ballot polling
    stratum, every possible winner count k in the polling sample alongside
    its binomial probability and the combined pvalue it would produce.
    The comparison stratum is evaluated with no discrepancies.

    Args:
        N_w1 (int): reported number of votes for the winner in the comparison stratum
        N_l1 (int): reported number of votes for the loser in the comparison stratum
        N_w2 (int): reported number of votes for the winner in the polling stratum
        N_l2 (int): reported number of votes for the loser in the polling stratum
        n1 (int): number of comparisons
        n2 (int): first round size in the polling stratum
        alpha (float): risk limit
        underlying (dict): not used by this function

    Return {}:
        possible_winner_votes (range): possible number of winner votes in the polling sample
        dist_over_winner_votes ([float]): binomial probability of each possible
            number of winner votes, with success probability N_w2 / (N_w2 + N_l2)
        pvalues ([float]): combined pvalue resulting from each possible number of winner votes
    """
    N_1 = N_w1 + N_l1
    N_2 = N_w2 + N_l2
    margin = N_w1 + N_w2 - N_l1 - N_l2
    lambda_bounds = calculate_lambda_range(N_w1, N_l1, N_1, N_w2, N_l2, N_2)

    possible_winner_votes = range(0, n2 + 1)
    dist_over_winner_votes = binom.pmf(possible_winner_votes, n2, N_w2 / N_2)

    def comparison_pvalue(alloc):
        # Comparison-stratum pvalue with zero over/understatements.
        return ballot_comparison_pvalue(
            n=n1, gamma=1.03905, o1=0, u1=0, o2=0, u2=0,
            reported_margin=margin, N=N_1, null_lambda=alloc)

    def combined_pvalue_for(winner_votes):
        # Maximized Fisher-combined pvalue when the polling sample holds
        # `winner_votes` winner ballots out of n2.
        modulus = create_modulus(n1, n2, winner_votes, n2 - winner_votes,
                                 N_1, margin, 1.03905)

        def polling_pvalue(alloc):
            return minerva_pvalue_direct_count(
                winner_votes=winner_votes, n=n2, popsize=N_2, alpha=alpha,
                Vw=N_w2, Vl=N_l2,
                null_margin=(N_w2 - N_l2) - alloc * margin)

        outcome = maximize_fisher_combined_pvalue(
            N_w1, N_l1, N_1, N_w2, N_l2, N_2,
            pvalue_funs=[comparison_pvalue, polling_pvalue],
            modulus=modulus, alpha=alpha,
            feasible_lambda_range=lambda_bounds)
        return outcome['max_pvalue']

    pvalues = [combined_pvalue_for(k) for k in possible_winner_votes]

    return {
        "possible_winner_votes": possible_winner_votes,
        "dist_over_winner_votes": dist_over_winner_votes,
        "pvalues": pvalues
    }
def audit_contest(candidates, winners, losers, stratum_sizes,
                  n1, n2, o1_obs, o2_obs, u1_obs, u2_obs, observed_poll,
                  risk_limit, gamma, stepsize):
    """
    Use SUITE to calculate the risk of each (winner, loser) pair given the
    observed samples in the CVR and no-CVR strata.

    Parameters
    ----------
    candidates : dict
        OrderedDict with candidate names as keys and
        [CVR votes, no-CVR votes, total votes] as values
    winners : list
        names of winners
    losers : list
        names of losers
    stratum_sizes : list
        list with total number of votes in the CVR and no-CVR strata
    n1 : int
        size of sample already drawn in the ballot comparison stratum
    n2 : int
        size of sample already drawn in the ballot polling stratum
    o1_obs : int
        observed number of ballots with 1-vote overstatements in the CVR stratum
    o2_obs : int
        observed number of ballots with 2-vote overstatements in the CVR stratum
    u1_obs : int
        observed number of ballots with 1-vote understatements in the CVR stratum
    u2_obs : int
        observed number of ballots with 2-vote understatements in the CVR stratum
    observed_poll : dict
        Dict with candidate names as keys and number of votes in the no-CVR
        stratum sample as values
    risk_limit : float
        risk limit
    gamma : float
        gamma from Lindeman and Stark (2012)
    stepsize : float
        stepsize for the discrete bounds on Fisher's combining function

    Returns
    -------
    dict : attained risk for each (winner, loser) pair in the contest
    """
    risks = {}

    for pair in product(winners, losers):
        winner, loser = pair
        winner_tally = candidates[winner]
        N_w1 = winner_tally[0]
        N_w2 = winner_tally[1]
        loser_tally = candidates[loser]
        N_l1 = loser_tally[0]
        N_l2 = loser_tally[1]
        reported_margin = (N_w1 + N_w2) - (N_l1 + N_l2)

        # Comparison (CVR) stratum: an empty sample carries no evidence.
        if n1 == 0:
            def cvr_pvalue(alloc):
                return 1
        else:
            def cvr_pvalue(alloc):
                return ballot_comparison_pvalue(
                    n=n1,
                    gamma=gamma,
                    o1=o1_obs, u1=u1_obs, o2=o2_obs, u2=u2_obs,
                    reported_margin=reported_margin,
                    N=stratum_sizes[0],
                    null_lambda=alloc)

        # Polling (no-CVR) stratum
        n2w = observed_poll[winner]
        n2l = observed_poll[loser]
        if n2 == 0:
            def nocvr_pvalue(alloc):
                return 1
        else:
            # 0 = ballot for the loser, 1 = ballot for the winner,
            # NaN = any other ballot in the sample.
            other = n2 - n2w - n2l
            sam = np.array([0] * n2l + [1] * n2w + [np.nan] * other)

            def nocvr_pvalue(alloc):
                sprt = ballot_polling_sprt(
                    sample=sam,
                    popsize=stratum_sizes[1],
                    alpha=risk_limit,
                    Vw=N_w2, Vl=N_l2,
                    null_margin=(N_w2 - N_l2) - alloc * reported_margin)
                return sprt['pvalue']

        # Maximize the combined p-value over the allocation of the margin
        bounding_fun = create_modulus(
            n1=n1, n2=n2,
            n_w2=n2w,
            n_l2=n2l,
            N1=stratum_sizes[0],
            V_wl=reported_margin, gamma=gamma)
        res = maximize_fisher_combined_pvalue(
            N_w1=N_w1, N_l1=N_l1,
            N1=stratum_sizes[0],
            N_w2=N_w2, N_l2=N_l2,
            N2=stratum_sizes[1],
            pvalue_funs=(cvr_pvalue, nocvr_pvalue),
            stepsize=stepsize,
            modulus=bounding_fun,
            alpha=risk_limit)
        risks[pair] = res['max_pvalue']

    return risks
def find_sample_size_for_stopping_prob_efficiently_r2bravo_linear(
        stopping_probability, N_w1, N_l1, N_w2, N_l2, n1, alpha,
        underlying=None, right=None, combine_func=None, stouffers=False):
    """
    Linear-search the smallest polling round size that attains the
    requested stopping probability, computing one combined p-value per
    candidate size.

    Starting from n2 = 1, the p-value is computed at kmax, the floor of
    the (1 - stopping_probability) binomial quantile of winner votes, and
    the first n2 whose combined p-value is at most alpha is returned.
    The comparison stratum is evaluated with zero discrepancies.

    NOTE: there is no upper bound on n2; the loop runs until the risk
    limit is met.

    Args:
        stopping_probability (float): desired probability of stopping
        N_w1 (int): reported winner votes in the comparison stratum
        N_l1 (int): reported loser votes in the comparison stratum
        N_w2 (int): reported winner votes in the polling stratum
        N_l2 (int): reported loser votes in the polling stratum
        n1 (int): number of comparisons
        alpha (float): risk limit
        underlying: unused
        right: unused
        combine_func: forwarded as combine_func to
            maximize_fisher_combined_pvalue, or as stouffers to
            maximize_stouffers_combined_pvalue
        stouffers (bool): Fisher's combination is used only when this is
            the literal False; any other value selects
            maximize_stouffers_combined_pvalue

    Return {}:
        round_size (int): first round size meeting the risk limit
        combined_pvalue: combined p-value at that round size
        comparison_pvalue, polling_pvalue, alloc_lambda: the 'pvalue1',
            'pvalue2' and 'allocation lambda' entries of the combination result
    """
    gamma = 1.03905
    N_1 = N_w1 + N_l1
    N_2 = N_w2 + N_l2
    margin = N_w1 + N_w2 - N_l1 - N_l2
    feasible_lambda_range = calculate_lambda_range(N_w1, N_l1, N_1, N_w2, N_l2, N_2)

    # The comparison p-value does not depend on n2, so define it once.
    cvr_pvalue = lambda alloc: ballot_comparison_pvalue(
        n=n1, gamma=gamma, o1=0, u1=0, o2=0, u2=0,
        reported_margin=margin, N=N_1, null_lambda=alloc)

    n2 = 1
    while True:
        # compute the 1 - stopping_probability quantile of the alt dist
        # kmax where pr[k >= kmax | alt] = stopping_probability
        # floor because we need to ensure at least a stopping_probability prob of stopping
        kmax = math.floor(binom.ppf(1 - stopping_probability, n2, N_w2 / N_2))

        # compute pvalue for this kmax
        mod = create_modulus(n1, n2, kmax, n2 - kmax, N_1, margin, gamma)
        nocvr_pvalue = lambda alloc: r2bravo_pvalue_direct_count(
            winner_votes=kmax, n=n2, popsize=N_2, alpha=alpha,
            Vw=N_w2, Vl=N_l2,
            null_margin=(N_w2 - N_l2) - alloc * margin)

        if stouffers is False:
            combination_results = maximize_fisher_combined_pvalue(
                N_w1, N_l1, N_1, N_w2, N_l2, N_2,
                pvalue_funs=[cvr_pvalue, nocvr_pvalue],
                modulus=mod, alpha=alpha,
                feasible_lambda_range=feasible_lambda_range,
                combine_func=combine_func)
        else:
            combination_results = maximize_stouffers_combined_pvalue(
                N_w1, N_l1, N_1, N_w2, N_l2, N_2,
                pvalue_funs=[cvr_pvalue, nocvr_pvalue],
                alpha=alpha,
                stouffers=combine_func)

        pvalue = combination_results['max_pvalue']

        # The risk limit is met when the p-value is at most alpha, the same
        # criterion the binary-search variant and the simulations use.
        if pvalue <= alpha:
            return {
                "round_size": n2,
                "combined_pvalue": pvalue,
                "comparison_pvalue": combination_results['pvalue1'],
                "polling_pvalue": combination_results['pvalue2'],
                "alloc_lambda": combination_results['allocation lambda']
            }

        # update round size
        n2 += 1
def simulate_fisher_combined_audits(N_w1, N_l1, N_w2, N_l2, n1, n2, alpha,
                                    reps=10000, verbose=False,
                                    feasible_lambda_range=None, underlying=None):
    """
    Simulate the Fisher method of combining a ballot comparison audit with
    a ballot polling audit (direct R2 BRAVO p-value), drawing the polling
    sample from a population that contains `underlying` winner ballots.

    Return the fraction of simulations in which the combined p-value was
    at most alpha, with timing and the mean p-value.

    Parameters
    ----------
    N_w1 : int
        votes for the reported winner in the ballot comparison stratum
    N_l1 : int
        votes for the reported loser in the ballot comparison stratum
    N_w2 : int
        votes for the reported winner in the ballot polling stratum
    N_l2 : int
        votes for the reported loser in the ballot polling stratum
    n1 : int
        sample size in the ballot comparison stratum
    n2 : int
        sample size in the ballot polling stratum
    alpha : float
        risk limit
    reps : int
        number of times to simulate the audit. Default 10,000
    verbose : bool
        currently unused
    feasible_lambda_range : array-like
        lower and upper limits to search over lambda.
        Optional, forwarded to maximize_fisher_combined_pvalue
    underlying : int
        number of winner ballots in the simulated polling-stratum
        population of size N_w2 + N_l2. Defaults to N_w2

    Returns
    -------
    dict :
        "r2_bravo_direct" : fraction of simulations with p-value <= alpha
        "r2_bravo_direct_time" : seconds spent computing the p-values
        "r2_bravo_direct_avg_pval" : mean combined p-value
    """
    gamma = 1.03905
    if underlying is None:
        underlying = N_w2

    N1 = N_w1 + N_l1
    N2 = N_w2 + N_l2
    margin = (N_w1 + N_w2) - (N_l1 + N_l2)

    # Population generated based on 'underlying' (assumed winner count).
    # Stored as an array so it is not re-converted on every draw.
    pop2 = np.array([1] * underlying + [0] * (N2 - underlying))

    # The comparison stratum is evaluated with zero discrepancies.
    cvr_pvalue = lambda alloc: ballot_comparison_pvalue(
        n=n1, gamma=gamma, o1=0, u1=0, o2=0, u2=0,
        reported_margin=margin, N=N1, null_lambda=alloc)

    # Generate all samples up front so the timing below covers only the
    # p-value computation.
    samples = [np.random.choice(pop2, n2, replace=True) for _ in range(reps)]

    # NOTE: only the direct R2 BRAVO variant is simulated and reported.
    fisher_pvalues_r2_bravo_direct = np.zeros(reps)

    # R2 BRAVO (direct)
    start = time.time()
    for i, sam in enumerate(samples):
        nw2 = np.sum(sam == 1)
        nl2 = np.sum(sam == 0)
        mod = create_modulus(n1, n2, nw2, nl2, N1, margin, gamma)
        # Called only within this iteration, so closing over sam is safe.
        nocvr_pvalue_r2_bravo_direct = lambda alloc: r2_bravo_pvalue_direct(
            sample=sam, popsize=N2, alpha=alpha,
            Vw=N_w2, Vl=N_l2,
            null_margin=(N_w2 - N_l2) - alloc * margin)
        # Forward alpha so the maximizer works against the caller's risk
        # limit rather than its own default.
        fisher_pvalues_r2_bravo_direct[i] = maximize_fisher_combined_pvalue(
            N_w1, N_l1, N1, N_w2, N_l2, N2,
            pvalue_funs=[cvr_pvalue, nocvr_pvalue_r2_bravo_direct],
            modulus=mod, alpha=alpha,
            feasible_lambda_range=feasible_lambda_range)['max_pvalue']
    r2_bravo_direct_time = time.time() - start

    return {
        "r2_bravo_direct": np.mean(fisher_pvalues_r2_bravo_direct <= alpha),
        "r2_bravo_direct_time": r2_bravo_direct_time,
        "r2_bravo_direct_avg_pval": np.mean(fisher_pvalues_r2_bravo_direct),
    }