コード例 #1
0
ファイル: suite_tools.py プロジェクト: pbstark/CORLA18
        def try_n(n):
            """
            Expected combined P-value if the total sample is enlarged to n.

            The total ``n`` is split between the two strata using ``n_ratio``.
            Discrepancy counts in the CVR stratum and the winner/loser tallies
            in the no-CVR stratum are extrapolated from what has been observed
            so far, and the result of ``maximize_fisher_combined_pvalue`` for
            that hypothetical sample is returned.

            Parameters
            ----------
            n : int
                candidate total sample size across both strata

            Returns
            -------
            float
                1 if either stratum's sample would be smaller than the one
                already drawn (``n1_original`` / ``n2_original``); otherwise
                the ``'max_pvalue'`` entry of the maximizer's result
            """
            n1 = math.ceil(n_ratio * n)
            n2 = int(n - n1)

            # A candidate that would shrink either existing sample is reported
            # as P-value 1 so the caller's search moves on.
            if (n1 < n1_original) or (n2 < n2_original):
                return 1

            # Set up the p-value function for the CVR stratum
            if n1 == 0:
                cvr_pvalue = lambda alloc: 1
            else:
                extra1 = n1 - n1_original
                # o* counts are rounded up and u* counts are rounded down
                # before adding the discrepancies already observed.
                o1 = math.ceil(o1_rate * extra1) + o1_obs
                o2 = math.ceil(o2_rate * extra1) + o2_obs
                u1 = math.floor(u1_rate * extra1) + u1_obs
                u2 = math.floor(u2_rate * extra1) + u2_obs
                # Use the enclosing ``gamma`` here (not a hard-coded 1.03905)
                # so the p-value and the modulus built below always agree.
                cvr_pvalue = lambda alloc: ballot_comparison_pvalue(
                    n=n1,
                    gamma=gamma, o1=o1,
                    u1=u1, o2=o2, u2=u2,
                    reported_margin=reported_margin, N=N1,
                    null_lambda=alloc)

            # Set up the p-value function for the no-CVR stratum
            if n2 == 0:
                nocvr_pvalue = lambda alloc: 1
                n_w2 = 0
                n_l2 = 0
            else:
                extra2 = n2 - n2_original
                # Extend the observed sample with 0s and 1s in the observed
                # proportions (0 count rounded up, 1 count truncated).
                # NOTE(review): divides by n2_original; presumably the caller
                # guarantees it is non-zero on this path -- confirm.
                expected_new_sample = \
                    [0] * math.ceil(extra2 * (n2l_obs / n2_original)) + \
                    [1] * int(extra2 * (n2w_obs / n2_original))
                totsample = observed_nocvr_sample + expected_new_sample
                # Pad with NaN up to n2 entries.
                if len(totsample) < n2:
                    totsample += [np.nan] * (n2 - len(totsample))
                totsample = np.array(totsample)
                n_w2 = np.sum(totsample == 1)
                n_l2 = np.sum(totsample == 0)

                nocvr_pvalue = lambda alloc: ballot_polling_sprt(
                    sample=totsample,
                    popsize=N2,
                    alpha=risk_limit,
                    Vw=N_w2, Vl=N_l2,
                    null_margin=(N_w2 - N_l2) - alloc * reported_margin)['pvalue']

            # Compute combined p-value
            bounding_fun = create_modulus(n1=n1, n2=n2,
                                          n_w2=n_w2,
                                          n_l2=n_l2,
                                          N1=N1, V_wl=reported_margin,
                                          gamma=gamma)
            res = maximize_fisher_combined_pvalue(N_w1=N_w1, N_l1=N_l1, N1=N1,
                                                  N_w2=N_w2, N_l2=N_l2, N2=N2,
                                                  pvalue_funs=(cvr_pvalue,
                                                               nocvr_pvalue),
                                                  stepsize=stepsize,
                                                  modulus=bounding_fun,
                                                  alpha=risk_limit)
            expected_pvalue = res['max_pvalue']
            if verbose:
                print('...trying...', n, expected_pvalue)
            return expected_pvalue
コード例 #2
0
    def try_n(n):
        """
        Find expected combined P-value for a total sample size n.

        The total ``n`` is split between the strata using ``n_ratio``.
        Discrepancy counts in the CVR stratum are projected from the assumed
        rates, and the no-CVR sample is filled with 0s, 1s and NaNs in
        proportion to ``N_l2``, ``N_w2`` and the remainder of ``N2``.

        Parameters
        ----------
        n : int
            candidate total sample size across both strata

        Returns
        -------
        float
            the ``'max_pvalue'`` entry returned by
            ``maximize_fisher_combined_pvalue`` for the hypothetical sample
        """
        n1 = math.ceil(n_ratio * n)
        n2 = int(n - n1)

        # Set up the p-value function for the CVR stratum
        if n1 == 0:
            cvr_pvalue = lambda alloc: 1
        else:
            # o* counts are rounded up, u* counts are rounded down.
            o1 = math.ceil(o1_rate * n1)
            o2 = math.ceil(o2_rate * n1)
            u1 = math.floor(u1_rate * n1)
            u2 = math.floor(u2_rate * n1)
            cvr_pvalue = lambda alloc: ballot_comparison_pvalue(
                n=n1,
                gamma=gamma, o1=o1, u1=u1, o2=o2, u2=u2,
                reported_margin=reported_margin, N=N1,
                null_lambda=alloc)

        # Set up the p-value function for the no-CVR stratum
        if n2 == 0:
            nocvr_pvalue = lambda alloc: 1
            # With an empty sample the expected tallies are zero whether or
            # not N2 is zero.
            n_w2 = 0
            n_l2 = 0
        else:
            # Expected tallies, truncated; they are reused for the modulus
            # below instead of being computed a second time.
            n_l2 = int(n2 * N_l2 / N2)
            n_w2 = int(n2 * N_w2 / N2)
            sample = [0] * n_l2 + [1] * n_w2 + \
                [np.nan] * int(n2 * (N2 - N_l2 - N_w2) / N2)
            # Truncation can leave the list short; pad with NaN up to n2.
            if len(sample) < n2:
                sample += [np.nan] * (n2 - len(sample))
            # Convert once here rather than on every evaluation of the
            # p-value function.
            sample_array = np.array(sample)
            nocvr_pvalue = lambda alloc: ballot_polling_sprt(
                sample=sample_array,
                popsize=N2,
                alpha=risk_limit,
                Vw=N_w2, Vl=N_l2,
                null_margin=(N_w2 - N_l2) - alloc * reported_margin)['pvalue']

        bounding_fun = create_modulus(n1=n1, n2=n2,
                                      n_w2=n_w2,
                                      n_l2=n_l2,
                                      N1=N1, V_wl=reported_margin, gamma=gamma)
        res = maximize_fisher_combined_pvalue(N_w1=N_w1, N_l1=N_l1, N1=N1,
                                              N_w2=N_w2, N_l2=N_l2, N2=N2,
                                              pvalue_funs=(cvr_pvalue,
                                                           nocvr_pvalue),
                                              stepsize=stepsize,
                                              modulus=bounding_fun,
                                              alpha=risk_limit)
        expected_pvalue = res['max_pvalue']
        if verbose:
            print('...trying...', n, expected_pvalue)
        return expected_pvalue
コード例 #3
0
ファイル: suite_tools.py プロジェクト: pbstark/CORLA18
 def try_n(n):
     """
     Ballot-polling P-value for a hypothetical sample of size n.

     The sample holds 0s and 1s in proportion to ``N_l2`` and ``N_w2``
     (0 count rounded up, 1 count truncated), padded with NaN up to n
     entries, and is tested against a null margin of 0.

     Parameters
     ----------
     n : number
         candidate sample size; truncated to an int

     Returns
     -------
     the ``'pvalue'`` entry of the ``ballot_polling_sprt`` result
     """
     n = int(n)
     zeros_count = math.ceil(n * N_l2 / N2)
     ones_count = int(n * N_w2 / N2)
     sample = [0] * zeros_count + [1] * ones_count
     shortfall = n - len(sample)
     if shortfall > 0:
         sample.extend([np.nan] * shortfall)
     sprt_result = ballot_polling_sprt(sample=np.array(sample),
                                       popsize=N2,
                                       alpha=risk_limit,
                                       Vw=N_w2,
                                       Vl=N_l2,
                                       null_margin=0)
     expected_pvalue = sprt_result['pvalue']
     if verbose:
         print('...trying...', n, expected_pvalue)
     return expected_pvalue
コード例 #4
0
def compute_power(Ntot, Vw, Vl, pi, alpha, reps=10**3):
    """
    Estimate by simulation how often ``ballot_polling_sprt`` gives a
    P-value at or below ``alpha``.

    Parameters
    ----------
    Ntot : int
        population size; also the number of trials of the binomial draw
        that sets each sample size
    Vw : int
        number of 1 entries in the simulated population
    Vl : int
        number of 0 entries in the simulated population
    pi : float
        success probability of the binomial draw for the sample size
    alpha : float
        threshold compared with the P-value; also passed to the test
    reps : int
        number of simulated samples. Default 1,000

    Returns
    -------
    float : fraction of repetitions with P-value <= alpha
    """
    unlabeled = Ntot - Vw - Vl
    population = np.array([1] * Vw + [0] * Vl + [np.nan] * unlabeled)

    rejections = 0
    for _ in range(reps):
        # Shuffle in place, then take a prefix of random length as the sample.
        np.random.shuffle(population)
        draw_size = binom.rvs(Ntot, pi)
        outcome = ballot_polling_sprt(sample=population[:draw_size],
                                      popsize=Ntot,
                                      alpha=alpha,
                                      Vw=Vw,
                                      Vl=Vl)
        if outcome['pvalue'] > alpha:
            continue
        rejections += 1
    return rejections / reps
コード例 #5
0
ファイル: suite_tools.py プロジェクト: pbstark/CORLA18
        def try_n(n):
            """
            Ballot-polling P-value if the no-CVR sample is enlarged to n.

            The observed sample is extended with 0s and 1s in the proportions
            seen so far (``n2l_obs`` and ``n2w_obs`` out of ``n2_original``),
            padded with NaN up to n entries, and tested against a null margin
            of 0.

            Parameters
            ----------
            n : number
                candidate sample size; truncated to an int

            Returns
            -------
            the ``'pvalue'`` entry of the ``ballot_polling_sprt`` result
            """
            n = int(n)
            extra = n - n2_original
            # 0 count rounded up, 1 count truncated.
            # NOTE(review): divides by n2_original; presumably the caller
            # guarantees it is non-zero -- confirm.
            expected_new_sample = \
                [0] * math.ceil(extra * (n2l_obs / n2_original)) + \
                [1] * int(extra * (n2w_obs / n2_original))
            totsample = observed_nocvr_sample + expected_new_sample
            if len(totsample) < n:
                totsample += [np.nan] * (n - len(totsample))
            totsample = np.array(totsample)

            # The per-sample tallies of 1s and 0s were computed here before
            # but never used, so they are no longer calculated.
            expected_pvalue = ballot_polling_sprt(
                sample=totsample,
                popsize=N2,
                alpha=risk_limit,
                Vw=N_w2, Vl=N_l2,
                null_margin=0)['pvalue']
            if verbose:
                print('...trying...', n, expected_pvalue)
            return expected_pvalue
コード例 #6
0
def simulate_fisher_combined_audit(N_w1,
                                   N_l1,
                                   N1,
                                   N_w2,
                                   N_l2,
                                   N2,
                                   n1,
                                   n2,
                                   alpha,
                                   reps=10000,
                                   verbose=False,
                                   feasible_lambda_range=None,
                                   gamma=1.03905):
    """
    Simulate the Fisher method of combining a ballot comparison audit
    and ballot polling audit, assuming the reported results are correct.
    Return the fraction of simulations where the audit successfully
    confirmed the election results.

    The comparison stratum is simulated with no discrepancies; the polling
    stratum sample is drawn without replacement from a population of
    ``N_w2`` winner ballots, ``N_l2`` loser ballots and ``N2 - N_w2 - N_l2``
    other ballots (coded NaN).

    Parameters
    ----------
    N_w1 : int
        votes for the reported winner in the ballot comparison stratum
    N_l1 : int
        votes for the reported loser in the ballot comparison stratum
    N1 : int
        total number of votes in the ballot comparison stratum
    N_w2 : int
        votes for the reported winner in the ballot polling stratum
    N_l2 : int
        votes for the reported loser in the ballot polling stratum
    N2 : int
        total number of votes in the ballot polling stratum
    n1 : int
        sample size in the ballot comparison stratum
    n2 : int
        sample size in the ballot polling stratum
    alpha : float
        risk limit
    reps : int
        number of times to simulate the audit. Default 10,000
    verbose : bool
        Optional, print iteration number if True
    feasible_lambda_range : array-like
        lower and upper limits to search over lambda. Optional, but will speed up the search
    gamma : float
        gamma passed to the ballot comparison p-value and to the modulus.
        Optional, default 1.03905 (the value previously hard-coded here)

    Returns
    -------
    float : fraction of simulations where the audit successfully
    confirmed the election results
    """
    margin = (N_w1 + N_w2) - (N_l1 + N_l2)

    # N1 and N2 are used as passed in. They used to be overwritten with
    # N_w1 + N_l1 and N_w2 + N_l2, which ignored the documented stratum
    # totals and left the NaN padding below permanently empty.
    pop2 = np.array([1] * N_w2 + [0] * N_l2 + [np.nan] * (N2 - N_w2 - N_l2))

    def cvr_pvalue(alloc):
        # Comparison stratum with zero over- and understatements.
        return ballot_comparison_pvalue(n=n1, gamma=gamma,
                                        o1=0, u1=0, o2=0, u2=0,
                                        reported_margin=margin, N=N1,
                                        null_lambda=alloc)

    fisher_pvalues = np.zeros(reps)

    for i in range(reps):
        if verbose:
            print(i)
        sam = np.random.choice(pop2, n2, replace=False)
        nw2 = np.sum(sam == 1)
        nl2 = np.sum(sam == 0)
        mod = create_modulus(n1, n2, nw2, nl2, N1, margin, gamma)

        # This closure reads the current iteration's ``sam``; it is passed to
        # the maximizer immediately below, before ``sam`` is rebound.
        def nocvr_pvalue(alloc):
            return ballot_polling_sprt(
                sample=sam, popsize=N2, alpha=alpha,
                Vw=N_w2, Vl=N_l2,
                null_margin=(N_w2 - N_l2) - alloc * margin)['pvalue']

        # alpha is forwarded so the maximizer works with the same risk limit
        # that the results are compared against in the return statement.
        fisher_pvalues[i] = maximize_fisher_combined_pvalue(
            N_w1,
            N_l1,
            N1,
            N_w2,
            N_l2,
            N2,
            pvalue_funs=[cvr_pvalue, nocvr_pvalue],
            modulus=mod,
            alpha=alpha,
            feasible_lambda_range=feasible_lambda_range)['max_pvalue']
    return np.mean(fisher_pvalues <= alpha)
コード例 #7
0
def audit_contest(candidates, winners, losers, stratum_sizes,
                  n1, n2, o1_obs, o2_obs, u1_obs, u2_obs, observed_poll,
                  risk_limit, gamma, stepsize):
    """
    Use SUITE to compute the attained risk for every (winner, loser) pair,
    given the samples observed in the CVR and no-CVR strata.

    Parameters
    ----------
    candidates : dict
        OrderedDict mapping candidate name to
        [CVR votes, no-CVR votes, total votes]
    winners : list
        names of winners
    losers : list
        names of losers
    stratum_sizes : list
        total number of votes in the CVR stratum and in the no-CVR stratum
    n1 : int
        size of the sample already drawn in the ballot comparison stratum
    n2 : int
        size of the sample already drawn in the ballot polling stratum
    o1_obs : int
        observed number of ballots with 1-vote overstatements in the CVR
        stratum
    o2_obs : int
        observed number of ballots with 2-vote overstatements in the CVR
        stratum
    u1_obs : int
        observed number of ballots with 1-vote understatements in the CVR
        stratum
    u2_obs : int
        observed number of ballots with 2-vote understatements in the CVR
        stratum
    observed_poll : dict
        maps candidate name to the number of votes for that candidate in
        the no-CVR stratum sample
    risk_limit : float
        risk limit
    gamma : float
        gamma from Lindeman and Stark (2012)
    stepsize : float
        stepsize for the discrete bounds on Fisher's combining function

    Returns
    -------
    dict : attained risk for each (winner, loser) pair in the contest,
    keyed by the (winner, loser) tuple
    """

    def no_evidence(alloc):
        # An empty sample in a stratum contributes a constant P-value of 1.
        return 1

    audit_pvalues = {}

    for pair in product(winners, losers):
        winner, loser = pair
        winner_votes = candidates[winner]
        loser_votes = candidates[loser]
        N_w1, N_w2 = winner_votes[0], winner_votes[1]
        N_l1, N_l2 = loser_votes[0], loser_votes[1]
        reported_margin = (N_w1 + N_w2) - (N_l1 + N_l2)

        # P-value function for the CVR (ballot comparison) stratum
        if n1 != 0:
            def cvr_pvalue(alloc):
                return ballot_comparison_pvalue(
                    n=n1,
                    gamma=gamma,
                    o1=o1_obs, u1=u1_obs, o2=o2_obs, u2=u2_obs,
                    reported_margin=reported_margin,
                    N=stratum_sizes[0],
                    null_lambda=alloc)
        else:
            cvr_pvalue = no_evidence

        # P-value function for the no-CVR (ballot polling) stratum
        n2w = observed_poll[winner]
        n2l = observed_poll[loser]
        if n2 != 0:
            # 0 = vote for the loser, 1 = vote for the winner, NaN = the rest
            # of the sample.
            sam = np.array([0] * n2l + [1] * n2w
                           + [np.nan] * (n2 - n2w - n2l))

            def nocvr_pvalue(alloc):
                outcome = ballot_polling_sprt(
                    sample=sam,
                    popsize=stratum_sizes[1],
                    alpha=risk_limit,
                    Vw=N_w2, Vl=N_l2,
                    null_margin=(N_w2 - N_l2) - alloc * reported_margin)
                return outcome['pvalue']
        else:
            nocvr_pvalue = no_evidence

        bounding_fun = create_modulus(n1=n1, n2=n2,
                                      n_w2=n2w,
                                      n_l2=n2l,
                                      N1=stratum_sizes[0],
                                      V_wl=reported_margin, gamma=gamma)
        res = maximize_fisher_combined_pvalue(
            N_w1=N_w1, N_l1=N_l1,
            N1=stratum_sizes[0],
            N_w2=N_w2, N_l2=N_l2,
            N2=stratum_sizes[1],
            pvalue_funs=(cvr_pvalue, nocvr_pvalue),
            stepsize=stepsize,
            modulus=bounding_fun,
            alpha=risk_limit)
        audit_pvalues[pair] = res['max_pvalue']

    return audit_pvalues