예제 #1
0
def ci_cmle_is(X, v, theta_grid, alpha_level, T=100, verbose=False):
    """Compute an interval for theta by inverting a test on a sampled objective.

    For each ``theta`` in ``theta_grid`` the weights ``w = exp(theta * v)``
    are handed to ``cond_a_sample`` together with the axis-1 and axis-0 sums
    of ``X``.  Each of the ``T`` returned draws is read as carrying ``logQ``
    at index 1 and ``logP`` at index 2; ``logkappa`` is the log of the mean
    of ``exp(logP - logQ)`` over the draws.  The objective stored for the
    grid point is ``sum(log(w[X])) - logkappa``.

    The objective, shifted so that its maximum is zero, is passed to
    ``invert_test`` with the cutoff ``-0.5 * chi2.ppf(1 - alpha_level, 1)``.

    NOTE(review): the name suggests a conditional-MLE interval estimated by
    importance sampling; ``chi2`` is presumably ``scipy.stats.chi2`` --
    confirm against the module's imports.

    Parameters
    ----------
    X : array
        Observed data.  It is summed along both axes and used to index the
        weight array.  NOTE(review): ``w[X]`` presumably expects a boolean
        mask -- confirm against callers.
    v : array
        Multiplied by each ``theta`` to form the log-weights.
    theta_grid : sequence
        Candidate values of theta.
    alpha_level : float
        ``1 - alpha_level`` is the chi-square quantile level of the cutoff.
    T : int
        Number of draws requested from ``cond_a_sample`` per grid point.
    verbose : bool
        When true, print an estimate of cv^2 of the importance weights for
        every grid point.

    Returns
    -------
    The value returned by ``invert_test``.

    Side effects
    ------------
    When the module-level ``params['plot']`` is truthy, the statistic is
    drawn with ``plot_statistics`` on ``ax_cmle_is`` and the interval, grid
    and cutoff are recorded in the module-level ``cmle_is_coverage_data``.
    """
    # Allocate floats explicitly: ``np.empty_like`` would inherit an integer
    # dtype from an integer grid and silently truncate the objective.
    cmle_is = np.empty(len(theta_grid), dtype=float)
    r = X.sum(1)
    c = X.sum(0)
    for l, theta_l in enumerate(theta_grid):
        w_l = np.exp(theta_l * v)

        z = cond_a_sample(r, c, w_l, T)
        # Log importance ratios logP - logQ, one per draw.
        logf = np.array([z[t][2] - z[t][1] for t in range(T)], dtype=float)
        logkappa = -np.log(T) + logsumexp(logf)

        if verbose:
            logcvsq = -np.log(T - 1) - 2 * logkappa + \
                logsumexp(2 * logabsdiffexp(logf, logkappa))
            # Call form prints identically on Python 2 and parses on Python 3.
            print('est. cv^2 = %.2f (T = %d)' % (np.exp(logcvsq), T))

        cmle_is[l] = np.sum(np.log(w_l[X])) - logkappa

    crit = -0.5 * chi2.ppf(1 - alpha_level, 1)
    normalized = cmle_is - cmle_is.max()
    ci = invert_test(theta_grid, normalized, crit)
    if params['plot']:
        plot_statistics(ax_cmle_is, theta_grid, normalized, crit)
        cmle_is_coverage_data['cis'].append(ci)
        cmle_is_coverage_data['theta_grid'] = theta_grid
        cmle_is_coverage_data['crit'] = crit
    return ci
def log_partition_is(z, cvsq=False):
    """Estimate a log-partition function from importance-weighted samples.

    Each element of ``z`` is indexed at positions 1 and 2; the log
    importance ratio of a sample is ``z[i][2] - z[i][1]``.  The estimate is
    the log of the arithmetic mean of the exponentiated ratios.

    Parameters
    ----------
    z : sequence
        Samples; only ``len(z)`` and items 1 and 2 of each element are used.
    cvsq : bool
        When true, also return ``logcvsq``, computed from the ratios via
        ``logabsdiffexp``.  NOTE(review): presumably the log of the
        estimated squared coefficient of variation of the weights --
        confirm against ``logabsdiffexp``.

    Returns
    -------
    ``logkappa`` alone, or the pair ``(logkappa, logcvsq)`` when ``cvsq``
    is true.
    """
    n_draws = len(z)

    log_ratios = np.empty(n_draws)
    for idx, draw in enumerate(z):
        log_ratios[idx] = draw[2] - draw[1]

    log_kappa = logsumexp(log_ratios) - np.log(n_draws)
    if cvsq:
        squared_dev = logsumexp(2 * logabsdiffexp(log_ratios, log_kappa))
        log_cv_sq = -np.log(n_draws - 1) - 2 * log_kappa + squared_dev
        return log_kappa, log_cv_sq
    return log_kappa
예제 #3
0
def log_partition_is(z, cvsq=False):
    """Estimate a log-partition function from importance-weighted samples.

    For every sample in ``z`` the log importance ratio is item 2 minus
    item 1 of that sample.  ``logkappa`` is the log of the mean of the
    exponentiated ratios.

    Parameters
    ----------
    z : sequence
        Samples; only their count and items 1 and 2 of each are read.
    cvsq : bool
        If true, additionally compute ``logcvsq`` from the ratios with
        ``logabsdiffexp``.  NOTE(review): presumably the log squared
        coefficient of variation of the weights -- confirm.

    Returns
    -------
    ``logkappa``, or ``(logkappa, logcvsq)`` when ``cvsq`` is true.
    """
    count = len(z)

    ratios = np.empty(count)
    for i, item in enumerate(z):
        ratios[i] = item[2] - item[1]

    log_mean = logsumexp(ratios) - np.log(count)
    if not cvsq:
        return log_mean

    spread = logsumexp(2 * logabsdiffexp(ratios, log_mean))
    return log_mean, -np.log(count - 1) - 2 * log_mean + spread
def ci_cmle_is(X, v, theta_grid, alpha_level, T=100, verbose=False):
    """Compute an interval for theta by inverting a test on a sampled objective.

    For each ``theta`` in ``theta_grid`` the weights ``w = exp(theta * v)``
    are handed to ``cond_a_sample`` together with the axis-1 and axis-0 sums
    of ``X``.  Each of the ``T`` returned draws is read as carrying ``logQ``
    at index 1 and ``logP`` at index 2; ``logkappa`` is the log of the mean
    of ``exp(logP - logQ)`` over the draws.  The objective stored for the
    grid point is ``sum(log(w[X])) - logkappa``.

    NOTE(review): the name suggests a conditional-MLE interval estimated by
    importance sampling; ``chi2`` is presumably ``scipy.stats.chi2`` --
    confirm against the module's imports.

    Parameters
    ----------
    X : array
        Observed data.  It is summed along both axes and used to index the
        weight array.  NOTE(review): ``w[X]`` presumably expects a boolean
        mask -- confirm against callers.
    v : array
        Multiplied by each ``theta`` to form the log-weights.
    theta_grid : sequence
        Candidate values of theta.
    alpha_level : float
        ``1 - alpha_level`` is the chi-square quantile level of the cutoff.
    T : int
        Number of draws requested from ``cond_a_sample`` per grid point.
    verbose : bool
        When true, print an estimate of cv^2 of the importance weights for
        every grid point.

    Returns
    -------
    The value returned by ``invert_test`` for the objective shifted to have
    maximum zero and the cutoff ``-0.5 * chi2.ppf(1 - alpha_level, 1)``.
    """
    # Allocate floats explicitly: ``np.empty_like`` would inherit an integer
    # dtype from an integer grid and silently truncate the objective.
    cmle_is = np.empty(len(theta_grid), dtype=float)
    r = X.sum(1)
    c = X.sum(0)
    for l, theta_l in enumerate(theta_grid):
        w_l = np.exp(theta_l * v)

        z = cond_a_sample(r, c, w_l, T)
        # Log importance ratios logP - logQ, one per draw.
        logf = np.array([z[t][2] - z[t][1] for t in range(T)], dtype=float)
        logkappa = -np.log(T) + logsumexp(logf)

        if verbose:
            logcvsq = -np.log(T - 1) - 2 * logkappa + \
                logsumexp(2 * logabsdiffexp(logf, logkappa))
            # Call form prints identically on Python 2 and parses on Python 3.
            print('est. cv^2 = %.2f (T = %d)' % (np.exp(logcvsq), T))

        cmle_is[l] = np.sum(np.log(w_l[X])) - logkappa

    return invert_test(theta_grid, cmle_is - cmle_is.max(),
                       -0.5 * chi2.ppf(1 - alpha_level, 1))
예제 #5
0
def ci_cmle_is(X, v, theta_grid, alpha_level, T=100, verbose=False):
    """Invert a test on an importance-sampled objective over ``theta_grid``.

    At each grid value ``theta`` the weights ``w = exp(theta * v)`` and the
    axis-1 / axis-0 sums of ``X`` are passed to ``cond_a_sample``, which is
    asked for ``T`` draws.  Item 1 of each draw is read as ``logQ`` and
    item 2 as ``logP``; the log of the average of ``exp(logP - logQ)`` is
    subtracted from ``sum(log(w[X]))`` to give the objective at ``theta``.

    NOTE(review): the name suggests a conditional-MLE interval estimated by
    importance sampling; ``chi2`` is presumably ``scipy.stats.chi2`` --
    confirm against the module's imports.

    Parameters
    ----------
    X : array
        Observed data, summed along both axes and used to index the weight
        array.  NOTE(review): ``w[X]`` presumably expects a boolean mask --
        confirm against callers.
    v : array
        Multiplied by each ``theta`` to form the log-weights.
    theta_grid : sequence
        Candidate values of theta.
    alpha_level : float
        ``1 - alpha_level`` is the chi-square quantile level of the cutoff.
    T : int
        Number of draws requested from ``cond_a_sample`` per grid point.
    verbose : bool
        When true, print an estimate of cv^2 of the importance weights for
        every grid point.

    Returns
    -------
    The value returned by ``invert_test`` for the objective shifted to have
    maximum zero and the cutoff ``-0.5 * chi2.ppf(1 - alpha_level, 1)``.
    """
    # A float buffer is requested explicitly because ``np.empty_like`` would
    # copy an integer dtype from an integer grid and truncate the values.
    cmle_is = np.empty(len(theta_grid), dtype=float)
    r = X.sum(1)
    c = X.sum(0)
    for l, theta_l in enumerate(theta_grid):
        w_l = np.exp(theta_l * v)

        z = cond_a_sample(r, c, w_l, T)
        # Log importance ratios logP - logQ, one per draw.
        logf = np.array([z[t][2] - z[t][1] for t in range(T)], dtype=float)
        logkappa = -np.log(T) + logsumexp(logf)

        if verbose:
            logcvsq = -np.log(T - 1) - 2 * logkappa + \
                logsumexp(2 * logabsdiffexp(logf, logkappa))
            # Call form prints identically on Python 2 and parses on Python 3.
            print('est. cv^2 = %.2f (T = %d)' % (np.exp(logcvsq), T))

        cmle_is[l] = np.sum(np.log(w_l[X])) - logkappa

    return invert_test(theta_grid, cmle_is - cmle_is.max(),
                       -0.5 * chi2.ppf(1 - alpha_level, 1))
예제 #6
0
def ci_conservative_generic(X,
                            K,
                            theta_grid,
                            alpha_level,
                            suff,
                            log_likelihood,
                            sample,
                            t,
                            corrected=True,
                            two_sided=True,
                            verbose=False):
    """Invert Monte Carlo tests over ``theta_grid`` using shared proposal draws.

    ``K`` draws are produced by calling ``sample`` at grid points picked
    uniformly at random.  For grid point ``l`` every draw ``Y_k`` receives
    the log-weight ``theta_l * sum(suff * Y_k) - logsumexp_m
    log_likelihood(Y_k, theta_m)``; the log p-value is the log of the
    weight falling in a tail of the test statistic divided by the total
    weight.  Slot ``K`` of every weight and indicator array stands for the
    observation ``X`` itself and is always counted in both tails.

    Parameters
    ----------
    X : array
        Observed data.
    K : int
        Number of proposal draws.
    theta_grid : sequence
        Candidate values of theta; its length is ``L``.
    alpha_level : float
        ``log(alpha_level)`` is the threshold handed to ``invert_test``.
    suff : array
        Multiplied elementwise with ``X`` and with each draw, then summed.
        NOTE(review): presumably the sufficient-statistic weights -- confirm.
    log_likelihood : callable
        ``log_likelihood(data, theta)`` returning a scalar.
    sample : callable
        ``sample(theta)`` returning one draw.
    t : callable
        ``t(data)`` returning ``L`` statistic values, one per grid point.
    corrected : bool
        When true the observation contributes its own weight; otherwise its
        log-weight is ``-inf`` and it contributes nothing.
    two_sided : bool
        When true the p-value is ``min(1, 2 * min(p_plus, p_minus))``;
        otherwise only ``p_minus`` (draws with statistic not above the
        observed one) is used.
    verbose : bool
        Print diagnostics while running.

    Returns
    -------
    The value returned by ``invert_test(theta_grid, log_p_vals,
    log(alpha_level))``.
    """
    L = len(theta_grid)

    # Generate samples from the mixture proposal distribution
    Y = [sample(theta_grid[np.random.randint(L)]) for k in range(K)]

    # Test statistic at observation, for each grid point
    t_X = t(X).reshape((L, 1))
    if verbose:
        print('X: t_min = %.2f, t_max = %.2f' % (t_X.min(), t_X.max()))

    # Statistics for the samples from the proposal distribution only
    # need to be calculated once...
    t_Y = np.empty((L, K + 1))
    t_Y[:, K] = 0.0
    for k in range(K):
        t_Y[:, k] = t(Y[k])
        if verbose:
            print('Y_%d: t_min = %.2f, t_max = %.2f' %
                  (k, t_Y[:, k].min(), t_Y[:, k].max()))
    if two_sided:
        I_t_Y_plus = t_Y >= t_X
        I_t_Y_plus[:, K] = True
    I_t_Y_minus = -t_Y >= -t_X
    I_t_Y_minus[:, K] = True

    # Probabilities under each component of the proposal distribution
    # only need to be calculated once...
    log_Q_X = np.empty(L)
    log_Q_Y = np.empty((L, K))
    for l in range(L):
        theta_l = theta_grid[l]

        log_Q_X[l] = log_likelihood(X, theta_l)
        for k in range(K):
            log_Q_Y[l, k] = log_likelihood(Y[k], theta_l)
        if verbose:
            print('%.2f: %.2g, %.2g' %
                  (theta_l, np.exp(log_Q_X[l]), np.exp(log_Q_Y[l].max())))
    log_Q_sum_X = logsumexp(log_Q_X)
    log_Q_sum_Y = np.empty(K)
    for k in range(K):
        log_Q_sum_Y[k] = logsumexp(log_Q_Y[:, k])

    # The summed statistics do not depend on theta, so compute them once
    # instead of once per grid point.
    suff_X = (suff * X).sum() if corrected else None
    suff_Y = np.array([(suff * y).sum() for y in Y], dtype=float)

    # Step over the grid, calculating approximate p-values
    if two_sided:
        log_p_plus = np.empty(L)
    log_p_minus = np.empty(L)
    for l in range(L):
        theta_l = theta_grid[l]
        log_w_l = np.empty(K + 1)

        # X contribution
        if corrected:
            log_w_l[K] = theta_l * suff_X - log_Q_sum_X
        else:
            log_w_l[K] = -np.inf

        # Y contribution
        log_w_l[:K] = theta_l * suff_Y - log_Q_sum_Y

        if two_sided:
            log_p_num_plus = logsumexp(log_w_l[I_t_Y_plus[l]])
        log_p_num_minus = logsumexp(log_w_l[I_t_Y_minus[l]])
        log_p_denom = logsumexp(log_w_l)

        if verbose:
            print('%.2f: %.2g (%.2g, %.2g)' %
                  (theta_l, log_w_l[K],
                   log_w_l[0:K].min(), log_w_l[0:K].max()))

        if two_sided:
            log_p_plus[l] = log_p_num_plus - log_p_denom
        log_p_minus[l] = log_p_num_minus - log_p_denom

    if two_sided:
        # p_pm = min(1, 2 * min(p_plus, p_minus))
        log_p_vals = np.fmin(0, np.log(2) + np.fmin(log_p_plus, log_p_minus))
    else:
        log_p_vals = np.fmin(0, log_p_minus)

    return invert_test(theta_grid, log_p_vals, np.log(alpha_level))
def ci_conservative_generic(X, K, theta_grid, alpha_level,
                            log_likelihood, sample, t,
                            verbose=False):
    """Invert two-sided Monte Carlo tests over ``theta_grid``.

    ``K`` draws are produced by calling ``sample`` at grid points picked
    uniformly at random.  For grid value ``theta`` every draw ``Y_k``
    receives the log-weight ``theta * t(Y_k) - logsumexp_m
    log_likelihood(Y_k, theta_m)`` and the observation receives the
    analogous weight.  The upper (lower) log p-value is the log of the
    weight of draws whose statistic is at least (at most) the observed one,
    divided by the total weight; the observation is always counted in both.
    The reported p-value is ``min(1, 2 * min(p_plus, p_minus))``.

    Parameters
    ----------
    X : object
        Observed data, passed to ``t`` and ``log_likelihood``.
    K : int
        Number of proposal draws.
    theta_grid : sequence
        Candidate values of theta.
    alpha_level : float
        ``log(alpha_level)`` is the threshold handed to ``invert_test``.
    log_likelihood : callable
        ``log_likelihood(data, theta)`` returning a scalar.
    sample : callable
        ``sample(theta)`` returning one draw.
    t : callable
        ``t(data)`` returning a scalar statistic.
    verbose : bool
        Print diagnostics while running.

    Returns
    -------
    The value returned by ``invert_test(theta_grid, log_p_pm,
    log(alpha_level))``.
    """
    L = len(theta_grid)

    # Generate samples from the mixture proposal distribution
    Y = [sample(theta_grid[np.random.randint(L)]) for _ in range(K)]

    # Test statistic at observation
    t_X = t(X)

    # Statistics for the samples from the proposal distribution only
    # need to be calculated once...  Slot K is reserved for the observation
    # and is forced into both tails.
    t_Y = np.zeros(K + 1)
    for k in range(K):
        t_Y[k] = t(Y[k])
    I_t_Y_plus = t_Y >= t_X
    I_t_Y_plus[K] = True
    I_t_Y_minus = -t_Y >= -t_X
    I_t_Y_minus[K] = True

    # Probabilities under each component of the proposal distribution
    # only need to be calculated once...
    log_Q_X = np.empty(L)
    log_Q_Y = np.empty((L, K))
    for l in range(L):
        theta_l = theta_grid[l]

        log_Q_X[l] = log_likelihood(X, theta_l)
        for k in range(K):
            log_Q_Y[l, k] = log_likelihood(Y[k], theta_l)
        if verbose:
            # Call form prints identically on Python 2 and parses on Python 3.
            print('%.2f: %.2g, %.2g' %
                  (theta_l, np.exp(log_Q_X[l]), np.exp(log_Q_Y[l].max())))
    log_Q_sum_X = logsumexp(log_Q_X)
    log_Q_sum_Y = np.empty(K)
    for k in range(K):
        log_Q_sum_Y[k] = logsumexp(log_Q_Y[:, k])

    # Step over the grid, calculating approximate p-values
    log_p_plus = np.empty(L)
    log_p_minus = np.empty(L)
    for l in range(L):
        theta_l = theta_grid[l]
        log_w_l = np.empty(K + 1)

        # X contribution
        log_w_l[K] = (theta_l * t_X) - log_Q_sum_X

        # Y contribution, filled in one vectorised step
        log_w_l[:K] = theta_l * t_Y[:K] - log_Q_sum_Y

        log_p_num_plus = logsumexp(log_w_l[I_t_Y_plus])
        log_p_num_minus = logsumexp(log_w_l[I_t_Y_minus])
        log_p_denom = logsumexp(log_w_l)

        if verbose:
            print('%.2f: %.2g (%.2g, %.2g)' %
                  (theta_l, log_w_l[K],
                   log_w_l[0:K].min(), log_w_l[0:K].max()))

        log_p_plus[l] = log_p_num_plus - log_p_denom
        log_p_minus[l] = log_p_num_minus - log_p_denom

    # p_pm = min(1, 2 * min(p_plus, p_minus))
    log_p_pm = np.fmin(0, np.log(2) + np.fmin(log_p_plus, log_p_minus))
    return invert_test(theta_grid, log_p_pm, np.log(alpha_level))
def ci_conservative_generic(X, K, theta_grid, alpha_level,
                            suff, log_likelihood, sample, t,
                            corrected=True, two_sided=True,
                            verbose=False):
    """Invert Monte Carlo tests over ``theta_grid`` using shared proposal draws.

    ``K`` draws are produced by calling ``sample`` at grid points picked
    uniformly at random.  For grid point ``l`` every draw ``Y_k`` receives
    the log-weight ``theta_l * sum(suff * Y_k) - logsumexp_m
    log_likelihood(Y_k, theta_m)``; the log p-value is the log of the
    weight falling in a tail of the test statistic divided by the total
    weight.  Slot ``K`` of every weight and indicator array stands for the
    observation ``X`` itself and is always counted in both tails.

    Parameters
    ----------
    X : array
        Observed data.
    K : int
        Number of proposal draws.
    theta_grid : sequence
        Candidate values of theta; its length is ``L``.
    alpha_level : float
        ``log(alpha_level)`` is the threshold handed to ``invert_test``.
    suff : array
        Multiplied elementwise with ``X`` and with each draw, then summed.
        NOTE(review): presumably the sufficient-statistic weights -- confirm.
    log_likelihood : callable
        ``log_likelihood(data, theta)`` returning a scalar.
    sample : callable
        ``sample(theta)`` returning one draw.
    t : callable
        ``t(data)`` returning ``L`` statistic values, one per grid point.
    corrected : bool
        When true the observation contributes its own weight; otherwise its
        log-weight is ``-inf`` and it contributes nothing.
    two_sided : bool
        When true the p-value is ``min(1, 2 * min(p_plus, p_minus))``;
        otherwise only ``p_minus`` (draws with statistic not above the
        observed one) is used.
    verbose : bool
        Print diagnostics while running.

    Returns
    -------
    The value returned by ``invert_test(theta_grid, log_p_vals,
    log(alpha_level))``.
    """
    L = len(theta_grid)

    # Generate samples from the mixture proposal distribution
    Y = [sample(theta_grid[np.random.randint(L)]) for k in range(K)]

    # Test statistic at observation, for each grid point
    t_X = t(X).reshape((L, 1))
    if verbose:
        print('X: t_min = %.2f, t_max = %.2f' % (t_X.min(), t_X.max()))

    # Statistics for the samples from the proposal distribution only
    # need to be calculated once...
    t_Y = np.empty((L, K + 1))
    t_Y[:, K] = 0.0
    for k in range(K):
        t_Y[:, k] = t(Y[k])
        if verbose:
            print('Y_%d: t_min = %.2f, t_max = %.2f' %
                  (k, t_Y[:, k].min(), t_Y[:, k].max()))
    if two_sided:
        I_t_Y_plus = t_Y >= t_X
        I_t_Y_plus[:, K] = True
    I_t_Y_minus = -t_Y >= -t_X
    I_t_Y_minus[:, K] = True

    # Probabilities under each component of the proposal distribution
    # only need to be calculated once...
    log_Q_X = np.empty(L)
    log_Q_Y = np.empty((L, K))
    for l in range(L):
        theta_l = theta_grid[l]

        log_Q_X[l] = log_likelihood(X, theta_l)
        for k in range(K):
            log_Q_Y[l, k] = log_likelihood(Y[k], theta_l)
        if verbose:
            print('%.2f: %.2g, %.2g' %
                  (theta_l, np.exp(log_Q_X[l]), np.exp(log_Q_Y[l].max())))
    log_Q_sum_X = logsumexp(log_Q_X)
    log_Q_sum_Y = np.empty(K)
    for k in range(K):
        log_Q_sum_Y[k] = logsumexp(log_Q_Y[:, k])

    # The summed statistics are independent of theta: evaluate them a single
    # time rather than inside the grid loop.
    suff_X = (suff * X).sum() if corrected else None
    suff_Y = np.array([(suff * y).sum() for y in Y], dtype=float)

    # Step over the grid, calculating approximate p-values
    if two_sided:
        log_p_plus = np.empty(L)
    log_p_minus = np.empty(L)
    for l in range(L):
        theta_l = theta_grid[l]
        log_w_l = np.empty(K + 1)

        # X contribution
        if corrected:
            log_w_l[K] = theta_l * suff_X - log_Q_sum_X
        else:
            log_w_l[K] = -np.inf

        # Y contribution
        log_w_l[:K] = theta_l * suff_Y - log_Q_sum_Y

        if two_sided:
            log_p_num_plus = logsumexp(log_w_l[I_t_Y_plus[l]])
        log_p_num_minus = logsumexp(log_w_l[I_t_Y_minus[l]])
        log_p_denom = logsumexp(log_w_l)

        if verbose:
            print('%.2f: %.2g (%.2g, %.2g)' %
                  (theta_l, log_w_l[K],
                   log_w_l[0:K].min(), log_w_l[0:K].max()))

        if two_sided:
            log_p_plus[l] = log_p_num_plus - log_p_denom
        log_p_minus[l] = log_p_num_minus - log_p_denom

    if two_sided:
        # p_pm = min(1, 2 * min(p_plus, p_minus))
        log_p_vals = np.fmin(0, np.log(2) + np.fmin(log_p_plus, log_p_minus))
    else:
        log_p_vals = np.fmin(0, log_p_minus)

    return invert_test(theta_grid, log_p_vals, np.log(alpha_level))
예제 #9
0
def ci_conservative_generic(X,
                            K,
                            theta_grid,
                            alpha_level,
                            log_likelihood,
                            sample,
                            t,
                            verbose=False):
    """Invert two-sided Monte Carlo tests over ``theta_grid``.

    ``K`` draws are produced by calling ``sample`` at grid points picked
    uniformly at random.  For grid value ``theta`` every draw ``Y_k``
    receives the log-weight ``theta * t(Y_k) - logsumexp_m
    log_likelihood(Y_k, theta_m)`` and the observation receives the
    analogous weight.  The upper (lower) log p-value is the log of the
    weight of draws whose statistic is at least (at most) the observed one,
    divided by the total weight; the observation is always counted in both.
    The reported p-value is ``min(1, 2 * min(p_plus, p_minus))``.

    Parameters
    ----------
    X : object
        Observed data, passed to ``t`` and ``log_likelihood``.
    K : int
        Number of proposal draws.
    theta_grid : sequence
        Candidate values of theta.
    alpha_level : float
        ``log(alpha_level)`` is the threshold handed to ``invert_test``.
    log_likelihood : callable
        ``log_likelihood(data, theta)`` returning a scalar.
    sample : callable
        ``sample(theta)`` returning one draw.
    t : callable
        ``t(data)`` returning a scalar statistic.
    verbose : bool
        Print diagnostics while running.

    Returns
    -------
    The value returned by ``invert_test(theta_grid, log_p_pm,
    log(alpha_level))``.
    """
    L = len(theta_grid)

    # Generate samples from the mixture proposal distribution
    Y = [sample(theta_grid[np.random.randint(L)]) for _ in range(K)]

    # Test statistic at observation
    t_X = t(X)

    # Statistics for the samples from the proposal distribution only
    # need to be calculated once...  Slot K is reserved for the observation
    # and is forced into both tails.
    t_Y = np.zeros(K + 1)
    for k in range(K):
        t_Y[k] = t(Y[k])
    I_t_Y_plus = t_Y >= t_X
    I_t_Y_plus[K] = True
    I_t_Y_minus = -t_Y >= -t_X
    I_t_Y_minus[K] = True

    # Probabilities under each component of the proposal distribution
    # only need to be calculated once...
    log_Q_X = np.empty(L)
    log_Q_Y = np.empty((L, K))
    for l in range(L):
        theta_l = theta_grid[l]

        log_Q_X[l] = log_likelihood(X, theta_l)
        for k in range(K):
            log_Q_Y[l, k] = log_likelihood(Y[k], theta_l)
        if verbose:
            # Call form prints identically on Python 2 and parses on Python 3.
            print('%.2f: %.2g, %.2g' %
                  (theta_l, np.exp(log_Q_X[l]), np.exp(log_Q_Y[l].max())))
    log_Q_sum_X = logsumexp(log_Q_X)
    log_Q_sum_Y = np.empty(K)
    for k in range(K):
        log_Q_sum_Y[k] = logsumexp(log_Q_Y[:, k])

    # Step over the grid, calculating approximate p-values
    log_p_plus = np.empty(L)
    log_p_minus = np.empty(L)
    for l in range(L):
        theta_l = theta_grid[l]
        log_w_l = np.empty(K + 1)

        # X contribution
        log_w_l[K] = (theta_l * t_X) - log_Q_sum_X

        # Y contribution, filled in one vectorised step
        log_w_l[:K] = theta_l * t_Y[:K] - log_Q_sum_Y

        log_p_num_plus = logsumexp(log_w_l[I_t_Y_plus])
        log_p_num_minus = logsumexp(log_w_l[I_t_Y_minus])
        log_p_denom = logsumexp(log_w_l)

        if verbose:
            print('%.2f: %.2g (%.2g, %.2g)' %
                  (theta_l, log_w_l[K],
                   log_w_l[0:K].min(), log_w_l[0:K].max()))

        log_p_plus[l] = log_p_num_plus - log_p_denom
        log_p_minus[l] = log_p_num_minus - log_p_denom

    # p_pm = min(1, 2 * min(p_plus, p_minus))
    log_p_pm = np.fmin(0, np.log(2) + np.fmin(log_p_plus, log_p_minus))
    return invert_test(theta_grid, log_p_pm, np.log(alpha_level))