def ci_cmle_is(X, v, theta_grid, alpha_level, T=100, verbose=False):
    """Confidence interval from an importance-sampled conditional likelihood.

    For every candidate ``theta`` on ``theta_grid`` the conditional
    log-likelihood of ``X`` is evaluated as ``sum(log(w[X])) - logkappa``,
    where ``w = exp(theta * v)`` and ``logkappa`` is an importance-sampling
    estimate of the log normalising constant built from ``T`` draws of
    ``cond_a_sample``.  The profile (shifted so its maximum is 0) is then
    handed to ``invert_test`` together with a chi-square(1) cutoff.

    Parameters
    ----------
    X : array
        Observed data; its row sums ``X.sum(1)`` and column sums
        ``X.sum(0)`` are passed to the sampler, and ``X`` is used to index
        ``w``.  (Presumably a 2-D boolean matrix -- confirm against callers.)
    v : array
        Multiplied by ``theta`` to give the logits; must be indexable by
        ``X``.
    theta_grid : sequence of float
        Candidate parameter values.
    alpha_level : float
        The cutoff is ``-0.5 * chi2.ppf(1 - alpha_level, 1)``.
    T : int, optional
        Number of importance samples requested per grid point.
    verbose : bool, optional
        Print the estimated squared coefficient of variation of the
        importance weights for each grid point.

    Returns
    -------
    Whatever ``invert_test`` returns for the shifted profile and cutoff.

    Notes
    -----
    Relies on names that are not parameters: ``params``, ``ax_cmle_is``,
    ``plot_statistics`` and ``cmle_is_coverage_data`` must be available from
    the enclosing/module scope.
    """
    # Always accumulate in floating point.  np.empty_like would inherit the
    # dtype of theta_grid, so an integer-valued grid would silently truncate
    # every log-likelihood written below.
    cmle_is = np.empty(len(theta_grid), dtype=float)
    r = X.sum(1)
    c = X.sum(0)
    for l, theta_l in enumerate(theta_grid):
        logit_P_l = theta_l * v
        w_l = np.exp(logit_P_l)
        z = cond_a_sample(r, c, w_l, T)

        # Per-draw log importance weight: entry 2 minus entry 1 of each
        # draw (named logP and logQ in the original code).
        logf = np.array([z[t][2] - z[t][1] for t in range(T)], dtype=float)
        logkappa = -np.log(T) + logsumexp(logf)

        if verbose:
            logcvsq = (-np.log(T - 1) - 2 * logkappa +
                       logsumexp(2 * logabsdiffexp(logf, logkappa)))
            print('est. cv^2 = %.2f (T = %d)' % (np.exp(logcvsq), T))

        cmle_is[l] = np.sum(np.log(w_l[X])) - logkappa

    crit = -0.5 * chi2.ppf(1 - alpha_level, 1)
    shifted = cmle_is - cmle_is.max()
    ci = invert_test(theta_grid, shifted, crit)

    # NOTE(review): the source this was recovered from had lost its
    # indentation; the coverage bookkeeping is assumed to belong to the
    # plotting branch -- confirm.
    if params['plot']:
        plot_statistics(ax_cmle_is, theta_grid, shifted, crit)
        cmle_is_coverage_data['cis'].append(ci)
        cmle_is_coverage_data['theta_grid'] = theta_grid
        cmle_is_coverage_data['crit'] = crit

    return ci
def log_partition_is(z, cvsq=False):
    """Estimate a log-partition function from importance-weighted samples.

    Parameters
    ----------
    z : sequence
        One entry per draw.  Only positions 1 and 2 of each entry are read;
        ``entry[2] - entry[1]`` is taken as that draw's log importance
        weight.
    cvsq : bool, optional
        When true, additionally return ``logcvsq``, computed from the
        weights via ``logabsdiffexp``.  (Presumably the log of the squared
        coefficient of variation of the weights -- this assumes
        ``logabsdiffexp(a, b)`` is ``log|exp(a) - exp(b)|``.)

    Returns
    -------
    ``logkappa`` (log of the mean importance weight), or the pair
    ``(logkappa, logcvsq)`` when ``cvsq`` is true.
    """
    n_draws = len(z)
    log_weights = np.array([draw[2] - draw[1] for draw in z], dtype=float)

    # log(mean(exp(log_weights))), kept in log space for stability.
    log_mean = -np.log(n_draws) + logsumexp(log_weights)
    if cvsq:
        log_cv2 = (-np.log(n_draws - 1) - 2 * log_mean +
                   logsumexp(2 * logabsdiffexp(log_weights, log_mean)))
        return log_mean, log_cv2
    return log_mean
def log_partition_is(z, cvsq=False):
    """Estimate a log-partition function from importance-weighted samples.

    Each element of ``z`` is indexed at positions 1 and 2; the difference
    ``z[t][2] - z[t][1]`` is the log importance weight of draw ``t``.

    Returns ``logkappa``, the log of the average importance weight.  With
    ``cvsq=True`` the tuple ``(logkappa, logcvsq)`` is returned instead,
    where ``logcvsq`` is derived from the same weights through
    ``logabsdiffexp`` (presumably the log squared coefficient of variation
    of the weights; depends on what ``logabsdiffexp`` computes).
    """
    count = len(z)
    log_ratio = np.array([item[2] - item[1] for item in z], dtype=float)

    # Average the weights without leaving log space.
    log_kappa = -np.log(count) + logsumexp(log_ratio)
    if not cvsq:
        return log_kappa

    spread = logsumexp(2 * logabsdiffexp(log_ratio, log_kappa))
    log_cvsq = -np.log(count - 1) - 2 * log_kappa + spread
    return log_kappa, log_cvsq
def ci_cmle_is(X, v, theta_grid, alpha_level, T=100, verbose=False):
    """Confidence interval from an importance-sampled conditional likelihood.

    For every candidate ``theta`` on ``theta_grid`` the conditional
    log-likelihood of ``X`` is evaluated as ``sum(log(w[X])) - logkappa``,
    where ``w = exp(theta * v)`` and ``logkappa`` is an importance-sampling
    estimate of the log normalising constant built from ``T`` draws of
    ``cond_a_sample``.  The profile, shifted so its maximum is 0, is passed
    to ``invert_test`` with the cutoff ``-0.5 * chi2.ppf(1 - alpha_level, 1)``.

    Parameters
    ----------
    X : array
        Observed data; ``X.sum(1)`` and ``X.sum(0)`` are passed to the
        sampler and ``X`` is used to index ``w``.  (Presumably a 2-D boolean
        matrix -- confirm against callers.)
    v : array
        Multiplied by ``theta`` to give the logits; must be indexable by
        ``X``.
    theta_grid : sequence of float
        Candidate parameter values.
    alpha_level : float
        Level used for the chi-square(1) cutoff.
    T : int, optional
        Number of importance samples requested per grid point.
    verbose : bool, optional
        Print the estimated squared coefficient of variation of the
        importance weights for each grid point.

    Returns
    -------
    Whatever ``invert_test`` returns for the shifted profile and cutoff.
    """
    # Always accumulate in floating point.  np.empty_like would inherit the
    # dtype of theta_grid, so an integer-valued grid would silently truncate
    # every log-likelihood written below.
    cmle_is = np.empty(len(theta_grid), dtype=float)
    r = X.sum(1)
    c = X.sum(0)
    for l, theta_l in enumerate(theta_grid):
        logit_P_l = theta_l * v
        w_l = np.exp(logit_P_l)
        z = cond_a_sample(r, c, w_l, T)

        # Per-draw log importance weight: entry 2 minus entry 1 of each
        # draw (named logP and logQ in the original code).
        logf = np.array([z[t][2] - z[t][1] for t in range(T)], dtype=float)
        logkappa = -np.log(T) + logsumexp(logf)

        if verbose:
            logcvsq = (-np.log(T - 1) - 2 * logkappa +
                       logsumexp(2 * logabsdiffexp(logf, logkappa)))
            print('est. cv^2 = %.2f (T = %d)' % (np.exp(logcvsq), T))

        cmle_is[l] = np.sum(np.log(w_l[X])) - logkappa

    crit = -0.5 * chi2.ppf(1 - alpha_level, 1)
    return invert_test(theta_grid, cmle_is - cmle_is.max(), crit)
def ci_cmle_is(X, v, theta_grid, alpha_level, T=100, verbose=False):
    """Confidence interval from an importance-sampled conditional likelihood.

    For every candidate ``theta`` on ``theta_grid`` the conditional
    log-likelihood of ``X`` is evaluated as ``sum(log(w[X])) - logkappa``,
    where ``w = exp(theta * v)`` and ``logkappa`` is an importance-sampling
    estimate of the log normalising constant built from ``T`` draws of
    ``cond_a_sample``.  The profile, shifted so its maximum is 0, is passed
    to ``invert_test`` with the cutoff ``-0.5 * chi2.ppf(1 - alpha_level, 1)``.

    Parameters
    ----------
    X : array
        Observed data; ``X.sum(1)`` and ``X.sum(0)`` are passed to the
        sampler and ``X`` is used to index ``w``.  (Presumably a 2-D boolean
        matrix -- confirm against callers.)
    v : array
        Multiplied by ``theta`` to give the logits; must be indexable by
        ``X``.
    theta_grid : sequence of float
        Candidate parameter values.
    alpha_level : float
        Level used for the chi-square(1) cutoff.
    T : int, optional
        Number of importance samples requested per grid point.
    verbose : bool, optional
        Print the estimated squared coefficient of variation of the
        importance weights for each grid point.

    Returns
    -------
    Whatever ``invert_test`` returns for the shifted profile and cutoff.
    """
    # Always accumulate in floating point.  np.empty_like would inherit the
    # dtype of theta_grid, so an integer-valued grid would silently truncate
    # every log-likelihood written below.
    cmle_is = np.empty(len(theta_grid), dtype=float)
    r = X.sum(1)
    c = X.sum(0)
    for l, theta_l in enumerate(theta_grid):
        logit_P_l = theta_l * v
        w_l = np.exp(logit_P_l)
        z = cond_a_sample(r, c, w_l, T)

        # Per-draw log importance weight: entry 2 minus entry 1 of each
        # draw (named logP and logQ in the original code).
        logf = np.array([z[t][2] - z[t][1] for t in range(T)], dtype=float)
        logkappa = -np.log(T) + logsumexp(logf)

        if verbose:
            logcvsq = (-np.log(T - 1) - 2 * logkappa +
                       logsumexp(2 * logabsdiffexp(logf, logkappa)))
            print('est. cv^2 = %.2f (T = %d)' % (np.exp(logcvsq), T))

        cmle_is[l] = np.sum(np.log(w_l[X])) - logkappa

    crit = -0.5 * chi2.ppf(1 - alpha_level, 1)
    return invert_test(theta_grid, cmle_is - cmle_is.max(), crit)
def ci_conservative_generic(X, K, theta_grid, alpha_level, suff,
                            log_likelihood, sample, t,
                            corrected=True, two_sided=True, verbose=False):
    """Confidence interval by inverting importance-sampled p-values.

    ``K`` data sets are drawn from a mixture proposal: each draw picks a
    grid point uniformly at random and calls ``sample`` at it.  For every
    grid point, a p-value is formed as a ratio of importance weights: the
    weights of the draws whose test statistic is at least as extreme as the
    observed one, over the weights of all draws.  The log p-values are
    finally passed to ``invert_test`` with threshold ``log(alpha_level)``.

    Parameters
    ----------
    X : array
        Observed data.
    K : int
        Number of proposal draws.
    theta_grid : sequence of float
        Candidate parameter values (``L`` of them).
    alpha_level : float
        Level at which the test is inverted.
    suff : array
        Multiplied elementwise with a data set and summed; ``theta`` times
        that sum is the numerator of the log importance weight.
    log_likelihood : callable
        ``log_likelihood(data, theta)``, evaluated for ``X`` and every draw
        at every grid point to form the mixture proposal density.
    sample : callable
        ``sample(theta)`` returns one data set.
    t : callable
        ``t(data)`` returns the test statistic at each grid point (``L``
        values).
    corrected : bool, optional
        If true, the observation itself contributes a weight; otherwise its
        log weight is ``-inf``.
    two_sided : bool, optional
        If true, use ``min(1, 2 * min(p_plus, p_minus))``; otherwise only
        the lower-tail p-value ``min(1, p_minus)``.
    verbose : bool, optional
        Print diagnostics.

    Returns
    -------
    Whatever ``invert_test`` returns for the grid of log p-values.
    """
    L = len(theta_grid)

    # Generate samples from the mixture proposal distribution
    Y = [sample(theta_grid[np.random.randint(L)]) for k in range(K)]

    # Test statistic at observation, for each grid point
    t_X = t(X).reshape((L, 1))
    if verbose:
        print('X: t_min = %.2f, t_max = %.2f' % (t_X.min(), t_X.max()))

    # Statistics for the samples from the proposal distribution only
    # need to be calculated once...
    t_Y = np.empty((L, K + 1))
    t_Y[:, K] = 0.0
    for k in range(K):
        t_Y[:, k] = t(Y[k])
        if verbose:
            print('Y_%d: t_min = %.2f, t_max = %.2f' %
                  (k, t_Y[:, k].min(), t_Y[:, k].max()))

    # Column K stands for the observation itself and is always counted.
    # The lower-tail mask is built unconditionally because the one-sided
    # branch at the end needs it as well.
    if two_sided:
        I_t_Y_plus = t_Y >= t_X
        I_t_Y_plus[:, K] = True
    I_t_Y_minus = -t_Y >= -t_X
    I_t_Y_minus[:, K] = True

    # Probabilities under each component of the proposal distribution
    # only need to be calculated once...
    log_Q_X = np.empty(L)
    log_Q_Y = np.empty((L, K))
    for l in range(L):
        theta_l = theta_grid[l]
        log_Q_X[l] = log_likelihood(X, theta_l)
        for k in range(K):
            log_Q_Y[l, k] = log_likelihood(Y[k], theta_l)
        if verbose:
            print('%.2f: %.2g, %.2g' %
                  (theta_l, np.exp(log_Q_X[l]), np.exp(log_Q_Y[l].max())))
    log_Q_sum_X = logsumexp(log_Q_X)
    log_Q_sum_Y = np.empty(K)
    for k in range(K):
        log_Q_sum_Y[k] = logsumexp(log_Q_Y[:, k])

    # The sufficient-statistic sums do not depend on theta, so compute them
    # once here instead of once per grid point inside the loop below.
    if corrected:
        suff_X = (suff * X).sum()
    suff_Y = np.array([(suff * Y_k).sum() for Y_k in Y], dtype=float)

    # Step over the grid, calculating approximate p-values
    if two_sided:
        log_p_plus = np.empty(L)
    log_p_minus = np.empty(L)
    for l in range(L):
        theta_l = theta_grid[l]
        log_w_l = np.empty(K + 1)

        # X contribution
        if corrected:
            log_w_l[K] = theta_l * suff_X - log_Q_sum_X
        else:
            log_w_l[K] = -np.inf

        # Y contribution
        log_w_l[:K] = theta_l * suff_Y - log_Q_sum_Y

        if two_sided:
            log_p_num_plus = logsumexp(log_w_l[I_t_Y_plus[l]])
        log_p_num_minus = logsumexp(log_w_l[I_t_Y_minus[l]])
        log_p_denom = logsumexp(log_w_l)

        if verbose:
            print('%.2f: %.2g (%.2g, %.2g)' %
                  (theta_l, log_w_l[K],
                   log_w_l[0:K].min(), log_w_l[0:K].max()))

        if two_sided:
            log_p_plus[l] = log_p_num_plus - log_p_denom
        log_p_minus[l] = log_p_num_minus - log_p_denom

    if two_sided:
        # p_pm = min(1, 2 * min(p_plus, p_minus))
        log_p_vals = np.fmin(0, np.log(2) + np.fmin(log_p_plus, log_p_minus))
    else:
        log_p_vals = np.fmin(0, log_p_minus)

    return invert_test(theta_grid, log_p_vals, np.log(alpha_level))
def ci_conservative_generic(X, K, theta_grid, alpha_level,
                            log_likelihood, sample, t, verbose=False):
    """Two-sided confidence interval by inverting importance-sampled p-values.

    ``K`` data sets are drawn from a mixture proposal (a grid point chosen
    uniformly at random, then ``sample`` at that point).  For each grid
    point ``theta`` the log weight of a data set is ``theta * t(data)``
    minus the log of the summed proposal likelihoods over the grid.  The
    upper- and lower-tail p-values are weight ratios, combined as
    ``min(1, 2 * min(p_plus, p_minus))`` and passed in log form to
    ``invert_test`` with threshold ``log(alpha_level)``.

    Parameters
    ----------
    X : observed data.
    K : int, number of proposal draws.
    theta_grid : sequence of float, candidate parameter values.
    alpha_level : float, level at which the test is inverted.
    log_likelihood : callable ``(data, theta) -> float``.
    sample : callable ``theta -> data``.
    t : callable ``data -> scalar`` test statistic (it is stored into a
        1-D float array, one slot per draw).
    verbose : bool, print per-grid-point diagnostics.

    Returns
    -------
    Whatever ``invert_test`` returns for the grid of log p-values.
    """
    n_grid = len(theta_grid)

    # Draw from the mixture proposal: random grid point, then a sample.
    draws = [sample(theta_grid[np.random.randint(n_grid)]) for _ in range(K)]

    # Statistic of the observation, then of each draw; the extra trailing
    # slot (left at 0) stands for the observation and is always counted.
    stat_obs = t(X)
    stat_all = np.zeros(K + 1)
    for k, draw in enumerate(draws):
        stat_all[k] = t(draw)
    upper_mask = stat_all >= stat_obs
    upper_mask[K] = True
    lower_mask = -stat_all >= -stat_obs
    lower_mask[K] = True

    # Log-likelihood of the observation and of every draw under every
    # mixture component; computed once and reused for all grid points.
    log_q_obs = np.empty(n_grid)
    log_q_draws = np.empty((n_grid, K))
    for i, theta in enumerate(theta_grid):
        log_q_obs[i] = log_likelihood(X, theta)
        for k, draw in enumerate(draws):
            log_q_draws[i, k] = log_likelihood(draw, theta)
        if verbose:
            print('%.2f: %.2g, %.2g' %
                  (theta, np.exp(log_q_obs[i]),
                   np.exp(log_q_draws[i].max())))
    mix_obs = logsumexp(log_q_obs)
    mix_draws = np.empty(K)
    for k in range(K):
        mix_draws[k] = logsumexp(log_q_draws[:, k])

    # Walk the grid and form the tail p-values as ratios of weights.
    log_p_upper = np.empty(n_grid)
    log_p_lower = np.empty(n_grid)
    for i, theta in enumerate(theta_grid):
        log_w = np.empty(K + 1)
        log_w[K] = (theta * stat_obs) - mix_obs
        log_w[:K] = (theta * stat_all[:K]) - mix_draws

        total = logsumexp(log_w)
        numer_upper = logsumexp(log_w[upper_mask])
        numer_lower = logsumexp(log_w[lower_mask])

        if verbose:
            print('%.2f: %.2g (%.2g, %.2g)' %
                  (theta, log_w[K], log_w[0:K].min(), log_w[0:K].max()))

        log_p_upper[i] = numer_upper - total
        log_p_lower[i] = numer_lower - total

    # p_pm = min(1, 2 * min(p_plus, p_minus))
    smaller_tail = np.fmin(log_p_upper, log_p_lower)
    log_p_two_sided = np.fmin(0, np.log(2) + smaller_tail)
    return invert_test(theta_grid, log_p_two_sided, np.log(alpha_level))
def ci_conservative_generic(X, K, theta_grid, alpha_level, suff,
                            log_likelihood, sample, t,
                            corrected=True, two_sided=True, verbose=False):
    """Confidence interval by inverting importance-sampled p-values.

    ``K`` data sets are drawn from a mixture proposal: each draw picks a
    grid point uniformly at random and calls ``sample`` at it.  For every
    grid point, a p-value is formed as a ratio of importance weights: the
    weights of the draws whose test statistic is at least as extreme as the
    observed one, over the weights of all draws.  The log p-values are
    finally passed to ``invert_test`` with threshold ``log(alpha_level)``.

    Parameters
    ----------
    X : array
        Observed data.
    K : int
        Number of proposal draws.
    theta_grid : sequence of float
        Candidate parameter values (``L`` of them).
    alpha_level : float
        Level at which the test is inverted.
    suff : array
        Multiplied elementwise with a data set and summed; ``theta`` times
        that sum is the numerator of the log importance weight.
    log_likelihood : callable
        ``log_likelihood(data, theta)``, evaluated for ``X`` and every draw
        at every grid point to form the mixture proposal density.
    sample : callable
        ``sample(theta)`` returns one data set.
    t : callable
        ``t(data)`` returns the test statistic at each grid point (``L``
        values).
    corrected : bool, optional
        If true, the observation itself contributes a weight; otherwise its
        log weight is ``-inf``.
    two_sided : bool, optional
        If true, use ``min(1, 2 * min(p_plus, p_minus))``; otherwise only
        the lower-tail p-value ``min(1, p_minus)``.
    verbose : bool, optional
        Print diagnostics.

    Returns
    -------
    Whatever ``invert_test`` returns for the grid of log p-values.
    """
    L = len(theta_grid)

    # Generate samples from the mixture proposal distribution
    Y = [sample(theta_grid[np.random.randint(L)]) for k in range(K)]

    # Test statistic at observation, for each grid point
    t_X = t(X).reshape((L, 1))
    if verbose:
        print('X: t_min = %.2f, t_max = %.2f' % (t_X.min(), t_X.max()))

    # Statistics for the samples from the proposal distribution only
    # need to be calculated once...
    t_Y = np.empty((L, K + 1))
    t_Y[:, K] = 0.0
    for k in range(K):
        t_Y[:, k] = t(Y[k])
        if verbose:
            print('Y_%d: t_min = %.2f, t_max = %.2f' %
                  (k, t_Y[:, k].min(), t_Y[:, k].max()))

    # Column K stands for the observation itself and is always counted.
    # The lower-tail mask is built unconditionally because the one-sided
    # branch at the end needs it as well.
    if two_sided:
        I_t_Y_plus = t_Y >= t_X
        I_t_Y_plus[:, K] = True
    I_t_Y_minus = -t_Y >= -t_X
    I_t_Y_minus[:, K] = True

    # Probabilities under each component of the proposal distribution
    # only need to be calculated once...
    log_Q_X = np.empty(L)
    log_Q_Y = np.empty((L, K))
    for l in range(L):
        theta_l = theta_grid[l]
        log_Q_X[l] = log_likelihood(X, theta_l)
        for k in range(K):
            log_Q_Y[l, k] = log_likelihood(Y[k], theta_l)
        if verbose:
            print('%.2f: %.2g, %.2g' %
                  (theta_l, np.exp(log_Q_X[l]), np.exp(log_Q_Y[l].max())))
    log_Q_sum_X = logsumexp(log_Q_X)
    log_Q_sum_Y = np.empty(K)
    for k in range(K):
        log_Q_sum_Y[k] = logsumexp(log_Q_Y[:, k])

    # The sufficient-statistic sums do not depend on theta, so compute them
    # once here instead of once per grid point inside the loop below.
    if corrected:
        suff_X = (suff * X).sum()
    suff_Y = np.array([(suff * Y_k).sum() for Y_k in Y], dtype=float)

    # Step over the grid, calculating approximate p-values
    if two_sided:
        log_p_plus = np.empty(L)
    log_p_minus = np.empty(L)
    for l in range(L):
        theta_l = theta_grid[l]
        log_w_l = np.empty(K + 1)

        # X contribution
        if corrected:
            log_w_l[K] = theta_l * suff_X - log_Q_sum_X
        else:
            log_w_l[K] = -np.inf

        # Y contribution
        log_w_l[:K] = theta_l * suff_Y - log_Q_sum_Y

        if two_sided:
            log_p_num_plus = logsumexp(log_w_l[I_t_Y_plus[l]])
        log_p_num_minus = logsumexp(log_w_l[I_t_Y_minus[l]])
        log_p_denom = logsumexp(log_w_l)

        if verbose:
            print('%.2f: %.2g (%.2g, %.2g)' %
                  (theta_l, log_w_l[K],
                   log_w_l[0:K].min(), log_w_l[0:K].max()))

        if two_sided:
            log_p_plus[l] = log_p_num_plus - log_p_denom
        log_p_minus[l] = log_p_num_minus - log_p_denom

    if two_sided:
        # p_pm = min(1, 2 * min(p_plus, p_minus))
        log_p_vals = np.fmin(0, np.log(2) + np.fmin(log_p_plus, log_p_minus))
    else:
        log_p_vals = np.fmin(0, log_p_minus)

    return invert_test(theta_grid, log_p_vals, np.log(alpha_level))
def ci_conservative_generic(X, K, theta_grid, alpha_level,
                            log_likelihood, sample, t, verbose=False):
    """Two-sided confidence interval by inverting importance-sampled p-values.

    ``K`` data sets are drawn from a mixture proposal (a grid point chosen
    uniformly at random, then ``sample`` at that point).  For each grid
    point ``theta`` the log weight of a data set is ``theta * t(data)``
    minus the log of the summed proposal likelihoods over the grid.  The
    upper- and lower-tail p-values are weight ratios, combined as
    ``min(1, 2 * min(p_plus, p_minus))`` and passed in log form to
    ``invert_test`` with threshold ``log(alpha_level)``.

    Parameters
    ----------
    X : observed data.
    K : int, number of proposal draws.
    theta_grid : sequence of float, candidate parameter values.
    alpha_level : float, level at which the test is inverted.
    log_likelihood : callable ``(data, theta) -> float``.
    sample : callable ``theta -> data``.
    t : callable ``data -> scalar`` test statistic (it is stored into a
        1-D float array, one slot per draw).
    verbose : bool, print per-grid-point diagnostics.

    Returns
    -------
    Whatever ``invert_test`` returns for the grid of log p-values.
    """
    grid_size = len(theta_grid)

    # Mixture proposal: pick a grid point at random, then sample there.
    proposals = [sample(theta_grid[np.random.randint(grid_size)])
                 for _ in range(K)]

    # Observed statistic first, then one per proposal draw.  Slot K is a
    # placeholder for the observation and is forced into both tails.
    t_obs = t(X)
    t_all = np.zeros(K + 1)
    for j, y in enumerate(proposals):
        t_all[j] = t(y)
    in_upper = t_all >= t_obs
    in_upper[K] = True
    in_lower = -t_all >= -t_obs
    in_lower[K] = True

    # Component-wise proposal log-likelihoods, evaluated once up front.
    comp_obs = np.empty(grid_size)
    comp_prop = np.empty((grid_size, K))
    for g, theta_g in enumerate(theta_grid):
        comp_obs[g] = log_likelihood(X, theta_g)
        for j, y in enumerate(proposals):
            comp_prop[g, j] = log_likelihood(y, theta_g)
        if verbose:
            print('%.2f: %.2g, %.2g' %
                  (theta_g, np.exp(comp_obs[g]), np.exp(comp_prop[g].max())))
    log_mix_obs = logsumexp(comp_obs)
    log_mix_prop = np.empty(K)
    for j in range(K):
        log_mix_prop[j] = logsumexp(comp_prop[:, j])

    # Per grid point: importance weights, then the two tail p-values.
    log_p_hi = np.empty(grid_size)
    log_p_lo = np.empty(grid_size)
    for g, theta_g in enumerate(theta_grid):
        log_weights = np.empty(K + 1)
        log_weights[K] = (theta_g * t_obs) - log_mix_obs
        log_weights[:K] = (theta_g * t_all[:K]) - log_mix_prop

        log_norm = logsumexp(log_weights)
        log_hi = logsumexp(log_weights[in_upper])
        log_lo = logsumexp(log_weights[in_lower])

        if verbose:
            print('%.2f: %.2g (%.2g, %.2g)' %
                  (theta_g, log_weights[K],
                   log_weights[0:K].min(), log_weights[0:K].max()))

        log_p_hi[g] = log_hi - log_norm
        log_p_lo[g] = log_lo - log_norm

    # p_pm = min(1, 2 * min(p_plus, p_minus))
    log_p_pm = np.fmin(0, np.log(2) + np.fmin(log_p_hi, log_p_lo))
    return invert_test(theta_grid, log_p_pm, np.log(alpha_level))