def information_model(e, m, epsilon=1.0e-5):
    # Build a PyMC (v2) model dict relating mutual information I(X;Y) to an
    # observed error count via information-theoretic Bayes-error bounds.
    #
    # Parameters:
    #   e       -- observed number of errors (Binomial `value`)
    #   m       -- number of trials for the Binomial likelihood
    #   epsilon -- small offset keeping the MI prior strictly inside (0, 1)
    #
    # Returns: locals() -- a dict of all model variables, in the usual PyMC2
    # pattern for handing a model to pymc.Model / pymc.MCMC.
    #
    # Prior on mutual information, uniform on (epsilon, 1 - epsilon).
    I = pymc.Uniform(name='mutual information', lower=epsilon, upper=1.0-epsilon)
    # ibeb helper giving the Bayes-error lower bound as a function of MI
    # (binary classification, maximal label entropy).
    belb = ibeb.bayes_error_lower_bound(Y_entropy=1.0, num_classes=2)
    @pymc.deterministic(name='Bayes error lower bound')
    def bayes_error_lb(I=I):
        return belb(I)
    @pymc.deterministic(name='Bayes error upper bound')
    def bayes_error_ub(I=I):
        return ibeb.bayes_error_upper_bound(I)
    # Bayes error lies between the two information-theoretic bounds.
    epsilon_B = pymc.Uniform(name='Bayes error', lower=bayes_error_lb, upper=bayes_error_ub)
    # Generalization error: at least the Bayes error, at most chance (0.5).
    # NOTE(review): this rebinding shadows the `epsilon` argument above, so the
    # returned locals() maps 'epsilon' to this stochastic — presumably
    # intentional in the locals()-as-model idiom, but worth confirming.
    epsilon = pymc.Uniform(name='generalization error', lower=epsilon_B, upper=0.5)
    # Binomial likelihood of observing e errors out of m trials.
    error = pymc.Binomial(name='observed number of errors', n=m, p=epsilon, observed=True, value=e)
    return locals()
def data_likelihood(m, lower=0.0, upper=1.0, N=100000, counts_min=5):
    """Estimate p(e | lower <= I(X;Y) <= upper, m) by Monte Carlo.

    Samples the generative chain
        MI ~ Uniform(lower, upper)
        epsilon_B ~ Uniform(Bayes-error bounds derived from MI)
        epsilon ~ Uniform(epsilon_B, 0.5)
        e ~ Binomial(m, epsilon)
    and returns the relative frequency of each error count e = 0..m.

    Parameters:
        m          -- number of trials per Binomial draw
        lower      -- lower bound of the mutual-information prior
        upper      -- upper bound of the mutual-information prior
        N          -- number of Monte Carlo iterations
        counts_min -- bins observed fewer than this many times are
                      marked NaN as unreliable

    Returns:
        ndarray of length m + 1 with the estimated probability of each
        error count; entries with fewer than `counts_min` samples are NaN.
    """
    be_lb = ibeb.bayes_error_lower_bound(Y_entropy=1.0, num_classes=2,
                                         cache_size=100000)
    be_ub = ibeb.bayes_error_upper_bound
    # Monte Carlo: sample the full chain MI -> Bayes error -> error count.
    MI = np.random.uniform(low=lower, high=upper, size=N)
    epsilon_B_lower = be_lb(MI)
    epsilon_B_upper = be_ub(MI)
    epsilon_B = np.random.uniform(low=epsilon_B_lower, high=epsilon_B_upper)
    epsilon = np.random.uniform(low=epsilon_B, high=0.5)
    e = np.random.binomial(m, epsilon)
    # Statistics: relative frequency of each error count 0..m
    # (bin edges range(m + 2) give one bin per integer count).
    e_counts, _ = np.histogram(e, bins=range(m + 2))
    e_frequency = e_counts / np.double(N)
    # np.nan (lowercase): the np.NaN alias was removed in NumPy 2.0.
    e_frequency[e_counts < counts_min] = np.nan  # mark unreliable values
    return e_frequency