def entropy_estimate(a=None, r=1):
    """
    Return the reference entropy, three estimates and their error estimates.

    A single run draws counts and a reference entropy from
    ``random_counts(a=a)`` and computes three estimates, each divided by
    ``numpy.log(P)``:

    0. ``entropy(c)`` (no error estimate, reported as ``None``);
    1. ``ndd.entropy(c, return_std=1)``;
    2. ``ndd.entropy(c, k=P, return_std=1)``.

    The value of ``ndd.entropy.bounded`` observed right after the second
    estimate is stored on ``entropy_estimate.bounded`` and returned as the
    last element.

    When ``r >= 2`` the single run is repeated ``r`` times and the results
    are stacked into numpy arrays (estimates and errors are transposed so
    that the first axis indexes the estimator).
    """
    if r and r >= 2:
        # One tuple (S0, estimates, errors, bounded) per repetition.
        runs = [entropy_estimate(a=a) for _ in range(r)]
        true, ests, err, bounds = (list(column) for column in zip(*runs))
        return (numpy.array(true), numpy.array(ests).T, numpy.array(err).T,
                numpy.array(bounds))

    c, S0 = random_counts(a=a)

    first_est, first_err = entropy(c) / numpy.log(P), None

    # NOTE(review): this assumes the value returned with return_std=1 can be
    # divided by a scalar and unpacked into two items (e.g. a 2-element
    # array) - confirm against the ndd version in use.
    second_est, second_err = ndd.entropy(c, return_std=1) / numpy.log(P)

    # Must be read before the next ndd.entropy call overwrites the flag.
    entropy_estimate.bounded = ndd.entropy.bounded

    third_est, third_err = ndd.entropy(c, k=P, return_std=1) / numpy.log(P)

    estimates = (first_est, second_est, third_est)
    errors = (first_err, second_err, third_err)
    return S0, estimates, errors, entropy_estimate.bounded
def compute_estimate(data, k=None, estimator=None, alpha=None):
    """Entropy estimation given data.

    ``data`` is either a dict holding the counts under ``'nk'`` (with
    optional ``'k'`` and ``'zk'`` entries) or the counts themselves.
    An explicit ``k`` argument takes precedence over ``data['k']``.

    ``estimator`` must be a key of ``ndd.entropy_estimators``; ``alpha`` is
    only used when an estimator is given. Returns ``ndd.entropy.info`` as
    left by the ``ndd.entropy`` call.

    Raises ValueError when ``estimator`` is not a known estimator name.
    """
    if isinstance(data, dict) and 'nk' in data:
        counts = data['nk']
        multiplicities = data.get('zk', None)
        k = k or data.get('k', None)
    else:
        # list of integers counts or mapping to ints
        counts = data
        multiplicities = None

    if estimator is not None:
        # The lookup also validates the estimator name.
        try:
            algorithm = ndd.entropy_estimators[estimator]
        except KeyError:
            raise ValueError('Invalid estimator. Available estimators:\n'
                             '%s' % available_estimators)
        if alpha is not None:
            # Not every estimator class accepts alpha; fall back to the
            # default constructor when it does not.
            try:
                estimator = algorithm(alpha=alpha)
            except TypeError:
                estimator = algorithm()

    # Pass the multiplicities along only when they are present and truthy.
    sample = (counts, multiplicities) if multiplicities else counts
    ndd.entropy(sample, k=k, estimator=estimator)

    return ndd.entropy.info
def test_histogram_ndarray():
    """Entropy from DataArray counts must match the stored reference value."""
    n, p = 100, 3
    array = ndd.data.DataArray(random_ndarray(n, p, SEED))
    expected = 9.107550241712808

    counts, k = array.iter_counts()
    assert numpy.isclose(ndd.entropy(counts, k=k), expected)
def main():
    """Estimate the entropy for every case and dump the results to stdout.

    Each item from ``cases()`` provides the counts and the keyword arguments
    for ``ndd.entropy``; ``k`` is set to the number of counts. The list of
    results is written to standard output as JSON.
    """
    results = [
        ndd.entropy(counts, k=len(counts), **kwargs)
        for counts, _, kwargs in cases()
    ]
    json.dump(results, sys.stdout)
def error(alpha, n):
    """Return the actual error and the estimated uncertainty (normalized).

    Probabilities are drawn with ``dirichlet(alpha)`` and counts with
    ``multinomial(n, pvals)``. Both returned values are divided by the
    reference entropy ``sp_entropy(pvals)``:

    - (estimate - reference) / reference
    - std / reference
    """
    cardinality = len(alpha)
    probabilities = dirichlet(alpha)
    sample = multinomial(n, probabilities)
    reference = sp_entropy(probabilities)

    estimate, std = ndd.entropy(sample, k=cardinality, return_std=True)

    relative_error = (estimate - reference) / reference
    relative_std = std / reference
    return relative_error, relative_std
def ndd_entropy(dist, k, decimals=100):
    """Estimate the entropy of the rows of ``dist`` with ``ndd.entropy``.

    Values are rounded to ``decimals`` decimals, identical rows (axis 0) are
    counted, and the counts are passed to ``ndd.entropy`` together with ``k``.
    """
    rounded = np.around(dist, decimals=decimals)
    counts = np.unique(rounded, return_counts=True, axis=0)[1]

    # NDD doesn't accept a count vector over 1 thing: pad with an empty bin.
    if counts.shape[0] == 1:
        counts = np.concatenate([counts, [0]], axis=0)

    return ndd.entropy(counts, k=k)
def nsb_entropy(counts, k):
    """Return ``ndd.entropy(counts, k, return_std=True)``, or ``(0, 0)`` on failure.

    ``k`` is capped at 10000 before the call. The estimate is best-effort:
    any error raised by ``ndd.entropy`` yields the fallback ``(0, 0)``
    instead of propagating.
    """
    # maximum number of classes; nsb fails otherwise (when k is too large);
    # 10000 is arbitrary here
    if k > 10000:
        k = 10000

    try:
        return ndd.entropy(counts, k, return_std=True)
    except Exception:
        # Deliberately broad (best-effort), but unlike a bare ``except`` this
        # lets KeyboardInterrupt and SystemExit propagate.
        return 0, 0
def discrete_entropy(x):
    """Estimate the entropy of ``x`` with the "Grassberger" estimator.

    Unique entries of ``x`` along axis 0 are counted and the counts are
    passed to ``ndd.entropy``.
    """
    counts = np.unique(x, return_counts=True, axis=0)[1]
    return ndd.entropy(counts, estimator="Grassberger")
# -*- coding: utf-8 -*-
"""Fast test."""
import numpy
import ndd

a = [7, 3, 5, 8, 9, 1, 3, 3, 1, 0, 2, 5, 2, 11, 4, 23, 5, 0, 8, 0]
h = ndd.entropy(a)

# Reference value for the estimate (an earlier reference was
# 2.623634344888532).
href = 2.623634344902917

absolute_error = numpy.abs(h - href)
relative_error = absolute_error / href

# Tolerance: machine epsilon of single-precision floats.
eps = numpy.finfo(numpy.float32).eps

assert absolute_error < eps, 'estimate %r /= %r' % (h, href)
print('%r. Abs. error is %r. Test ok!' % (h, absolute_error))
def cprofile(ar):
    """Return the ``ndd.entropy`` estimate (with ``k=K``) for each item of ``ar``.

    NOTE(review): judging by the name, this is presumably the workload run
    under cProfile; the profiler itself is not invoked here.
    """
    return [ndd.entropy(counts, k=K) for counts in ar]
# -*- coding: utf-8 -*-
"""Check README example."""
import ndd

counts = [4, 12, 4, 5, 3, 1, 5, 1, 2, 2, 2, 2, 11, 3, 4, 12, 12, 1, 2]

# Expected (estimate, std) pair; compared for exact equality below.
expected = (2.8060922529931225, 0.11945501149743358)

result = ndd.entropy(counts, k=100, return_std=True)
print(result)
assert result == expected
def test_pmf(pmf):
    """``ndd.entropy`` applied to ``pmf.pk`` must approximate ``pmf.entropy``."""
    expected = pmf.entropy
    estimate = ndd.entropy(pmf.pk)
    assert estimate == approx(expected)
# -*- coding: utf-8 -*-
"""Fast test."""
import numpy
import ndd

a = [7, 3, 5, 8, 9, 1, 3, 3, 1, 0, 2, 5, 2, 11, 4, 23, 5, 0, 8, 0]
h = ndd.entropy(a, k=len(a))

# Reference value for the estimate (earlier references were
# 2.623634344888532 and 2.623634344902917).
href = 2.6192535776467056

absolute_error = numpy.abs(h - href)
relative_error = absolute_error / href

# Tolerance: machine epsilon of single-precision floats.
eps = numpy.finfo(numpy.float32).eps

assert absolute_error < eps, 'estimate %r /= %r' % (h, href)
print('%r. Abs. error is %r. Test ok!' % (h, absolute_error))
def entropy(counts, k):
    """ndd.entropy() execution time.

    Runs ``ndd.entropy(counts, k=k, return_std=True)`` once, discards the
    result, and returns ``(elapsed_seconds, 0)``. The second element is
    always the constant 0.
    """
    # perf_counter is monotonic and high-resolution; time.time() follows the
    # wall clock and can jump if the system clock is adjusted mid-measurement.
    start = time.perf_counter()
    ndd.entropy(counts, k=k, return_std=True)
    elapsed = time.perf_counter() - start
    return elapsed, 0
clouds = rnd(0.2) rain = clouds * rnd(0.7) + (1 - clouds) * rnd(0.2) dark = clouds * rnd(0.9) data.append([clouds, rain, dark]) return numpy.array(data).T with open(os.path.join(tests_dir(), 'data.json'), 'r') as _jf: results = json.load(_jf) @pytest.mark.parametrize('case, ref_result', zip(cases(), results)) def test_entropy(case, ref_result): """Basic tests.""" counts, _, kwargs = case test_result = ndd.entropy(counts, k=len(counts), **kwargs) assert numpy.isclose(test_result, ref_result) def test_histogram_ndarray(): N, P = 100, 3 data = random_ndarray(N, P, SEED) ref_result = 9.107550241712808 assert numpy.isclose( ndd.entropy(ndd.histogram(data), k=ndd.nsb._nbins(data)), ref_result) # pylint: disable=protected-access def test_from_data(): N, P = 100, 3 data = random_ndarray(N, P, SEED) ref_result = 9.107550241712808
def test_pmf_with_zeros(pmf_with_zeros):
    """``ndd.entropy`` of ``pmf_with_zeros.pk`` must approximate its ``entropy``."""
    expected = pmf_with_zeros.entropy
    # Print the sum of the pk values (shown in the captured test output).
    print(pmf_with_zeros.pk.sum())
    estimate = ndd.entropy(pmf_with_zeros.pk)
    assert estimate == approx(expected)
def test_unnormalized_pmf():
    """``ndd.entropy`` of unnormalized weights must match the normalized pmf entropy.

    The reference is ``Pmf().entropy_from_pmf`` applied to the weights
    divided by their sum.
    """
    weights = numpy.random.random(size=100)  # pylint: disable=no-member
    normalized = weights / weights.sum()

    estimate = ndd.entropy(weights)
    reference = Pmf().entropy_from_pmf(normalized)
    assert estimate == approx(reference)
def test_counts(counts):
    """``ndd.entropy(counts.nk)`` must agree with ``counts.entropy`` using an AutoEstimator."""
    auto_estimator = ndd.estimators.AutoEstimator()

    estimate = ndd.entropy(counts.nk)
    reference = counts.entropy(estimator=auto_estimator)
    assert estimate == approx(reference)
def test_histogram_ndarray():
    """Entropy from ``ndd.histogram`` counts must match the stored reference value."""
    n, p = 100, 3
    sample = random_ndarray(n, p, SEED)
    expected = 9.107550241712808

    counts = ndd.histogram(sample)
    k = ndd.nsb._nbins(sample)  # pylint: disable=protected-access
    assert numpy.isclose(ndd.entropy(counts, k=k), expected)
def rounded(*args, **kwargs):
    """Return ``ndd.entropy(COUNTS, *args, **kwargs)`` rounded to 3 decimals."""
    return round(ndd.entropy(COUNTS, *args, **kwargs), 3)