def H_Diri(mk, K, N):
    """
    Return the NSB entropy estimate as a ratio of two Gauss-Legendre integrals.

    The numerator integrates ``entropy * weight`` and the denominator
    integrates ``weight`` alone over the same interval.  Which helper pair
    and which interval are used depends on the value returned by
    ``_beta_MAP``:

    * if it is exactly ``1.``, the ``_H1_w`` / ``_rho_xi_w`` helpers are
      integrated over ``w`` in ``[0, 1]``;
    * otherwise ``_H1`` / ``_rho_xi`` are integrated over ``beta`` in a
      window of +/- 8 standard deviations around ``beta_MAP``, where the
      standard deviation comes from a Gaussian approximation at the MAP
      parameter (lower bound clipped at ``10**(-50)``).

    :param mk: indexable; ``mk[0]`` is subtracted from ``K`` to form ``K1``
        (presumably the multiplicities of the counts -- confirm with caller)
    :param K: forwarded to the helper functions
    :param N: forwarded to the helper functions
    :return: the ratio of the two ``mp.quadgl`` integrals
    """
    mp.pretty = True

    K1 = K - mk[0]
    beta_MAP = _beta_MAP(mk, K, K1, N)

    if beta_MAP == 1.:
        # Integrate in the w parametrisation over the unit interval.
        entropy_at, weight_at = _H1_w, _rho_xi_w
        bounds = [0, 1]
    else:
        entropy_at, weight_at = _H1, _rho_xi
        # std of Gaussian approximation at MAP parameter
        curvature = _d2logrho_xi(beta_MAP, mk, K, N)
        std = np.sqrt(-curvature**(-1))
        # Integration window: +/- 8 std around beta_MAP, kept positive.
        lower = np.amax([10**(-50), beta_MAP - 8 * std])
        upper = beta_MAP + 8 * std
        bounds = [lower, upper]

    def weighted_entropy(t):
        return entropy_at(t, mk, K, K1, N) * weight_at(t, mk, K, N)

    def weight(t):
        return weight_at(t, mk, K, N)

    # Numerator first, then the normalisation, as a single ratio.
    return mp.quadgl(weighted_entropy, bounds) / mp.quadgl(weight, bounds)
def nsb_entropy(mk, K, N):
    """
    Estimate the entropy of a system using the NSB estimator.

    The estimate is the integral of ``H1 * unnormalized_posterior`` divided
    by the integral of ``unnormalized_posterior`` (the marginal likelihood),
    both taken over the same integration bounds.

    :param mk: multiplicities
    :param K: number of possible symbols/ state space of the system
    :param N: total number of observed symbols
    :return: the ratio of the two ``mp.quadgl`` integrals
    """
    mp.pretty = True

    # Find the concentration parameter beta for which the posterior is
    # maximised, to integrate around this peak.
    integration_bounds = get_integration_bounds(mk, K, N)

    if np.any(np.isnan(integration_bounds)):
        # No peak was found: integrate over the whole range instead.
        # Substituting beta = (w / (1 - w))**2 maps w in [0, 1) onto
        # beta in [0, infinity), so the integral runs from 0 to 1.
        integration_bounds = [0, 1]

        def posterior(w):
            sbeta = w / (1 - w)
            beta = sbeta * sbeta
            # The trailing factor is the Jacobian d(beta)/dw
            # = 2 * sbeta / (1 - w)**2 of the substitution.
            return (unnormalized_posterior(beta, mk, K, N)
                    * 2 * sbeta / (1 - w) / (1 - w))

        def entropy(w):
            sbeta = w / (1 - w)
            beta = sbeta * sbeta
            # Evaluate H1 at the transformed beta, not at w itself, so it
            # matches the argument used for the posterior weight above.
            return H1(beta, mk, K, N)
    else:
        # Integrate over the possible entropies, weighted such that every
        # entropy is equally likely.
        def posterior(beta):
            return unnormalized_posterior(beta, mk, K, N)

        def entropy(beta):
            return H1(beta, mk, K, N)

    # Normalise with the marginal likelihood.
    marginal_likelihood = mp.quadgl(posterior, integration_bounds)
    H_nsb = mp.quadgl(lambda t: entropy(t) * posterior(t),
                      integration_bounds) / marginal_likelihood

    return H_nsb
def S(nxkx, N, K):
    """
    Return the estimated entropy.

    The value is the integral of ``_Si`` over ``w`` in ``[0, 1]`` divided by
    the integral of ``_measure`` over the same interval.  The mpmath working
    precision is set to ``DPS`` and pretty-printing is switched on as a side
    effect.

    nxkx is the histogram of the input histogram constructed by make_nxkx.
    N is the total number of elements, and K is the degree of freedom.

    >>> from numpy import array
    >>> nTest = array([4, 2, 3, 0, 2, 4, 0, 0, 2])
    >>> K = 9  # which is actually equal to nTest.size.
    >>> S(make_nxkx(nTest, K), nTest.sum(), K)
    1.9406467285026877476
    """
    mp.dps = DPS
    mp.pretty = True

    def entropy_integrand(w):
        return _Si(w, nxkx, N, K)

    def normalisation_integrand(w):
        return _measure(w, nxkx, N, K)

    weighted_entropy = quadgl(entropy_integrand, [0, 1])
    normalisation = quadgl(normalisation_integrand, [0, 1])
    return weighted_entropy / normalisation
def S(x, N, K):
    """
    Return the estimated entropy, scaled by log2(e).

    x is a vector of counts; this function builds the histogram of the
    input histogram itself by calling ``make_nxkx(x, K)``, so callers pass
    the raw counts rather than the output of ``make_nxkx``.
    N is the total number of elements, and K is the degree of freedom.

    The result is ``log2(e)`` times the ratio of the integral of ``_Si`` to
    the integral of ``_measure`` over ``w`` in ``[0, 1]`` (presumably a
    conversion from nats to bits -- confirm against ``_Si``).  The mpmath
    working precision is set to ``DPS`` and pretty-printing is switched on
    as a side effect.

    >>> from numpy import array
    >>> nTest = array([4, 2, 3, 0, 2, 4, 0, 0, 2])
    >>> K = 9  # which is actually equal to nTest.size.
    >>> S(nTest, nTest.sum(), K)  # doctest: +SKIP
    """
    nxkx = make_nxkx(x, K)

    mp.dps = DPS
    mp.pretty = True

    def entropy_integrand(w):
        return _Si(w, nxkx, N, K)

    def normalisation_integrand(w):
        return _measure(w, nxkx, N, K)

    log2_e = np.log2(np.exp(1))
    # Multiply first, then divide, keeping the original operation order.
    scaled = log2_e * quadgl(entropy_integrand, [0, 1])
    return scaled / quadgl(normalisation_integrand, [0, 1])
def H_NSB(mk, K, N):
    """
    Compute the ML-prior entropy, the NSB estimate and its standard deviation.

    All integrals run over ``beta`` in a window of +/- 8 standard deviations
    around ``beta_MAP``, where the standard deviation comes from a Gaussian
    approximation at the MAP parameter (lower bound clipped at
    ``10**(-50)``).  ``_rho_xi`` is the integration weight and its integral
    is the normalisation constant.

    :param mk: indexable; ``mk[0]`` is subtracted from ``K`` to form ``K1``
        (presumably the multiplicities of the counts -- confirm with caller)
    :param K: forwarded to the helper functions
    :param N: forwarded to the helper functions
    :return: tuple ``(H_ML, H_nsb, stdH_nsb, beta_ML)`` where ``H_ML`` is
        ``_H1`` evaluated at ``beta_ML``, ``H_nsb`` is the normalised
        integral of ``_H1``, and ``stdH_nsb`` is the square root of the
        normalised integral of ``_H2`` minus ``H_nsb`` squared
    """
    mp.pretty = True

    K1 = K - mk[0]
    beta_ML = _beta_ML(mk, K, K1, N)
    beta_MAP = _beta_MAP(mk, K, K1, N)

    # std of Gaussian approximation at MAP parameter
    curvature = _d2logrho_xi(beta_MAP, mk, K, N)
    std = np.sqrt(-curvature**(-1))

    # Integration window: +/- 8 std around beta_MAP, kept positive.
    lower = np.amax([10**(-50), beta_MAP - 8 * std])
    upper = beta_MAP + 8 * std
    intbounds = [lower, upper]

    def weight(beta):
        return _rho_xi(beta, mk, K, N)

    def first_moment(beta):
        return _H1(beta, mk, K, K1, N) * _rho_xi(beta, mk, K, N)

    def second_moment(beta):
        return _H2(beta, mk, K, K1, N) * _rho_xi(beta, mk, K, N)

    # Normalisation constant shared by both moments.
    rhonorm = mp.quadgl(weight, intbounds)

    # H with ML prior.
    H_ML = _H1(beta_ML, mk, K, K1, N)

    # NSB estimator.
    H_nsb = mp.quadgl(first_moment, intbounds) / rhonorm

    # Std of the NSB estimator: sqrt(<H^2> - <H>^2).
    mean_square = mp.quadgl(second_moment, intbounds) / rhonorm
    stdH_nsb = mp.sqrt(mean_square - H_nsb * H_nsb)

    return H_ML, H_nsb, stdH_nsb, beta_ML
def dS(nxkx, N, K):
    """
    Return the mean squared fluctuation of the entropy.

    The value is the integral of ``_dSi`` over ``w`` in ``[0, 1]`` divided
    by the integral of ``_measure`` over the same interval.  The mpmath
    working precision is set to ``DPS`` and pretty-printing is switched on
    as a side effect.

    >>> from numpy import array, sqrt
    >>> nTest = array([4, 2, 3, 0, 2, 4, 0, 0, 2])
    >>> K = 9  # which is actually equal to nTest.size.
    >>> nxkx = make_nxkx(nTest, K)
    >>> s = S(nxkx, nTest.sum(), K)
    >>> ds = dS(nxkx, nTest.sum(), K)
    >>> ds
    3.7904532836824960524
    >>> sqrt(ds-s**2)  # the standard deviation for the estimated entropy.
    0.15602422515209426008
    """
    mp.dps = DPS
    mp.pretty = True

    def fluctuation_integrand(w):
        return _dSi(w, nxkx, N, K)

    def normalisation_integrand(w):
        return _measure(w, nxkx, N, K)

    weighted_fluctuation = quadgl(fluctuation_integrand, [0, 1])
    normalisation = quadgl(normalisation_integrand, [0, 1])
    return weighted_fluctuation / normalisation
def dS(nxkx, N, K):
    """
    Return the mean squared fluctuation of the entropy.

    The value is the integral of ``_dSi`` over ``w`` in ``[0, 1]`` divided
    by the integral of ``_measure`` over the same interval; both
    quadratures are run with ``maxdegree=20``.  The mpmath working
    precision is set to ``DPS`` and pretty-printing is switched on as a
    side effect.

    >>> from numpy import array, sqrt
    >>> nTest = array([4, 2, 3, 0, 2, 4, 0, 0, 2])
    >>> K = 9  # which is actually equal to nTest.size.
    >>> nxkx = make_nxkx(nTest, K)
    >>> s = S(nxkx, nTest.sum(), K)
    >>> ds = dS(nxkx, nTest.sum(), K)
    >>> ds
    3.790453283682443237187782792251041316212
    >>> sqrt(ds-s**2)  # the standard deviation for the estimated entropy.
    0.1560242251518078487118059349690693094484
    """
    mp.dps = DPS
    mp.pretty = True

    def fluctuation_integrand(w):
        return _dSi(w, nxkx, N, K)

    def normalisation_integrand(w):
        return _measure(w, nxkx, N, K)

    weighted_fluctuation = quadgl(fluctuation_integrand, [0, 1], maxdegree=20)
    normalisation = quadgl(normalisation_integrand, [0, 1], maxdegree=20)
    return weighted_fluctuation / normalisation
def dS(x, N, K):
    """
    Returns the variance in the entropy.

    x is a vector of counts; this function builds the histogram of the
    input histogram itself by calling ``make_nxkx(x, K)``, so callers pass
    the raw counts rather than the output of ``make_nxkx``.

    The returned value is::

        log2(e) * I(_dSi) / I(_measure)  -  S(x, N, K)**2 / log2(e)

    where ``I(.)`` is the integral over ``w`` in ``[0, 1]``.  The square of
    ``S`` is already subtracted here, so callers should not subtract it
    again.  The mpmath working precision is set to ``DPS`` and
    pretty-printing is switched on as a side effect.

    NOTE(review): both terms carry a single factor of log2(e); if ``S``
    already returns log2(e) times the mean, the result is log2(e) times the
    unscaled variance rather than log2(e)**2 times it -- confirm the
    intended units.

    >>> from numpy import array
    >>> nTest = array([4, 2, 3, 0, 2, 4, 0, 0, 2])
    >>> K = 9  # which is actually equal to nTest.size.
    >>> dS(nTest, nTest.sum(), K)  # doctest: +SKIP
    """
    nxkx = make_nxkx(x, K)

    mp.dps = DPS
    mp.pretty = True

    def fluctuation_integrand(w):
        return _dSi(w, nxkx, N, K)

    def normalisation_integrand(w):
        return _measure(w, nxkx, N, K)

    # Scaled second moment: multiply first, then divide, as in the
    # original expression.
    second_moment = (np.log2(np.exp(1)) * quadgl(fluctuation_integrand, [0, 1])
                     / quadgl(normalisation_integrand, [0, 1]))

    # Squared mean, rescaled so it matches the second moment's scaling.
    squared_mean = S(x, N, K)**2 / np.log2(np.exp(1))

    return second_moment - squared_mean