def LpEntropy(dat, p=None):
    """
    Estimates the joint entropy (in nats) of a Lp-spherically
    symmetric distributed source without explicit knowledge of the
    radial distribution.

    If p is not specified, it is estimated by fitting a pCauchy
    distribution to ratios of the coordinates: for every sample one
    coordinate is drawn at random and all remaining coordinates are
    divided by it.

    The estimate returned is::

        marginalEntropy(r)[0, 0] + (n - 1) * mean(log(r)) + logSurfacePSphere(n, p)

    where ``r = dat.norm(p=p)`` and ``n = dat.dim()``.

    :param dat: Lp-spherically symmetric distributed sources
    :type dat: natter.DataModule.Data
    :param p: p of the Lp-spherically symmetric source (default: None)
    :type p: float
    :returns: entropy in nats
    :rtype: float
    """
    n = dat.dim()

    if p is None:
        # Imported locally, as in the original code.
        # NOTE(review): presumably this avoids a circular import -- confirm.
        from natter.Distributions import PCauchy

        numex = dat.numex()
        pCauchy = PCauchy(n=n - 1)
        Z = zeros((n - 1, numex))

        # For every sample, pick at random which coordinate serves as the
        # denominator of the ratios.
        normalizingDims = randint(n, size=(numex,))
        for k in range(n):
            ind = (normalizingDims == k)
            # All row indices except k. Built from explicit lists so the
            # concatenation also works where range() is not a list.
            others = list(range(k)) + list(range(k + 1, n))
            Z[:, ind] = dat.X[:, ind][others, :] / atleast_2d(dat.X[k, ind])

        dat2 = Data(Z)
        # Drop the samples whose ratios are not finite (e.g. the
        # denominator was zero).
        dat2.X = dat2.X[:, isfinite(sum(dat2.X, axis=0))]
        pCauchy.estimate(dat2)
        p = pCauchy['p']
        print("\tUsing p=%.2f" % (p,))

    # Entropy of the radial component plus the correction terms.
    r = dat.norm(p=p)
    return (marginalEntropy(r)[0, 0]
            + (n - 1) * mean(log(r.X))
            + logSurfacePSphere(n, p))
def ppf(self, u, bounds=None, maxiter=1000):
    '''
    Evaluates the percentile function (inverse c.d.f.) for a given
    array of quantiles. The single mixture components must implement
    ppf and pdf.

    The inversion uses the bisection method on ``u - self.cdf(x)``.
    The initial bracket is chosen as follows:

    * if *bounds* is given, it is used;
    * otherwise, if the first mixture component has parameters 'a'
      and 'b', these are assumed to be boundaries (a warning is
      issued);
    * otherwise the bracket is [-1e6, 1e6].

    Progress is written to stdout on every iteration.

    NOTE: ppf works only for one dimensional mixture distributions.

    :param u: Percentiles for which the ppf will be computed.
    :type u: numpy.array
    :param bounds: a tuple of two array of the same size of u that specifies the initial upper and lower boundaries for the bisection method.
    :type bounds: tuple of two numpy.array
    :param maxiter: maximum number of iterations
    :type maxiter: int
    :returns: A Data object containing the values of the ppf.
    :rtype: natter.DataModule.Data
    '''
    ret = Data(u, 'Percentiles from ' + self.name)

    # Set up the initial bracket [lb, ub] for the bisection.
    if bounds is not None:
        # NOTE(review): the bracket is updated in place below; if Data
        # does not copy its input, the caller's arrays are modified --
        # confirm.
        lb = Data(bounds[0])
        ub = Data(bounds[1])
    else:
        first = self.param['P'][0]
        if 'a' in first.param and 'b' in first.param:
            warn("\tAssuming that the keys a=%.2g and b=%.2g in %s refer to boundaries. Using those..." % (first['a'], first['b'], first.name,))
            lb = Data(0*u + first['a'])
            ub = Data(0*u + first['b'])
        else:
            lb = Data(u*0 - 1e6)
            ub = Data(u*0 + 1e6)

    def f(dat):
        # Function whose root is sought: zero where cdf(dat) equals u.
        return u - self.cdf(dat)

    # Stop once the widest bracket is narrower than this.
    tol = 5*1e-10

    iterC = 0
    while max(ub.X - lb.X) > tol and iterC < maxiter:
        ret.X = (ub.X + lb.X)/2
        mf = f(ret)
        lf = f(lb)
        uf = f(ub)

        # Same sign at both ends: the bracket no longer encloses the
        # root, so widen it for the affected entries.
        lost = lf*uf > 0
        if any(lost):
            warn("ppf lost the root! resetting boundaries")
            ind0 = where(lost)
            ub.X[0, ind0[0]] = 4*abs(ub.X[0, ind0[0]] + 1)
            lb.X[0, ind0[0]] = -4*abs(lb.X[0, ind0[0]] + 1)

        # Bisection step: keep the half on which f changes sign.
        ind0 = where(mf*lf < 0)
        ind1 = where(mf*uf < 0)
        ub.X[0, ind0[0]] = ret.X[0, ind0[0]]
        lb.X[0, ind1[0]] = ret.X[0, ind1[0]]
        iterC += 1

        sys.stdout.write(80*" " + "\r\tFiniteMixtureDistribution.ppf maxdiff: %.4g, meandiff: %.4g" % (max(ub.X - lb.X), mean(ub.X - lb.X)))
        sys.stdout.flush()

    # Warn only if the iteration budget ran out while the bracket is
    # still too wide; converging on the last allowed iteration is fine.
    if iterC == maxiter and max(ub.X - lb.X) > tol:
        warn("FiniteMixtureDistribution.ppf: Maxiter reached! Exiting. Bisection method might not have been converged. "
             "Maxdiff is %.10g. Mean diff is %.4g" % (max(ub.X - lb.X), mean(ub.X - lb.X)))
    return ret
def ppf(self, u, maxiter=500, tol=1e-5):
    '''
    Evaluates the percent point function (i.e. the inverse c.d.f.) of
    the mixture of Gaussians distribution. It uses a Newton-Raphson
    method with preinitialization.

    Progress and a non-convergence warning are printed; the iteration
    counter is written to stderr.

    :param u: Points at which the p.p.f. will be computed.
    :type u: numpy.array
    :param maxiter: maximum number of iterations
    :param tol: convergence tolerance on max |u - cdf(x)|
    :returns: Data object with the resulting points in the domain of this distribution.
    :rtype: natter.DataModule.Data
    '''
    # Preinitialization: if there was just a single Gaussian weighted
    # by pi_k, its cdf would saturate at pi_k and the cdf at its mean
    # would be pi_k/2. If the Gaussians were well separated, the cdf
    # ranges would approximately split [0,1] into
    # [0, pi_1, pi_1+pi_2, ..., 1]. Each x is initialized with the
    # mean of the Gaussian that corresponds to the interval of its u.
    print("\tpreinitialize ...")
    U = cumsum(self.param['pi'])
    last = len(U) - 1
    X = 0*u
    m = max(u.shape)
    for i in range(m):
        k = 0
        # Stop at the last component: if u[i] exceeds the final
        # cumulative weight (e.g. the weights sum to slightly less than
        # one), fall back to the last Gaussian instead of running past
        # the end of U.
        while k < last and u[i] > U[k]:
            k += 1
        X[i] = self.param['mu'][k]

    dat = Data(X, 'Function values of the p.p.f of %s' % (self.name,))

    iteration = 0
    sys.stderr.write("\tNewton-Raphson ...")
    # cdf(x) - u, evaluated once per iterate and reused for both the
    # convergence check and the update.
    residual = self.cdf(dat) - u
    while iteration < maxiter and max(abs(residual)) > tol:
        sys.stderr.write('%03i\b\b\b' % (iteration,))
        iteration += 1
        # Newton step with the step size halved and 1e-2 added to the
        # density in the denominator.
        # NOTE(review): presumably to damp overshooting and to avoid
        # dividing by a vanishing density -- confirm.
        dat.X = dat.X - residual / 2 / (self.pdf(dat) + 1e-2)
        residual = self.cdf(dat) - u
    print("")

    if max(abs(residual)) > tol:
        print("\tWARNING! natter.Distributions.MixtureOfGaussians: ppf did not converge!")
        print(max(abs(residual)))
    return dat
def ppf(self, u, maxiter=500, tol=1e-5):
    '''
    Evaluates the percent point function (i.e. the inverse c.d.f.) of
    the mixture of Gaussians distribution. It uses a Newton-Raphson
    method with preinitialization.

    Progress and a non-convergence warning are printed; the iteration
    counter is written to stderr.

    :param u: Points at which the p.p.f. will be computed.
    :type u: numpy.array
    :param maxiter: maximum number of iterations
    :param tol: convergence tolerance on max |u - cdf(x)|
    :returns: Data object with the resulting points in the domain of this distribution.
    :rtype: natter.DataModule.Data
    '''
    # Preinitialization: if there was just a single Gaussian weighted
    # by pi_k, its cdf would saturate at pi_k and the cdf at its mean
    # would be pi_k/2. If the Gaussians were well separated, the cdf
    # ranges would approximately split [0,1] into
    # [0, pi_1, pi_1+pi_2, ..., 1]. Each x is initialized with the
    # mean of the Gaussian that corresponds to the interval of its u.
    print("\tpreinitialize ...")
    U = cumsum(self.param['pi'])
    last = len(U) - 1
    X = 0 * u
    m = max(u.shape)
    for i in range(m):
        k = 0
        # Stop at the last component: if u[i] exceeds the final
        # cumulative weight (e.g. the weights sum to slightly less than
        # one), fall back to the last Gaussian instead of running past
        # the end of U.
        while k < last and u[i] > U[k]:
            k += 1
        X[i] = self.param['mu'][k]

    dat = Data(X, 'Function values of the p.p.f of %s' % (self.name, ))

    iteration = 0
    sys.stderr.write("\tNewton-Raphson ...")
    # cdf(x) - u, evaluated once per iterate and reused for both the
    # convergence check and the update.
    residual = self.cdf(dat) - u
    while iteration < maxiter and max(abs(residual)) > tol:
        sys.stderr.write('%03i\b\b\b' % (iteration, ))
        iteration += 1
        # Newton step with the step size halved and 1e-2 added to the
        # density in the denominator.
        # NOTE(review): presumably to damp overshooting and to avoid
        # dividing by a vanishing density -- confirm.
        dat.X = dat.X - residual / 2 / (self.pdf(dat) + 1e-2)
        residual = self.cdf(dat) - u
    print("")

    if max(abs(residual)) > tol:
        print("\tWARNING! natter.Distributions.MixtureOfGaussians: ppf did not converge!")
        print(max(abs(residual)))
    return dat
def LpEntropy(dat, p=None):
    """
    Estimates the joint entropy (in nats) of a Lp-spherically
    symmetric distributed source without explicit knowledge of the
    radial distribution.

    If p is not specified, it is estimated by fitting a pCauchy
    distribution to ratios of the coordinates: for every sample one
    coordinate is drawn at random and all remaining coordinates are
    divided by it.

    The estimate returned is::

        marginalEntropy(r)[0, 0] + (n - 1) * mean(log(r)) + logSurfacePSphere(n, p)

    where ``r = dat.norm(p=p)`` and ``n = dat.dim()``.

    :param dat: Lp-spherically symmetric distributed sources
    :type dat: natter.DataModule.Data
    :param p: p of the Lp-spherically symmetric source (default: None)
    :type p: float
    :returns: entropy in nats
    :rtype: float
    """
    n = dat.dim()

    if p is None:
        # Imported locally, as in the original code.
        # NOTE(review): presumably this avoids a circular import -- confirm.
        from natter.Distributions import PCauchy

        numex = dat.numex()
        pCauchy = PCauchy(n=n - 1)
        Z = zeros((n - 1, numex))

        # For every sample, pick at random which coordinate serves as the
        # denominator of the ratios.
        normalizingDims = randint(n, size=(numex, ))
        for k in range(n):
            ind = (normalizingDims == k)
            # All row indices except k. Built from explicit lists so the
            # concatenation also works where range() is not a list.
            others = list(range(k)) + list(range(k + 1, n))
            Z[:, ind] = dat.X[:, ind][others, :] / atleast_2d(dat.X[k, ind])

        dat2 = Data(Z)
        # Drop the samples whose ratios are not finite (e.g. the
        # denominator was zero).
        dat2.X = dat2.X[:, isfinite(sum(dat2.X, axis=0))]
        pCauchy.estimate(dat2)
        p = pCauchy['p']
        print("\tUsing p=%.2f" % (p, ))

    # Entropy of the radial component plus the correction terms.
    r = dat.norm(p=p)
    return (marginalEntropy(r)[0, 0]
            + (n - 1) * mean(log(r.X))
            + logSurfacePSphere(n, p))