コード例 #1
0
ファイル: Entropy.py プロジェクト: fabiansinz/natter
def LpEntropy(dat,p=None):
    """
    Estimates the joint entropy (in nats) of a Lp-spherically
    symmetric distributed source without explicit knowledge of the
    radial distribution. If p is not specified, it is estimated by
    fitting a pCauchy distribution to coordinate ratios of the data.

    The body only uses constructs that are valid on both Python 2 and
    Python 3 (print with a single parenthesised argument, range instead
    of xrange, explicit lists for the index concatenation).

    :param dat: Lp-spherically symmetric distributed sources
    :type dat:  natter.DataModule.Data
    :param p: p of the Lp-spherically symmetric source (default: None)
    :type p: float
    :returns: entropy in nats
    :rtype: float
    """
    n = dat.dim()
    if p is None:
        # estimate p with a pCauchy distribution on n-1 dimensional ratios
        from natter.Distributions import PCauchy
        pCauchy = PCauchy(n=n-1)
        Z = zeros((n-1,dat.numex()))
        # for every example draw one coordinate index in [0, n) that serves
        # as the denominator of the ratios
        normalizingDims = randint(n,size=(dat.numex(),))
        for k in range(n):
            ind = (normalizingDims == k)
            # every row except k; built from lists because range objects
            # cannot be concatenated with + on Python 3
            otherRows = list(range(k)) + list(range(k+1,n))
            Z[:,ind] = dat.X[:,ind][otherRows,:]/atleast_2d(dat.X[k,ind])
        dat2 = Data(Z)
        # keep only the examples whose column sum of ratios is finite
        # (a zero denominator yields inf/nan entries)
        dat2.X = dat2.X[:,isfinite(sum(dat2.X,axis=0))]
        pCauchy.estimate(dat2)
        p = pCauchy['p']
        print("\tUsing p=%.2f" % (p,))

    # entropy estimate: marginal entropy of the p-norms, plus (n-1) times
    # the mean log p-norm, plus the log surface term of the p-sphere
    r = dat.norm(p=p)
    return marginalEntropy(r)[0,0]  + (n-1)*mean(log(r.X)) + logSurfacePSphere(n,p)
コード例 #2
0
    def ppf(self,u,bounds=None,maxiter=1000):
        '''

        Evaluates the percentile function (inverse c.d.f.) for a given
        array of quantiles. The inversion is done by bisection on
        u - cdf(x). The single mixture components must implement ppf
        and pdf.

        NOTE: ppf works only for one dimensional mixture distributions.

        Initial bracket, in order of precedence: the explicit bounds
        argument; the parameters 'a' and 'b' of the first mixture
        component if it has both; otherwise [-1e6, 1e6].

        Progress is written to sys.stdout on every iteration. A warning
        is issued if maxiter iterations were used and the bracket is
        still wider than the tolerance.

        :param u: Percentiles for which the ppf will be computed.
        :type u: numpy.array
        :param bounds: a tuple of two array of the same size of u that specifies the initial upper and lower boundaries for the bisection method.
        :type bounds: tuple of two numpy.array
        :param maxiter: maximum number of iterations
        :type maxiter: int
        :returns:  A Data object containing the values of the ppf.
        :rtype:    natter.DataModule.Data

        '''
        # bisection stops once the widest bracket is at most this wide
        tol = 5*1e-10

        ret = Data(u,'Percentiles from ' + self.name)
        if bounds is not None:
            lb = Data(bounds[0])
            ub = Data(bounds[1])
        else:
            first = self.param['P'][0]
            # 'in' instead of dict.has_key, which no longer exists on Python 3
            if 'a' in first.param and 'b' in first.param:
                warn("\tAssuming that the keys a=%.2g and b=%.2g in %s refer to boundaries. Using those..." % (first['a'],first['b'],first.name,))
                lb = Data(0*u+first['a'])
                ub = Data(0*u+first['b'])
            else:
                lb = Data(u*0-1e6)
                ub = Data(u*0+1e6)

        def f(dat):
            # function whose root is searched for
            return u-self.cdf(dat)

        iterC = 0
        while max(ub.X-lb.X) > tol and iterC < maxiter:
            ret.X = (ub.X+lb.X)/2
            mf = f(ret)
            lf = f(lb)
            uf = f(ub)
            if any(lf*uf>0):
                # no sign change between the bracket ends: widen those brackets.
                # NOTE(review): lf/uf/mf are not re-evaluated after widening,
                # the update below still uses the values computed above.
                warn("ppf lost the root! resetting boundaries")
                ind0 = where(lf*uf > 0)
                ub.X[0,ind0[0]] = 4*abs(ub.X[0,ind0[0]]+1)
                lb.X[0,ind0[0]] = -4*abs(lb.X[0,ind0[0]]+1)
            # move the bracket end that lies on the other side of the root
            # from the midpoint.
            # NOTE(review): ind*[0] is used as a column index, which assumes
            # f returns a 1-D array -- TODO confirm against cdf
            ind0 = where(mf*lf < 0)
            ind1 = where(mf*uf < 0)
            ub.X[0,ind0[0]] = ret.X[0,ind0[0]]
            lb.X[0,ind1[0]] = ret.X[0,ind1[0]]
            iterC +=1
            sys.stdout.write(80*" " + "\r\tFiniteMixtureDistribution.ppf maxdiff: %.4g, meandiff: %.4g" % (max(ub.X-lb.X),mean(ub.X-lb.X)))
            sys.stdout.flush()
        # only complain if the iteration budget ran out before the bracket
        # became narrow enough; converging on the last iteration is fine
        if iterC == maxiter and max(ub.X-lb.X) > tol:
            warn("FiniteMixtureDistribution.ppf: Maxiter reached! Exiting. Bisection method might not have been converged. Maxdiff is %.10g. Mean diff is %.4g" % ( max(ub.X-lb.X),mean(ub.X-lb.X)))
        return ret
コード例 #3
0
    def ppf(self,u,maxiter=500, tol = 1e-5):
        '''

        Evaluates the percent point function (i.e. the inverse c.d.f.)
        of the mixture of Gaussians distribution.

        It uses a Newton-Raphson method with preinitialization. Progress
        is printed to stdout/stderr; if the largest absolute c.d.f.
        residual is still above tol at the end, a warning and that
        residual are printed.

        :param u:  Points at which the p.p.f. will be computed.
        :type u: numpy.array
        :param maxiter: maximum number of iterations
        :param tol: convergence tolerance
        :returns:  Data object with the resulting points in the domain of this distribution.
        :rtype:    natter.DataModule.Data

        '''

        # preinitialization: if there was just a single Gaussian
        # weighted by pi_k, the cdf would saturate at pi_k, and the cdf
        # at this Gaussian's mean would be pi_k/2. If the Gaussians
        # were well separated, the cdf ranges would approximately split
        # up [0,1] into [0, pi_1, pi_1+pi_2, ..., 1]. We initialize the x
        # for each u with the mean of the Gaussian that corresponds to
        # that interval.

        print("\tpreinitialize ...")
        U = cumsum(self.param['pi'])
        X = 0*u
        m = max(u.shape)
        last = len(U) - 1
        for i in range(m):
            k = 0
            # stop at the last component: if u[i] exceeds the final
            # cumulative weight (e.g. the weights sum to slightly less
            # than 1 after rounding) the scan must not run past U
            while k < last and u[i] > U[k]:
                k +=1
            X[i] = self.param['mu'][k]

        dat = Data(X,'Function values of the p.p.f of %s' % (self.name,))
        iteration = 0
        sys.stderr.write("\tNewton-Raphson ...")
        # residual of the current iterate, evaluated once per iteration
        err = self.cdf(dat) - u
        while iteration < maxiter and max(abs(err)) > tol:
            sys.stderr.write('%03i\b\b\b' % (iteration,))
            iteration += 1
            # Newton step taken at half length, with 1e-2 added to the pdf
            # (presumably to keep the step bounded where the pdf is tiny)
            dat.X = dat.X - err/ 2 /(self.pdf(dat) + 1e-2)
            err = self.cdf(dat) - u
        print("")
        if max(abs(err)) > tol:
            print("\tWARNING! natter.Distributions.MixtureOfGaussians: ppf did not converge!")
            print(max(abs(err)))

        return dat
コード例 #4
0
    def ppf(self, u, maxiter=500, tol=1e-5):
        '''

        Evaluates the percent point function (i.e. the inverse c.d.f.)
        of the mixture of Gaussians distribution.

        It uses a Newton-Raphson method with preinitialization. Progress
        is printed to stdout/stderr; if the largest absolute c.d.f.
        residual is still above tol at the end, a warning and that
        residual are printed.

        :param u:  Points at which the p.p.f. will be computed.
        :type u: numpy.array
        :param maxiter: maximum number of iterations
        :param tol: convergence tolerance
        :returns:  Data object with the resulting points in the domain of this distribution.
        :rtype:    natter.DataModule.Data

        '''

        # preinitialization: if there was just a single Gaussian
        # weighted by pi_k, the cdf would saturate at pi_k, and the cdf
        # at this Gaussian's mean would be pi_k/2. If the Gaussians
        # were well separated, the cdf ranges would approximately split
        # up [0,1] into [0, pi_1, pi_1+pi_2, ..., 1]. We initialize the x
        # for each u with the mean of the Gaussian that corresponds to
        # that interval.

        print("\tpreinitialize ...")
        U = cumsum(self.param['pi'])
        X = 0 * u
        m = max(u.shape)
        last = len(U) - 1
        for i in range(m):
            k = 0
            # bounded scan: a u[i] above the final cumulative weight (the
            # weights may sum to slightly less than 1 after rounding) is
            # assigned to the last component instead of indexing past U
            while k < last and u[i] > U[k]:
                k += 1
            X[i] = self.param['mu'][k]

        dat = Data(X, 'Function values of the p.p.f of %s' % (self.name, ))
        iteration = 0
        sys.stderr.write("\tNewton-Raphson ...")
        # c.d.f. residual of the current iterate; computed once and reused
        # by the convergence test, the update and the final report
        err = self.cdf(dat) - u
        while iteration < maxiter and max(abs(err)) > tol:
            sys.stderr.write('%03i\b\b\b' % (iteration, ))
            iteration += 1
            # half-length Newton step; 1e-2 is added to the pdf in the
            # denominator (presumably to bound the step where pdf is tiny)
            dat.X = dat.X - err / 2 / (self.pdf(dat) + 1e-2)
            err = self.cdf(dat) - u
        print("")
        if max(abs(err)) > tol:
            print("\tWARNING! natter.Distributions.MixtureOfGaussians: ppf did not converge!")
            print(max(abs(err)))

        return dat
コード例 #5
0
def LpEntropy(dat, p=None):
    """
    Estimates the joint entropy (in nats) of a Lp-spherically
    symmetric distributed source without explicit knowledge of the
    radial distribution. If p is not specified, it is estimated by
    fitting a pCauchy distribution to coordinate ratios of the data.

    The body only uses constructs that are valid on both Python 2 and
    Python 3 (print with a single parenthesised argument, range instead
    of xrange, explicit lists for the index concatenation).

    :param dat: Lp-spherically symmetric distributed sources
    :type dat:  natter.DataModule.Data
    :param p: p of the Lp-spherically symmetric source (default: None)
    :type p: float
    :returns: entropy in nats
    :rtype: float
    """
    n = dat.dim()
    if p is None:
        # estimate p with a pCauchy distribution on n-1 dimensional ratios
        from natter.Distributions import PCauchy
        pCauchy = PCauchy(n=n - 1)
        Z = zeros((n - 1, dat.numex()))
        # one randomly drawn denominator coordinate (index in [0, n)) per
        # example
        normalizingDims = randint(n, size=(dat.numex(), ))
        for k in range(n):
            ind = (normalizingDims == k)
            # all rows but k; lists are needed because Python 3 range
            # objects do not support concatenation with +
            otherRows = list(range(k)) + list(range(k + 1, n))
            numerators = dat.X[:, ind][otherRows, :]
            Z[:, ind] = numerators / atleast_2d(dat.X[k, ind])
        dat2 = Data(Z)
        # discard examples whose column sum of ratios is not finite
        # (a zero denominator produces inf/nan entries)
        dat2.X = dat2.X[:, isfinite(sum(dat2.X, axis=0))]
        pCauchy.estimate(dat2)
        p = pCauchy['p']
        print("\tUsing p=%.2f" % (p, ))

    # entropy estimate: marginal entropy of the p-norms, plus (n-1) times
    # the mean log p-norm, plus the log surface term of the p-sphere
    r = dat.norm(p=p)
    return marginalEntropy(r)[0, 0] + (n - 1) * mean(log(
        r.X)) + logSurfacePSphere(n, p)