コード例 #1
0
def matlab(path, varname=None):
    """
    Loads a matlab file from the specified path. If no variable name
    is passed to the function it uses the largest array variable in
    the matlab file (size is measured as the product of the first two
    dimensions).

    :param path: Path to the .mat file.
    :type path: string
    :param varname: Name of the variable to be loaded from the .mat file.
    :type varname: string
    :returns: Data object with the data from the specified file.
    :rtype: natter.DataModule.Data
    :raises KeyError: if varname is not in the file, or if no variable
        name is given and the file holds no non-empty array variable.

    """
    dat = io.loadmat(path, struct_as_record=True)
    if varname:
        return Data(dat[varname], 'Matlab data from ' + path)

    thekey = None
    maxdat = 0
    for k, value in dat.items():
        # Exact type comparison is kept on purpose so that ndarray
        # subclasses stay excluded, exactly as before.
        if type(value) is ndarray:
            sh = shape(value)
            if sh[0] * sh[1] > maxdat:
                maxdat = sh[0] * sh[1]
                thekey = k

    if thekey is None:
        # Previously this surfaced as an opaque "KeyError: None".
        raise KeyError('No non-empty array variable found in ' + path)
    return Data(dat[thekey], 'Matlab variable ' + thekey + ' from ' + path)
コード例 #2
0
ファイル: Entropy.py プロジェクト: fabiansinz/natter
def LpEntropy(dat,p=None):
    """
    Estimates the joint entropy (in nats) of an Lp-spherically
    symmetric distributed source without explicit knowledge of the
    radial distribution. If p is not given, it is estimated by fitting
    a pCauchy distribution to coordinate ratios of the data.

    :param dat: Lp-spherically symmetric distributed sources
    :type dat:  natter.DataModule.Data
    :param p: p of the Lp-spherically symmetric source (default: None)
    :type p: float
    :returns: entropy in nats
    :rtype: float
    """
    dim = dat.dim()
    if p is None:
        # estimate p with a pCauchy distribution
        from natter.Distributions import PCauchy
        ratioModel = PCauchy(n=dim-1)
        numSamples = dat.numex()
        ratios = zeros((dim-1, numSamples))
        # every sample gets one randomly chosen coordinate as denominator
        denominatorIdx = randint(dim, size=(numSamples,))
        for k in xrange(dim):
            mask = (denominatorIdx == k)
            remaining = list(range(k)) + list(range(k+1, dim))
            ratios[:, mask] = dat.X[:, mask][remaining, :] / atleast_2d(dat.X[k, mask])
        ratioDat = Data(ratios)
        # drop samples whose ratios are not finite (division by zero)
        finiteCols = isfinite(sum(ratioDat.X, axis=0))
        ratioDat.X = ratioDat.X[:, finiteCols]
        ratioModel.estimate(ratioDat)
        p = ratioModel['p']
        print("\tUsing p=%.2f" % (p,))

    # entropy = radial entropy + (n-1) E[log r] + log surface of the p-sphere
    radii = dat.norm(p=p)
    radialEntropy = marginalEntropy(radii)[0,0]
    meanLogRadius = mean(log(radii.X))
    return radialEntropy + (dim-1)*meanLogRadius + logSurfacePSphere(dim,p)
コード例 #3
0
ファイル: TestData.py プロジェクト: fabiansinz/natter
 def test_makeWhiVolCons(self):
     print "Testing making whitening volume conserving ...",
     C = randn(5,5)
     C = dot(C,C.T)
     
     dat = Data(dot(cholesky(C),randn(5,10000)))
     dat.makeWhiteningVolumeConserving()
     F0 = LinearTransformFactory.DCnonDC(dat)
     F0 = F0[1:,:]
     F = LinearTransformFactory.SYM(F0*dat)
     self.assertTrue(abs(det(F.W) - 1.0) < 1e-5,'Determinant is not equal to one!')
コード例 #4
0
    def test_makeWhiVolCons(self):
        print "Testing making whitening volume conserving ...",
        C = randn(5, 5)
        C = dot(C, C.T)

        dat = Data(dot(cholesky(C), randn(5, 10000)))
        dat.makeWhiteningVolumeConserving()
        F0 = LinearTransformFactory.DCnonDC(dat)
        F0 = F0[1:, :]
        F = LinearTransformFactory.SYM(F0 * dat)
        self.assertTrue(
            abs(det(F.W) - 1.0) < 1e-5, 'Determinant is not equal to one!')
コード例 #5
0
 def test_positiveHomogeneity(self):
     """Check f(a*x) == |a| * f(x) for a randomly parametrised Lp-nested function."""
     print("Testing positive homogeneity ...")
     sys.stdout.flush()
     samples = np.random.randn(6, 100)
     exponents = np.random.rand(3) + 1.0
     func = LpNestedFunction('(0,0:2,(1,2:4,(2,4:6)))', exponents)
     factor = np.random.randn() * 10
     plain = Data(samples)
     scaled = Data(factor * samples)
     fPlain = func.f(plain)
     fScaled = func.f(scaled)
     deviation = np.max(np.abs(np.abs(factor)*fPlain.X - fScaled.X))
     self.assertFalse(deviation > self.Tol,
         'Function positive homogeneity deviates by more than ' + str(self.Tol))
コード例 #6
0
    def ppf(self,u,maxiter=500, tol = 1e-5):
        '''

        Evaluates the percent point function (i.e. the inverse c.d.f.)
        of the mixture of Gaussians distribution.

        It uses a damped Newton-Raphson method with preinitialization.

        :param u:  Points at which the p.p.f. will be computed.
        :type u: numpy.array
        :param maxiter: maximum number of iterations
        :type maxiter: int
        :param tol: convergence tolerance on max |u - cdf(x)|
        :type tol: float
        :returns:  Data object with the resulting points in the domain of this distribution.
        :rtype:    natter.DataModule.Data

        '''

        # Preinitialization: if there was just a single Gaussian
        # weighted by pi_k, its cdf would saturate at pi_k and the cdf
        # at its mean would be pi_k/2. If the Gaussians were well
        # separated, the cdf ranges would approximately split [0,1]
        # into [0, pi_1, pi_1+pi_2, ..., 1]. We initialize the x for
        # each u with the mean of the Gaussian that corresponds to
        # that interval.
        print("\tpreinitialize ...")
        U = cumsum(self.param['pi'])
        lastComponent = len(U) - 1
        X = 0*u
        m = max(u.shape)
        for i in xrange(m):
            k = 0
            # Never step past the last component: rounding in cumsum can
            # leave U[-1] slightly below u[i] (e.g. u[i] == 1.0).
            while k < lastComponent and u[i] > U[k]:
                k += 1
            X[i] = self.param['mu'][k]

        dat = Data(X,'Function values of the p.p.f of %s' % (self.name,))
        iteration = 0
        sys.stderr.write("\tNewton-Raphson ...")
        # The residual is evaluated once per iterate and reused for the
        # convergence test, the update and the final warning.
        residual = u - self.cdf(dat)
        while iteration < maxiter and max(abs(residual)) > tol:
            sys.stderr.write('%03i\b\b\b' % (iteration,))
            iteration += 1
            # Half Newton step; the 1e-2 offset keeps the step bounded
            # where the pdf is close to zero.
            dat.X = dat.X + residual / 2 / (self.pdf(dat) + 1e-2)
            residual = u - self.cdf(dat)
        print("")
        if max(abs(residual)) > tol:
            print("\tWARNING! natter.Distributions.MixtureOfGaussians: ppf did not converge!")
            print(max(abs(residual)))

        return dat
コード例 #7
0
 def test_derivatives(self):
     """Compare the analytic gradient of the Lp-nested function with forward differences."""
     print("Testing derivative for p-nested function ... ")
     sys.stdout.flush()
     func = LpNestedFunction()
     dat = Data(np.random.randn(25,100)*5.0)
     analytic = func.dfdx(dat)
     # start from inf so that rows left unfilled make the test fail
     numeric = np.Inf*analytic
     step = 1e-8
     for row in range(dat.size(0)):
         shifted = dat.X.copy()
         shifted[row,:] += step
         numeric[row,:] = (func.f(Data(shifted)).X - func.f(dat).X)/step
     worst = np.max(np.abs( (numeric-analytic).flatten() ))
     self.assertFalse(worst > self.derTol,
         'Derivatives of Lp-nested function deviate with ' + str(worst) + ' by more that ' + str(self.derTol) + '!')
コード例 #8
0
 def test_derivatives(self):
     """Finite-difference check of LpNestedFunction.dfdx on random data."""
     print("Testing derivative for p-nested function ... ")
     sys.stdout.flush()
     L = LpNestedFunction()
     dat = Data(np.random.randn(25, 100) * 5.0)
     gradient = L.dfdx(dat)
     # inf-initialised so that unfilled entries can never pass the check
     approximation = np.Inf * gradient
     eps = 1e-8
     numDims = dat.size(0)
     for dimension in range(numDims):
         perturbed = dat.X.copy()
         perturbed[dimension, :] += eps
         difference = L.f(Data(perturbed)).X - L.f(dat).X
         approximation[dimension, :] = difference / eps
     maxError = np.max(np.abs( (approximation-gradient).flatten() ))
     message = 'Derivatives of Lp-nested function deviate with ' + str(maxError) + ' by more that ' + str(self.derTol) + '!'
     self.assertFalse(maxError > self.derTol, message)
コード例 #9
0
    def ppf(self, u, maxiter=500, tol=1e-5):
        '''

        Evaluates the percent point function (i.e. the inverse c.d.f.)
        of the mixture of Gaussians distribution.

        It uses a damped Newton-Raphson method with preinitialization.

        :param u:  Points at which the p.p.f. will be computed.
        :type u: numpy.array
        :param maxiter: maximum number of iterations
        :type maxiter: int
        :param tol: convergence tolerance on max |u - cdf(x)|
        :type tol: float
        :returns:  Data object with the resulting points in the domain of this distribution.
        :rtype:    natter.DataModule.Data

        '''

        # Preinitialization: if there was just a single Gaussian
        # weighted by pi_k, its cdf would saturate at pi_k and the cdf
        # at its mean would be pi_k/2. If the Gaussians were well
        # separated, the cdf ranges would approximately split [0,1]
        # into [0, pi_1, pi_1+pi_2, ..., 1]. We initialize the x for
        # each u with the mean of the Gaussian that corresponds to
        # that interval.

        print("\tpreinitialize ...")
        U = cumsum(self.param['pi'])
        lastComponent = len(U) - 1
        X = 0 * u
        m = max(u.shape)
        for i in xrange(m):
            k = 0
            # Bounded search: rounding in cumsum can leave U[-1] slightly
            # below u[i], which used to run past the end of U.
            while k < lastComponent and u[i] > U[k]:
                k += 1
            X[i] = self.param['mu'][k]

        dat = Data(X, 'Function values of the p.p.f of %s' % (self.name, ))
        iteration = 0
        sys.stderr.write("\tNewton-Raphson ...")
        # One cdf evaluation per iterate, shared by the convergence
        # test, the update step and the final warning.
        residual = u - self.cdf(dat)
        while iteration < maxiter and max(abs(residual)) > tol:
            sys.stderr.write('%03i\b\b\b' % (iteration, ))
            iteration += 1
            # Half Newton step; the 1e-2 offset bounds the step where
            # the pdf is close to zero.
            dat.X = dat.X + residual / 2 / (self.pdf(dat) + 1e-2)
            residual = u - self.cdf(dat)
        print("")
        if max(abs(residual)) > tol:
            print("\tWARNING! natter.Distributions.MixtureOfGaussians: ppf did not converge!")
            print(max(abs(residual)))

        return dat
コード例 #10
0
def gauss(n,m,mu = None, sigma = None):
    """

    Samples m n-dimensional samples from a Gaussian with mean mu and covariance sigma.


    :param n: dimensionality
    :type n: int
    :param m: number of samples
    :type m: int
    :param mu: mean (default = zeros((n,1)))
    :type mu: numpy.array
    :param sigma: covariance matrix (default = eye(n))
    :type sigma: numpy.array
    :returns: Data object with sampled patches
    :rtype: natter.DataModule.Data

    """
    # Identity checks are required here: `array == None` compares
    # elementwise and cannot be used as a truth value.
    if mu is None:
        mu = zeros((n,1))
    else:
        mu = reshape(mu,(n,1))
    if sigma is None:
        sigma = eye(n)
    # colour white noise with the Cholesky factor of sigma, then shift by mu
    return Data(dot(cholesky(sigma),randn(n,m))+mu,'Multivariate Gaussian data.')
コード例 #11
0
def sampleSequenceWithIterator(theIterator, m):
    """
    Uses the iterator to sample a sequence of m pairs of patches from it.
    theIterator must return a pair of data points at a time. Pairs are
    stored at column i and i+m.

    If the iterator is exhausted before m pairs were drawn, the
    remaining columns are left at zero.

    :param theIterator: Iterator that returns data points
    :type theIterator: iterator
    :param m: number of patch pairs to sample
    :type m: int
    :returns: Data object with 2*m samples
    :rtype: natter.DataModule.Data
    """
    from itertools import islice

    source = iter(theIterator)
    # the first pair determines the dimensionality of the samples
    x0,y0 = next(source)
    n = x0.size
    X = zeros((n,2*m))
    X[:,0] = x0
    X[:,m] = y0
    # Draw exactly m-1 further pairs. For m == 1 nothing more is drawn;
    # the old loop overwrote column m and then indexed out of bounds.
    for count, sample in enumerate(islice(source, m - 1), 1):
        X[:,count] = sample[0]
        X[:,count+m] = sample[1]
    return Data(X,'Sequence of %i data pairs sampled with iterator.' % (m, ))
コード例 #12
0
def img2PatchRand(img, p, N):
    """

    Samples N pxp patches from img.

    The images are vectorized in FORTRAN/MATLAB style. Patches that
    contain NaN, infinite or exactly zero pixels are rejected and
    redrawn, so an image without any admissible patch makes this
    function loop forever.

    :param img: Image to sample from
    :type img: numpy.array
    :param p: patch size
    :type p: int
    :param N: number of patches to sample
    :type N: int
    :returns: Data object with sampled patches
    :rtype: natter.DataModule.Data

    """

    ny,nx = shape(img)

    # N may arrive as a float; array shapes and loop bounds need an int
    N = int(N)
    X = zeros( ( p*p, N))

    stdout.flush()
    for ii in xrange(N):
        while True:
            # floor() returns a float, which is not a valid slice index.
            # NOTE: the offsets cover 0 .. nx-p-1 (resp. ny-p-1), so the
            # last admissible position is never drawn; kept as before.
            xi = int(floor( rand() * ( nx - p)))
            yi = int(floor( rand() * ( ny - p)))
            ptch = img[ yi:yi+p, xi:xi+p]
            flat = ptch.flatten()
            if not (any( isnan(flat)) or any( isinf(flat)) or any(flat == 0.0)):
                break
        X[:,ii] = ptch.flatten('F')

    name = "%d %dX%d patches" % (N,p,p)
    return Data(X, name)
コード例 #13
0
    def sample(self,m,components=None):
        """

        Samples m samples from the current finite mixture distribution.

        :param m: Number of samples to draw.
        :type m: int.
        :param components: optional index-assignable container of length m; entry i is set to the index of the mixture component that generated sample i.
        :rtype: natter.DataModule.Data
        :returns:  A Data object containing the samples


        """
        dim = self['P'][0].sample(1).dim()
        # number of samples drawn from each component
        counts = multinomial(m,self.param['alpha'])
        # random column order so that components end up interleaved
        columns = list(range(m))
        shuffle(columns)
        X = zeros((dim,m))
        start = 0
        numComponents = len(self['P'])
        for k in xrange(numComponents):
            drawn = self.param['P'][k].sample(counts[k])
            target = columns[start:start + counts[k]]
            X[:,target] = drawn.X
            if components is not None:
                components[target] = k
            start += counts[k]
        return Data(X,"%i samples from a %i-dimensional finite mixture distribution" % (m,dim))
コード例 #14
0
 def test_derivatives(self):
     """Finite-difference check of dldx for an Lp-nested symmetric distribution with radial gamma."""
     print("Testing derivative for p-nested symmetric distribution with radial gamma")
     sys.stdout.flush()
     myu = 10 * np.random.rand(1)[0]
     mys = 10 * np.random.rand(1)[0]
     n = 10
     nested = Auxiliary.LpNestedFunction('(0,0,(1,1:4),4,(1,5:8),8:10)')
     radial = Distributions.Gamma({'s': mys, 'u': myu})
     p = Distributions.LpNestedSymmetric({'f': nested, 'n': n, 'rp': radial})
     dat = p.sample(50)
     analytic = p.dldx(dat)
     step = 1e-8
     # inf-initialised so that unfilled rows cannot pass the comparison
     numeric = np.array(dat.X * np.Inf)
     for k in range(n):
         shifted = np.array(dat.X)
         shifted[k, :] += step
         numeric[k, :] = (p.loglik(Data(shifted)) - p.loglik(dat)) / step
     worst = np.max(np.abs(analytic-numeric).flatten())
     self.assertFalse(worst > self.llTol,
         'Difference in derivative of log-likelihood for p-nested symmetric greater than ' + str(self.llTol))
コード例 #15
0
 def test_loglik(self):
     """Compare loglik against reference values stored in five .mat test cases."""
     print('Testing log-likelihood of p-spherically symmetric distribution with radial gamma')
     sys.stdout.flush()
     for k in range(5):
         print('\t--> test case ' + str(k))
         filename = self.matpath + '/TestPSphericallySymmetric' + str(k) + '.mat'
         ref = io.loadmat(filename, struct_as_record=True)
         truell = np.squeeze(ref['ll'])
         radial = Distributions.Gamma({'s': ref['s'], 'u': ref['u']})
         p = Distributions.LpSphericallySymmetric({
             'p': ref['p'],
             'n': ref['n'],
             'rp': radial,
         })
         ll = p.loglik(Data(ref['X']))
         for i, value in enumerate(ll):
             deviation = np.abs(value-truell[i])
             self.assertFalse(deviation > self.Tol,
                'Log-likelihood for p-spherically symmetric with radial gamma deviates from test case')
コード例 #16
0
    def test_derivatives(self):
        """Finite-difference check of dldx for an Lp-spherically symmetric distribution with radial gamma."""
        print("Testing derivative for p-spherically symmetric distribution with radial gamma")
        sys.stdout.flush()
        myu = 3.0 * np.random.rand(1)[0] + 1.0
        mys = 3.0 * np.random.rand(1)[0] + 1.0
        myp = 2 * np.random.rand(1)[0] + .5
        n = 4
        radial = Distributions.Gamma({'s': mys, 'u': myu})
        p = Distributions.LpSphericallySymmetric({'p': myp, 'n': n, 'rp': radial})
        dat = p.sample(50)
        analytic = p.dldx(dat)
        step = 1e-8
        # inf-initialised so that unfilled rows cannot pass the comparison
        numeric = np.array(dat.X * np.Inf)
        for k in range(n):
            shifted = np.array(dat.X)
            shifted[k, :] += step
            numeric[k, :] = (p.loglik(Data(shifted)) - p.loglik(dat)) / step
        worst = np.max(np.abs(analytic-numeric).flatten())
        self.assertFalse(worst > self.llTol,
           'Difference ' + str(worst) + ' in derivative of log-likelihood for p-spherically symmetric greater than ' + str(self.llTol))

        print("[Ok]")
コード例 #17
0
    def test_derivatives(self):
        """Finite-difference check of dldx for a product of ten exponential power distributions."""
        print("Testing derivatives w.r.t. data ... ")
        sys.stdout.flush()

        # ten marginals with random shape and scale
        marginals = []
        for _ in range(10):
            myp = 2.0 * np.random.rand(1)[0] + .5
            mys = 3.0 * np.random.rand(1)[0] + 1.0
            marginals.append(Distributions.ExponentialPower({'p': myp, 's': mys}))

        p = Distributions.ProductOfExponentialPowerDistributions({'P': marginals})

        dat = p.sample(100)
        h = 1e-7
        tol = 1e-4
        original = dat.X.copy()

        df = p.dldx(dat)
        df2 = 0.0 * df
        for i in xrange(dat.size(0)):
            shifted = original.copy()
            shifted[i, :] = shifted[i, :] + h
            df2[i, :] = (p.loglik(Data(shifted)) - p.loglik(dat)) / h

        # diagnostics that are shown when the assertion fails
        prot = {}
        prot['message'] = ('Difference in derivative of log-likelihood for PowerExponential greater than '
                           + str(tol))
        prot['max difference'] = np.max(np.abs((df - df2).flatten()))
        prot['mean difference'] = np.mean(np.abs((df - df2).flatten()))

        withinTolerance = np.max(np.abs(df - df2)) < tol
        self.assertTrue(withinTolerance, Auxiliary.prettyPrintDict(prot))
コード例 #18
0
    def histogram(self, dat, cdf=False, ax=None, plotlegend=True, bins=None):
        """
        Plots a histogram of the data points in dat. This works only
        for 1-dimensional distributions. It also plots the pdf of the distribution.

        :param dat: data points that enter the histogram
        :type dat: natter.DataModule.Data
        :param cdf: boolean that indicates whether the cdf should be plotted or not (default: False)
        :param ax: axes object the histogram is plotted into if it is not None.
        :param plotlegend: boolean indicating whether a legend should be plotted (default: True)
        :param bins: number of bins to be used. If None (default), one bin per 200 data points is used, but at least one bin.
        :raises Errors.DimensionalityError: if dat has more than one dimension.
        """

        sh = shape(dat.X)
        if len(sh) > 1 and sh[0] > 1:
            raise Errors.DimensionalityError(
                'Cannont plot data with more than one dimension!')

        if ax is None:
            fig = plt.figure()
            ax = fig.add_axes([.1, .1, .8, .8])
        x = squeeze(dat.X)
        if bins is None:
            # integer bin count; fewer than 200 samples used to give 0 bins
            bins = max(sh) // 200
            if bins < 1:
                bins = 1
        n, bins, patches = ax.hist(x,
                                   bins=bins,
                                   normed=1,
                                   facecolor='blue',
                                   alpha=0.8,
                                   lw=0.0)

        # evaluate the model density at the centers of the histogram bins
        bincenters = 0.5 * (bins[1:] + bins[:-1])
        y = squeeze(self.pdf(Data(bincenters)))
        ax.plot(bincenters, y, 'k--', linewidth=2)

        # The legend is attached to ax itself so that it also lands on
        # the right axes when the caller's ax is not the current one.
        if hasattr(self, 'cdf') and cdf:
            z = squeeze(self.cdf(Data(bincenters)))
            ax.plot(bincenters, z, 'k.-', linewidth=2)
            if plotlegend:
                ax.legend(('p.d.f.', 'c.d.f.', 'Histogram'), frameon=False)
        elif plotlegend:
            ax.legend(('p.d.f.', 'Histogram'), frameon=False)

        ax.set_xlabel('x')
        ax.set_ylabel('Probability')
        ax.set_xlim(min(x), max(x))
        ax.grid(True)
コード例 #19
0
def loadnpz(path, varname=None, transpose=None):
    """
    Loads a npz file from the specified path. If no variable name
    is passed to the function it prints all variables and asks for
    user input.

    :param path: Path to the .npz file.
    :type path: string
    :param varname: Name of the variable to be loaded from the .npz file.
    :type varname: string
    :param transpose: Whether the transpose of the variable shall be loaded. If None (default), the data is transposed when it has more rows than columns.
    :type transpose: bool
    :returns: Data object with the data from the specified file.
    :rtype: natter.DataModule.Data
    :raises ValueError: if the requested variable does not exist in the file.

    """
    fin = np.load(path)
    try:
        if varname is None:
            # interactive selection of the variable
            stdout.write('Variables in "%s":\n' % (path))
            for var in fin.keys():
                stdout.write(var + '\n')
            stdout.write('Which variable should be loaded: ')
            stdout.flush()  # make the prompt visible before blocking on input
            # strip only the line terminator; slicing off the last
            # character would eat a real one when no newline is present
            varname = stdin.readline().rstrip('\r\n')

        if varname not in fin.keys():
            raise ValueError('Given variable name "%s" does not exist in file "%s".' % (
                varname, path))
        dat = atleast_2d(fin[varname])
    finally:
        # the array is in memory by now, so the archive can be released
        fin.close()

    if transpose is None:
        transpose = dat.shape[0] > dat.shape[1]

    if transpose:
        dat = dat.T
    return Data(dat, 'npz data from ' + path)
コード例 #20
0
    def sample(self, m):
        """

        Draws m samples from the current LpNestedSymmetric distribution.

        :param m: Number of samples to draw.
        :type m: int.
        :returns:  A Data object containing the samples
        :rtype:    natter.DataModule.Data

        """
        nested = self.param['f']
        rootDim = nested.n[()]
        samples = zeros((rootDim, m))
        r = beta(float(rootDim), 1.0, (1, m))
        # fills `samples` in place
        _recsample((), r, nested, m, samples)

        ret = Data(samples, 'Samples from ' + self.name)
        # replace the current value of f by a radius drawn from the radial distribution
        radii = self.param['rp'].sample(m).X
        ret.scale(radii / nested.f(ret).X)

        return ret
コード例 #21
0
    def test_LogDetRadialTransform(self):
        """Compare logDetJacobian of a radial transformation with a finite-difference 2x2 determinant."""
        print("Testing logdet of radial transformation ... ")
        sys.stdout.flush()
        p = np.random.rand() * 3. + .5
        # source distribution
        psource = Distributions.LpSphericallySymmetric({'p': p})
        # target distribution with a random radial gamma
        radial = Distributions.Gamma({
            'u': np.random.rand() * 3.0,
            's': np.random.rand() * 2.0
        })
        ptarget = Distributions.LpSphericallySymmetric({'p': p, 'rp': radial})
        # create Filter
        F = NonlinearTransformFactory.RadialTransformation(psource, ptarget)
        # sample data from source distribution
        dat = psource.sample(100)

        # apply filter to data
        transformed = F * dat
        logDetJ = F.logDetJacobian(dat)
        logDetJ2 = 0 * logDetJ

        h = 1e-8

        # finite-difference Jacobian columns for the first two coordinates
        shifted = Data(dat.X.copy())
        shifted.X[0, :] += h
        W1 = ((F * shifted).X - transformed.X) / h

        shifted = Data(dat.X.copy())
        shifted.X[1, :] += h
        W2 = ((F * shifted).X - transformed.X) / h

        for i in range(dat.numex()):
            determinant = W1[0, i] * W2[1, i] - W1[1, i] * W2[0, i]
            logDetJ2[i] = np.log(np.abs(determinant))

        worst = np.max(np.abs(logDetJ - logDetJ2))
        self.assertFalse(worst > self.detTol,
                         'Log determinant of radial transformation deviates by more than ' + str(self.detTol) + '!')
コード例 #22
0
    def histogram(self, dat, cdf=False, ax=None, plotlegend=True):
        """
        Plots a histogram of the data points in dat. This works only
        for 1-dimensional distributions. It also plots the pdf of the distribution.

        :param dat: data points that enter the histogram
        :type dat: natter.DataModule.Data
        :param cdf: boolean that indicates whether the cdf should be plotted or not (default: False)
        :param ax: axes object the histogram is plotted into if it is not None.
        :param plotlegend: boolean indicating whether a legend should be plotted (default: True)
        """

        # Turn the points in self.param['b'] into bin edges: every point
        # is shifted left by half the distance to its neighbour and one
        # closing edge is appended on the right.
        # NOTE(review): presumably 'b' holds the bin centers - confirm.
        b = array(self.param['b'])
        d = (b[1:] - b[:-1]) / 2.0
        b[1:] = b[1:] - d
        b[0] -= d[0]
        b = hstack((b, b[-1] + 2.0 * d[-1]))
        widths = b[1:] - b[:-1]

        counts = histogram(squeeze(dat.X), bins=b)[0]
        # The counts are integers; dividing by a float avoids integer
        # division (which zeroes the normalized histogram on Python 2).
        h = counts / float(sum(counts)) / widths
        if ax is None:
            fig = figure()
            ax = fig.add_axes([.1, .1, .8, .8])

        # b[:-1] are left edges, so the bars must be edge-aligned
        ax.bar(b[:-1], h, width=widths, align='edge')

        bincenters = linspace(b[0], b[-1], 1000)
        y = squeeze(self.pdf(Data(bincenters)))
        ax.plot(bincenters, y, 'k--', linewidth=2)

        # The legend is attached to ax itself so that it also lands on
        # the right axes when the caller's ax is not the current one.
        if hasattr(self, 'cdf') and cdf:
            z = squeeze(self.cdf(Data(bincenters)))
            ax.plot(bincenters, z, 'k.-', linewidth=2)
            if plotlegend:
                ax.legend(('p.d.f.', 'c.d.f.', 'Histogram'))
        elif plotlegend:
            ax.legend(('p.d.f.', 'Histogram'))

        ax.set_xlabel('x')
        ax.set_ylabel('Probability')
        ax.grid(True)
コード例 #23
0
ファイル: Beta.py プロジェクト: zym1010/natter
    def sample(self, m):
        '''
        Draws m examples from the distribution.

        :param m: number of examples to sample
        :type m: int
        :returns: Samples drawn with scipy's beta sampler using the parameters 'alpha' and 'beta'
        :rtype: natter.DataModule.Data

        '''
        alpha = self['alpha']
        beta = self['beta']
        draws = stats.beta.rvs(alpha, beta, size=(m, ))
        return Data(draws)
コード例 #24
0
    def objective(self, W, nargout,dat,q):
        """
        Objective function to be optimized with
        Auxiliary.Optimization.StGradient. It computes the mean
        log-likelihood of the transformed data W*dat under q, divided
        by the number of dimensions and by log(2).

        :param W: current matrix W
        :type W: numpy.ndarray
        :param nargout: number of output arguments. If 1, returns objective, otherwise the derivative as well
        :param dat: data points at which the objective is evaluated
        :type dat: natter.DataModule.Data
        :param q: base distribution
        :type q: natter.Distributions.Distribution
        :returns: tuple with the value of the objective and the derivative (if nargout != 1)
        """
        (n,m) = dat.size()
        value = sum(q.loglik(Data(array(dot(W,dat.X)))))/m/n/log(2.)
        if nargout == 1:
            return (value,)
        # derivative w.r.t. W: dldx at the transformed data times dat.X'
        transformed = Data(array(dot(W,dat.X)))
        gradient = dot(q.dldx(transformed), dat.X.transpose())/m/n/log(2)
        return (value, gradient)
コード例 #25
0
    def ppf(self, X):
        '''

        Evaluates the percentile function (inverse c.d.f.) for a given array of quantiles.
        The quantiles are wrapped into a Data object unchanged.

        :param X: Percentiles for which the ppf will be computed.
        :type X: numpy.array
        :returns:  A Data object containing the values of the ppf.
        :rtype:    natter.DataModule.Data

        '''
        quantiles = Data(X)
        return quantiles
コード例 #26
0
 def test_derivatives(self):
     """Finite-difference check of dldx for a mixture of five Gaussians."""
     print("Testing derivatives w.r.t. data ... ")
     sys.stdout.flush()
     p = Distributions.MixtureOfGaussians({'K': 5})
     dat = p.sample(100)
     h = 1e-7
     tol = 1e-6
     shifted = np.array(dat.X) + h
     analytic = p.dldx(dat)
     numeric = (p.loglik(Data(shifted)) - p.loglik(dat)) / h
     worst = np.max(np.abs(analytic-numeric))
     self.assertFalse(worst > tol,
         'Difference ' +str(worst) +' in derivative of log-likelihood for MixtureOfGaussians greater than ' + str(tol))
コード例 #27
0
    def sample(self,m):
        """

        Samples m samples from the current LpSphericallySymmetric distribution.

        :param m: Number of samples to draw.
        :type m: int.
        :returns:  A Data object containing the samples
        :rtype:    natter.DataModule.Data

        """
        p = self.param['p']
        n = self.param['n']
        # 1.0/p instead of 1/p: with an integer p, Python 2 integer
        # division would turn the exponent into 0.
        invp = 1.0/p
        # sample from a p-generalized normal with scale 1
        z = gamma(invp,1.0,(n,m))
        z = abs(z)**invp
        dat =  Data(z * sign(randn(n,m)),'Samples from ' + self.name, \
                      ['sampled ' + str(m) + ' examples from Lp-generalized Normal'])
        # normalize the samples to get a uniform distribution.
        dat.normalize(p)
        # rescale with radii drawn from the radial distribution
        r = self.param['rp'].sample(m)
        dat.scale(r)
        return dat
コード例 #28
0
 def test_LpEntropy(self):
     """Compare the Lp entropy estimate on isotropic Gaussian data with the analytic entropy."""
     print("Testing Lp-Entropy estimator")
     n = randint(5) + 1
     s = 10.0 * rand()
     samples = randn(n, 20000) * s
     # entropy of an n-dimensional isotropic Gaussian with std s
     expected = n * .5 * log(2.0 * pi * e * s**2)
     estimate = Entropy.LpEntropy(Data(samples))
     message = ('Entropy estimates for LpEntropy differ by more than ' +
                str(self.tol))
     self.assertTrue(abs(expected - estimate) < self.tol, message)
コード例 #29
0
 def test_marginalEntropyEstimators(self):
     """Check every marginal entropy estimator against the analytic entropy of Gaussian data."""
     print("Testing marginal entropy estimation ...")
     stdout.flush()
     s = 10.0 * rand(2, 1)
     x = randn(2, 10000) * s
     # analytic entropy of each Gaussian marginal with std s
     h = .5 * log(2.0 * pi * e * s**2)
     dat = Data(x)
     for method in ['MLE', 'JK', 'CAE', 'MM']:
         h2 = Entropy.marginalEntropy(dat, method)
         # the message used to lack the space after the method name
         self.assertTrue(
             max(abs(h - h2)) < self.tol, 'Entropy estimates for ' +
             method + ' differ by more than ' + str(self.tol))
コード例 #30
0
ファイル: LogNormal.py プロジェクト: zym1010/natter
    def sample(self, m):
        """

        Draws m samples from the current distribution by exponentiating
        Gaussian samples with scale 's' and mean 'mu'.

        :param m: Number of samples to draw.
        :type m: int.
        :returns:  A Data object containing the samples
        :rtype:    natter.DataModule.Data

        """
        gaussian = randn(1, m) * self.param['s'] + self.param['mu']
        label = str(m) + ' samples from ' + self.name
        return Data(exp(gaussian), label)
コード例 #31
0
    def f(self, dat):
        """
        Computes the value of the Lp-nested function at the vectors in
        dat. Alternatively you can directly call the object on the
        data, i.e. use *L(dat)* instead of *L.f(dat)*.

        :param dat: Data on which the LpNestedFunction will be evaluated.
        :type dat: natter.DataModule.Data
        :returns: A Data object containing the function values
        :rtype: natter.DataModule.Data

        """
        values = computerec(self.tree, dat.X, self.p)
        return Data(values)
コード例 #32
0
ファイル: TestDirichlet.py プロジェクト: zym1010/natter
class TestDirichlet(unittest.TestCase):
    """Tests log-likelihood and parameter estimation of the Dirichlet distribution."""

    # reference data: three components, ten samples
    X = Data(np.array([
        [0.1042605373, 0.0443097862, 0.0032503423, 0.0420286884, 0.1194181369,
         0.1848512638, 0.0906818056, 0.4223094329, 0.4998465219, 0.0078395240],
        [0.7299213688, 0.5167476582, 0.4604688785, 0.4604338136, 0.4221988687,
         0.7307655970, 0.6077871086, 0.1683807824, 0.4403800496, 0.7195288939],
        [0.1658180939, 0.4389425556, 0.5362807793, 0.4975374980, 0.4583829944,
         0.0843831391, 0.3015310858, 0.4093097847, 0.0597734285, 0.2726315821],
    ]))
    # reference log-likelihoods of the samples in X under alpha
    LL = np.array([
        1.3689065138, 1.7564748726, 2.7472333803, 1.7160392427, 1.1921277658,
        0.8798846426, 1.4837080045, -0.2267826679, -0.1300239648, 2.5635963658,
    ])
    alpha = np.array([0.6086919517, 1.9573600512, 1.3938315963])
    # tolerances for the log-likelihood and for estimated parameters
    Tol = 1e-7
    TolParam = 5 * 1e-2

    def test_loglik(self):
        """loglik must reproduce the stored reference values."""
        print("Testing log-likelihood of Dirichlet distribution ... ")
        sys.stdout.flush()
        dirichlet = Distributions.Dirichlet({'alpha': self.alpha})
        computed = dirichlet.loglik(self.X)
        for k, expected in enumerate(self.LL):
            message = ('Difference in log-likelihood for Dirichlet greater than ' +
                       str(self.Tol))
            self.assertTrue(np.abs(computed[k] - expected) < self.Tol, message)

    def test_estimate(self):
        """estimate must recover alpha from 50000 samples."""
        print("Testing parameter estimation of Dirichlet distribution ...")
        sys.stdout.flush()
        myalpha = 10.0 * np.random.rand(10)
        generator = Distributions.Dirichlet({'alpha': myalpha})
        dat = generator.sample(50000)
        # start the fit from a fresh random parameter vector
        fitted = Distributions.Dirichlet({'alpha': np.random.rand(10)})
        fitted.estimate(dat)
        estimated = fitted.param['alpha']

        worst = np.max(np.abs(estimated - myalpha))
        message = ('Difference in alpha parameter for Dirichlet distribution greater than '
                   + str(self.TolParam))
        self.assertTrue(worst < self.TolParam, message)
コード例 #33
0
    def sample(self,m):
        """

        Draws m samples from the current TruncatedGaussian distribution.

        :param m: Number of samples to draw.
        :type m: int.
        :rtype: natter.DataModule.Data
        :returns:  A Data object containing the samples


        """
        mu = self.param['mu']
        sigma = self.param['sigma']
        # truncnorm takes its bounds in units of sigma relative to mu
        lower = (self.param['a']-mu)/sigma
        upper = (self.param['b']-mu)/sigma
        draws = truncnorm.rvs(lower,upper,loc=mu,scale=sigma,size=m)
        return Data(draws,'%i samples from %s' % (m,self.name))
コード例 #34
0
    def ppf(self,u):
        '''

        Evaluates the percentile function (inverse c.d.f.) for a given array of quantiles.

        :param u: Percentiles for which the ppf will be computed.
        :type u: numpy.array
        :returns:  A Data object containing the values of the ppf.
        :rtype:    natter.DataModule.Data

        '''
        mu = self.param['mu']
        sigma = self.param['sigma']
        # truncnorm takes its bounds in units of sigma relative to mu
        lower = (self.param['a']-mu)/sigma
        upper = (self.param['b']-mu)/sigma
        values = truncnorm.ppf(u,lower,upper,loc=mu,scale=sigma)
        return Data(values, 'Percentiles from a %s' % (self.name,))
コード例 #35
0
ファイル: GammaP.py プロジェクト: zym1010/natter
    def ppf(self, U):
        '''

        Evaluates the percentile function (inverse c.d.f.) for a given array of quantiles.

        The quantiles are mapped through the gamma ppf with shape 'u' and
        scale 's' and then raised to the power 1/p.

        :param U: Percentiles for which the ppf will be computed.
        :type U: numpy.array
        :returns:  A Data object containing the values of the ppf.
        :rtype:    natter.DataModule.Data

        '''
        # 1.0/p instead of 1/p: with an integer p, Python 2 integer
        # division would turn the exponent into 0.
        exponent = 1.0 / self.param['p']
        return Data(
            gamma.ppf(U, self.param['u'],
                      scale=self.param['s'])**exponent)
コード例 #36
0
    def ppf(self,u,bounds=None,maxiter=1000):
        '''

        Evaluates the percentile function (inverse c.d.f.) for a given
        array of quantiles by bisection on u - cdf(x). The mixture
        must therefore provide a cdf.

        NOTE: ppf works only for one dimensional mixture distributions.

        The initial bracket is chosen as follows: the given bounds if
        any; otherwise the parameters 'a' and 'b' of the first mixture
        component if it has both; otherwise [-1e6, 1e6].

        :param u: Percentiles for which the ppf will be computed.
        :type u: numpy.array
        :param bounds: a tuple of two arrays of the same size as u that specifies the initial lower (bounds[0]) and upper (bounds[1]) boundaries for the bisection method.
        :type bounds: tuple of two numpy.array
        :param maxiter: maximum number of iterations
        :type maxiter: int
        :returns:  A Data object containing the values of the ppf.
        :rtype:    natter.DataModule.Data
           
        '''

        ret = Data(u,'Percentiles from ' + self.name)
        # use the bisection method to invert the cdf
        #v = squeeze(log(u/(1-u)))
        if bounds is not None:
            lb = Data(bounds[0])
            ub = Data(bounds[1])
        elif self.param['P'][0].param.has_key('a') and self.param['P'][0].param.has_key('b'):
            warn("\tAssuming that the keys a=%.2g and b=%.2g in %s refer to boundaries. Using those..." % (self.param['P'][0]['a'],self.param['P'][0]['b'],self.param['P'][0].name,))
            lb = Data(0*u+self.param['P'][0]['a'])
            ub = Data(0*u+self.param['P'][0]['b'])
        else:
            lb = Data(u*0-1e6)
            ub = Data(u*0+1e6)
        def f(dat):
            # function whose root is sought: zero where cdf(dat) == u
            # c = self.cdf(dat)
            # return v - log(c/(1-c))
            return u-self.cdf(dat)

        iterC = 0
        # iterate until the widest bracket is narrower than 5e-10
        while max(ub.X-lb.X) > 5*1e-10 and iterC < maxiter:
            # midpoint of the current brackets
            ret.X = (ub.X+lb.X)/2
            mf = f(ret)
            lf = f(lb)
            uf = f(ub)
            # equal signs at both ends: the bracket no longer encloses a
            # root, so widen it for the affected entries
            if any(lf*uf>0):
                warn("ppf lost the root! resetting boundaries")
                ind0 = where(lf*uf > 0)
                ub.X[0,ind0[0]] = 4*abs(ub.X[0,ind0[0]]+1)
                lb.X[0,ind0[0]] = -4*abs(lb.X[0,ind0[0]]+1)
            # sign change between lower bound and midpoint: move the
            # upper bound to the midpoint; otherwise move the lower bound.
            # NOTE: lf and uf were computed before a possible reset above.
            ind0 = where(mf*lf < 0)
            ind1 = where(mf*uf < 0)
            ub.X[0,ind0[0]] = ret.X[0,ind0[0]]
            lb.X[0,ind1[0]] = ret.X[0,ind1[0]]
            iterC +=1
            # progress line, rewritten in place via carriage return
            sys.stdout.write(80*" " + "\r\tFiniteMixtureDistribution.ppf maxdiff: %.4g, meandiff: %.4g" % (max(ub.X-lb.X),mean(ub.X-lb.X)))
            sys.stdout.flush()
        if iterC == maxiter:
            warn("FiniteMixtureDistribution.ppf: Maxiter reached! Exiting. Bisection method might not have been converged. Maxdiff is %.10g. Mean diff is %.4g" % ( max(ub.X-lb.X),mean(ub.X-lb.X)))
        #sys.stdout.write("\n")
        return ret