def matlab(path, varname=None):
    """
    Loads a matlab file from the specified path. If no variable name is
    passed to the function it uses the array variable with the most
    elements in the matlab file.

    :param path: Path to the .mat file.
    :type path: string
    :param varname: Name of the variable to be loaded from the .mat file.
    :type varname: string
    :returns: Data object with the data from the specified file.
    :rtype: natter.DataModule.Data
    :raises ValueError: if no variable name is given and the file holds no non-empty array variable.
    """
    dat = io.loadmat(path, struct_as_record=True)
    if varname:
        return Data(dat[varname], 'Matlab data from ' + path)

    # Pick the largest array; entries that are not arrays are skipped.
    thekey = None
    maxdat = 0
    for k in dat.keys():
        if isinstance(dat[k], ndarray):
            # .size counts all elements, so arrays with more than two
            # dimensions are compared by their full extent.
            numel = dat[k].size
            if numel > maxdat:
                maxdat = numel
                thekey = k

    if thekey is None:
        raise ValueError('No non-empty array variable found in "%s".' % (path,))
    return Data(dat[thekey], 'Matlab variable ' + thekey + ' from ' + path)
def LpEntropy(dat,p=None):
    """
    Estimates the joint entropy (in nats) of a Lp-spherically symmetric
    distributed source without explicit knowledge of the radial
    distribution. If p is not specified, it is estimated by fitting a
    pCauchy distribution to ratios of the coordinates.

    :param dat: Lp-spherically symmetric distributed sources
    :type dat: natter.DataModule.Data
    :param p: p of the Lp-spherically symmetric source (default: None)
    :type p: float
    :returns: entropy in nats
    :rtype: float
    """
    n = dat.dim()

    if p is None:
        # No p supplied: fit a PCauchy model to coordinate ratios and use its p.
        from natter.Distributions import PCauchy
        ratioModel = PCauchy(n=n-1)
        numSamples = dat.numex()
        ratios = zeros((n-1, numSamples))
        # For every sample, draw the coordinate the remaining ones are divided by.
        denomIdx = randint(n, size=(numSamples,))
        for k in xrange(n):
            chosen = (denomIdx == k)
            others = [j for j in range(n) if j != k]
            ratios[:, chosen] = dat.X[:, chosen][others, :]/atleast_2d(dat.X[k, chosen])
        ratioDat = Data(ratios)
        # Keep only the samples whose ratios sum to a finite value.
        finiteCols = isfinite(sum(ratioDat.X, axis=0))
        ratioDat.X = ratioDat.X[:, finiteCols]
        ratioModel.estimate(ratioDat)
        p = ratioModel['p']
        print("\tUsing p=%.2f" % (p,))

    # Entropy from the marginal entropy of the p-norm, the mean log-norm
    # and the log surface term returned by logSurfacePSphere.
    r = dat.norm(p=p)
    return marginalEntropy(r)[0,0] + (n-1)*mean(log(r.X)) + logSurfacePSphere(n,p)
def test_makeWhiVolCons(self): print "Testing making whitening volume conserving ...", C = randn(5,5) C = dot(C,C.T) dat = Data(dot(cholesky(C),randn(5,10000))) dat.makeWhiteningVolumeConserving() F0 = LinearTransformFactory.DCnonDC(dat) F0 = F0[1:,:] F = LinearTransformFactory.SYM(F0*dat) self.assertTrue(abs(det(F.W) - 1.0) < 1e-5,'Determinant is not equal to one!')
def test_makeWhiVolCons(self): print "Testing making whitening volume conserving ...", C = randn(5, 5) C = dot(C, C.T) dat = Data(dot(cholesky(C), randn(5, 10000))) dat.makeWhiteningVolumeConserving() F0 = LinearTransformFactory.DCnonDC(dat) F0 = F0[1:, :] F = LinearTransformFactory.SYM(F0 * dat) self.assertTrue( abs(det(F.W) - 1.0) < 1e-5, 'Determinant is not equal to one!')
def test_positiveHomogeneity(self):
    """
    Checks f(a*x) == |a|*f(x) for an LpNestedFunction on random data and
    a random scalar a, up to self.Tol.
    """
    print("Testing positive homogeneity ...")
    sys.stdout.flush()
    samples = np.random.randn(6, 100)
    exponents = np.random.rand(3) + 1.0
    func = LpNestedFunction('(0,0:2,(1,2:4,(2,4:6)))', exponents)
    factor = np.random.randn() * 10
    plain = Data(samples)
    scaled = Data(factor * samples)
    fPlain = func.f(plain)
    fScaled = func.f(scaled)
    worst = np.max(np.abs(np.abs(factor) * fPlain.X - fScaled.X))
    self.assertFalse(
        worst > self.Tol,
        'Function positive homogeneity deviates by more than ' + str(self.Tol))
def ppf(self,u,maxiter=500, tol = 1e-5):
    '''
    Evaluates the percent point function (i.e. the inverse c.d.f.) of the
    mixture of Gaussians distribution. It uses a Newton-Raphson iteration
    with halved steps, started from a preinitialization.

    :param u: Points at which the p.p.f. will be computed.
    :type u: numpy.array
    :param maxiter: maximum number of iterations
    :param tol: convergence tolerance on the largest absolute c.d.f. error
    :returns: Data object with the resulting points in the domain of this distribution.
    :rtype: natter.DataModule.Data
    '''
    # Preinitialization: if the Gaussians were well separated, the c.d.f.
    # ranges of the components would approximately split [0,1] at the
    # cumulative sums of the mixture weights pi. Each u is started at the
    # mean of the Gaussian whose interval it falls into.
    print("\tpreinitialize ...")
    U = cumsum(self.param['pi'])
    X = 0*u
    m = max(u.shape)
    lastComponent = len(U) - 1
    for i in xrange(m):
        k = 0
        # Stop at the last component: if the cumulative weights end
        # below u[i] (e.g. through rounding), the unguarded search would
        # run past the end of U.
        while k < lastComponent and u[i] > U[k]:
            k += 1
        X[i] = self.param['mu'][k]

    dat = Data(X,'Function values of the p.p.f of %s' % (self.name,))
    iteration = 0
    sys.stderr.write("\tNewton-Raphson ...")
    # The c.d.f. error is computed once per iterate and reused for both
    # the convergence test and the update.
    residual = self.cdf(dat) - u
    while iteration < maxiter and max(abs(residual)) > tol:
        sys.stderr.write('%03i\b\b\b' % (iteration,))
        iteration += 1
        # Halved Newton step; 1e-2 is added to the density so the division
        # stays bounded where the p.d.f. is close to zero.
        dat.X = dat.X - residual / 2 / (self.pdf(dat) + 1e-2)
        residual = self.cdf(dat) - u
    print("")
    if max(abs(residual)) > tol:
        print("\tWARNING! natter.Distributions.MixtureOfGaussians: ppf did not converge!")
        print(max(abs(residual)))
    return dat
def test_derivatives(self):
    """
    Compares LpNestedFunction.dfdx with a forward finite difference of
    LpNestedFunction.f (step 1e-8), one data dimension at a time.
    """
    print("Testing derivative for p-nested function ... ")
    sys.stdout.flush()
    L = LpNestedFunction()
    dat = Data(np.random.randn(25,100)*5.0)
    analytic = L.dfdx(dat)
    # Placeholder of the same shape; every row is overwritten below.
    numeric = np.inf*analytic
    h = 1e-8
    for k in range(dat.size(0)):
        shifted = dat.X.copy()
        shifted[k,:] += h
        numeric[k,:] = (L.f(Data(shifted)).X - L.f(dat).X)/h
    maxDev = np.max(np.abs( (numeric-analytic).flatten() ))
    self.assertFalse(
        maxDev > self.derTol,
        'Derivatives of Lp-nested function deviate with ' + str(maxDev)
        + ' by more that ' + str(self.derTol) + '!')
def test_derivatives(self):
    """
    Finite-difference check of the derivative returned by
    LpNestedFunction.dfdx against differences of LpNestedFunction.f.
    """
    print("Testing derivative for p-nested function ... ")
    sys.stdout.flush()
    L = LpNestedFunction()
    dat = Data(np.random.randn(25, 100) * 5.0)
    df = L.dfdx(dat)
    fd = np.inf * df  # same shape as df; filled row by row below
    step = 1e-8
    numRows = dat.size(0)
    for row in range(numRows):
        perturbed = dat.X.copy()
        perturbed[row, :] += step
        probe = Data(perturbed)
        fd[row, :] = (L.f(probe).X - L.f(dat).X) / step
    deviation = np.max(np.abs((fd - df).flatten()))
    message = ('Derivatives of Lp-nested function deviate with '
               + str(deviation) + ' by more that ' + str(self.derTol) + '!')
    self.assertFalse(deviation > self.derTol, message)
def ppf(self, u, maxiter=500, tol=1e-5):
    '''
    Evaluates the percent point function (i.e. the inverse c.d.f.) of the
    mixture of Gaussians distribution. It uses a Newton-Raphson iteration
    with halved steps, started from a preinitialization.

    :param u: Points at which the p.p.f. will be computed.
    :type u: numpy.array
    :param maxiter: maximum number of iterations
    :param tol: convergence tolerance on the largest absolute c.d.f. error
    :returns: Data object with the resulting points in the domain of this distribution.
    :rtype: natter.DataModule.Data
    '''
    # Preinitialization: for well separated Gaussians the c.d.f. ranges of
    # the components approximately split [0,1] at the cumulative sums of
    # the mixture weights pi. Each u starts at the mean of the Gaussian
    # that corresponds to its interval.
    print("\tpreinitialize ...")
    U = cumsum(self.param['pi'])
    X = 0 * u
    m = max(u.shape)
    lastComponent = len(U) - 1
    for i in xrange(m):
        k = 0
        # Clamp to the last component so that a u[i] above the final
        # cumulative weight (e.g. through rounding) cannot index past U.
        while k < lastComponent and u[i] > U[k]:
            k += 1
        X[i] = self.param['mu'][k]

    dat = Data(X, 'Function values of the p.p.f of %s' % (self.name, ))
    iteration = 0
    sys.stderr.write("\tNewton-Raphson ...")
    # Evaluate the c.d.f. error once per iterate; it drives both the
    # stopping test and the update.
    residual = self.cdf(dat) - u
    while iteration < maxiter and max(abs(residual)) > tol:
        sys.stderr.write('%03i\b\b\b' % (iteration, ))
        iteration += 1
        # Halved Newton step with 1e-2 added to the density to keep the
        # quotient bounded.
        dat.X = dat.X - residual / 2 / (self.pdf(dat) + 1e-2)
        residual = self.cdf(dat) - u
    print("")
    if max(abs(residual)) > tol:
        print("\tWARNING! natter.Distributions.MixtureOfGaussians: ppf did not converge!")
        print(max(abs(residual)))
    return dat
def gauss(n,m,mu = None, sigma = None):
    """
    Samples m n-dimensional samples from a Gaussian with mean mu and
    covariance sigma. Standard normal samples are multiplied by the
    Cholesky factor of sigma and shifted by mu.

    :param n: dimensionality
    :type n: int
    :param m: number of samples
    :type m: int
    :param mu: mean (default = zeros((n,1)))
    :type mu: numpy.array
    :param sigma: covariance matrix (default = eye(n))
    :type sigma: numpy.array
    :returns: Data object with sampled patches
    :rtype: natter.DataModule.Data
    """
    # Identity tests: "== None" on an array compares elementwise and is
    # ambiguous (or an error) when used as a condition.
    if mu is None:
        mu = zeros((n,1))
    else:
        mu = reshape(mu,(n,1))
    if sigma is None:
        sigma = eye(n)
    return Data(dot(cholesky(sigma),randn(n,m))+mu,'Multivariate Gaussian data.')
def sampleSequenceWithIterator(theIterator, m):
    """
    Uses the iterator to sample a sequence of m pairs of patches from it.
    theIterator must return a pair of data points at a time. Pair i is
    stored in columns i and i+m.

    If the iterator is exhausted before m pairs were read, the remaining
    columns stay zero.

    :param theIterator: Iterator that returns data points
    :type theIterator: iterator
    :param m: number of patch pairs to sample
    :type m: int
    :returns: Data object with 2*m samples
    :rtype: natter.DataModule.Data
    """
    from itertools import islice

    # The first pair determines the dimensionality of the samples.
    x0,y0 = next(theIterator)
    n = x0.size
    X = zeros((n,2*m))
    X[:,0] = x0
    X[:,m] = y0
    # Read at most m-1 further pairs. Bounding the read up front also
    # covers m == 1, where no further pair may be consumed or written.
    for count, sample in enumerate(islice(theIterator, m - 1), 1):
        X[:,count] = sample[0]
        X[:,count+m] = sample[1]
    return Data(X,'Sequence of %i data pairs sampled with iterator.' % (m, ))
def img2PatchRand(img, p, N):
    """
    Samples N pxp patches from img. The patches are vectorized in
    FORTRAN/MATLAB style (column-major). Patches that contain a NaN, an
    infinite value or an exact zero are rejected and redrawn.

    :param img: Image to sample from
    :type img: numpy.array
    :param p: patch size
    :type p: int
    :param N: number of patches to sample
    :type N: int
    :returns: Data object with sampled patches
    :rtype: natter.DataModule.Data
    """
    ny,nx = shape(img)
    # N is converted once so that a float count is valid as an array size.
    numPatches = int(N)
    X = zeros( ( p*p, numPatches))
    stdout.flush()
    for ii in xrange(numPatches):
        while True:
            # Offsets are cast to int because float values are not valid
            # slice indices.
            # NOTE(review): the largest offset is nx-p-1 (ny-p-1), so the
            # last image column/row is never covered; kept unchanged to
            # preserve the sampling distribution.
            xi = int(floor( rand() * ( nx - p)))
            yi = int(floor( rand() * ( ny - p)))
            ptch = img[ yi:yi+p, xi:xi+p]
            flat = ptch.flatten()
            if not (any(isnan(flat)) or any(isinf(flat)) or any(flat == 0.0)):
                break
        X[:,ii] = ptch.flatten('F')

    name = "%d %dX%d patches" % (N,p,p)
    return Data(X, name)
def sample(self,m,components=None):
    """
    Samples m samples from the current finite mixture distribution.

    The number of samples per component is drawn from a multinomial with
    the weights self.param['alpha']; the samples of each component are
    written to randomly permuted columns of the result.

    :param m: Number of samples to draw.
    :type m: int.
    :param components: optional array; if given, entry i is set to the index of the component that generated sample i.
    :rtype: natter.DataModule.Data
    :returns: A Data object containing the samples
    """
    dim = self['P'][0].sample(1).dim()
    counts = multinomial(m,self.param['alpha'])
    columns = list(range(m))
    shuffle(columns)
    X = zeros((dim,m))

    start = 0
    for k in xrange(len(self['P'])):
        stop = start + counts[k]
        target = columns[start:stop]
        X[:,target] = self.param['P'][k].sample(counts[k]).X
        if components is not None:
            components[target] = k
        start = stop

    return Data(X,"%i samples from a %i-dimensional finite mixture distribution" % (m,dim))
def test_derivatives(self):
    """
    Compares LpNestedSymmetric.dldx with a forward finite difference of
    the log-likelihood (step 1e-8) on 50 samples of a 10-dimensional
    distribution with a Gamma radial component.
    """
    print("Testing derivative for p-nested symmetric distribution with radial gamma")
    sys.stdout.flush()
    myu = 10 * np.random.rand(1)[0]
    mys = 10 * np.random.rand(1)[0]
    n = 10
    L = Auxiliary.LpNestedFunction('(0,0,(1,1:4),4,(1,5:8),8:10)')
    radial = Distributions.Gamma({'s': mys, 'u': myu})
    p = Distributions.LpNestedSymmetric({'f': L, 'n': n, 'rp': radial})

    dat = p.sample(50)
    analytic = p.dldx(dat)
    h = 1e-8
    # Placeholder with the shape of the data; every row is overwritten.
    numeric = np.array(dat.X * np.inf)
    for k in range(n):
        shifted = np.array(dat.X)
        shifted[k, :] += h
        numeric[k, :] = (p.loglik(Data(shifted)) - p.loglik(dat)) / h

    worst = np.max(np.abs(analytic-numeric).flatten())
    self.assertFalse(
        worst > self.llTol,
        'Difference in derivative of log-likelihood for p-nested symmetric greater than ' + str(self.llTol))
def test_loglik(self):
    """
    Compares LpSphericallySymmetric.loglik with the reference values
    stored as 'll' in the files TestPSphericallySymmetric0..4.mat under
    self.matpath; the model parameters are read from the same files.
    """
    print('Testing log-likelihood of p-spherically symmetric distribution with radial gamma')
    sys.stdout.flush()
    for k in range(5):
        print('\t--> test case ' + str(k))
        fixture = io.loadmat(
            self.matpath + '/TestPSphericallySymmetric' + str(k) + '.mat',
            struct_as_record=True)
        truell = np.squeeze(fixture['ll'])
        radial = Distributions.Gamma({'s': fixture['s'], 'u': fixture['u']})
        p = Distributions.LpSphericallySymmetric({
            'p': fixture['p'],
            'n': fixture['n'],
            'rp': radial
        })
        ll = p.loglik(Data(fixture['X']))
        for i in range(len(ll)):
            deviation = np.abs(ll[i]-truell[i])
            self.assertFalse(
                deviation > self.Tol,
                'Log-likelihood for p-spherically symmetric with radial gamma deviates from test case')
def test_derivatives(self):
    """
    Compares LpSphericallySymmetric.dldx with a forward finite difference
    of the log-likelihood (step 1e-8) on 50 samples of a 4-dimensional
    distribution with random p and a random Gamma radial component.
    """
    print("Testing derivative for p-spherically symmetric distribution with radial gamma")
    sys.stdout.flush()
    myu = 3.0 * np.random.rand(1)[0] + 1.0
    mys = 3.0 * np.random.rand(1)[0] + 1.0
    myp = 2 * np.random.rand(1)[0] + .5
    n = 4
    radial = Distributions.Gamma({'s': mys, 'u': myu})
    p = Distributions.LpSphericallySymmetric({'p': myp, 'n': n, 'rp': radial})

    dat = p.sample(50)
    analytic = p.dldx(dat)
    h = 1e-8
    # Placeholder with the shape of the data; every row is overwritten.
    numeric = np.array(dat.X * np.inf)
    for k in range(n):
        shifted = np.array(dat.X)
        shifted[k, :] += h
        numeric[k, :] = (p.loglik(Data(shifted)) - p.loglik(dat)) / h

    worst = np.max(np.abs(analytic-numeric).flatten())
    self.assertFalse(
        worst > self.llTol,
        'Difference ' + str(worst)
        + ' in derivative of log-likelihood for p-spherically symmetric greater than '
        + str(self.llTol))
    print("[Ok]")
def test_derivatives(self):
    """
    Compares dldx of a ProductOfExponentialPowerDistributions (ten
    ExponentialPower factors with random parameters) with a forward
    finite difference of the log-likelihood (step 1e-7, tolerance 1e-4).
    """
    print("Testing derivatives w.r.t. data ... ")
    sys.stdout.flush()
    factors = []
    for k in range(10):
        myp = 2.0 * np.random.rand(1)[0] + .5
        mys = 3.0 * np.random.rand(1)[0] + 1.0
        factors.append(Distributions.ExponentialPower({'p': myp, 's': mys}))
    p = Distributions.ProductOfExponentialPowerDistributions({'P': factors})

    dat = p.sample(100)
    h = 1e-7
    tol = 1e-4
    base = dat.X.copy()
    analytic = p.dldx(dat)
    numeric = 0.0 * analytic
    for i in xrange(dat.size(0)):
        shifted = base.copy()
        shifted[i, :] = shifted[i, :] + h
        numeric[i, :] = (p.loglik(Data(shifted)) - p.loglik(dat)) / h

    # Failure report: message plus maximal and mean absolute difference.
    flatDiff = np.abs((analytic - numeric).flatten())
    prot = {}
    prot['message'] = 'Difference in derivative of log-likelihood for PowerExponential greater than ' + str(tol)
    prot['max difference'] = np.max(flatDiff)
    prot['mean difference'] = np.mean(flatDiff)
    self.assertTrue(
        np.max(np.abs(analytic - numeric)) < tol,
        Auxiliary.prettyPrintDict(prot))
def histogram(self, dat, cdf=False, ax=None, plotlegend=True, bins=None):
    """
    Plots a histogram of the data points in dat. This works only for
    1-dimensional distributions. It also plots the pdf of the
    distribution, evaluated at the bin centers.

    :param dat: data points that enter the histogram
    :type dat: natter.DataModule.Data
    :param cdf: boolean that indicates whether the cdf should be plotted or not (default: False)
    :param ax: axes object the histogram is plotted into if it is not None.
    :param plotlegend: boolean indicating whether a legend should be plotted (default: True)
    :param bins: number of bins to be used. If None (default), one bin per 200 data points is used, but at least one bin.
    :raises Errors.DimensionalityError: if dat.X has more than one row.
    """
    sh = shape(dat.X)
    if len(sh) > 1 and sh[0] > 1:
        raise Errors.DimensionalityError(
            'Cannont plot data with more than one dimension!')

    if ax is None:
        fig = plt.figure()
        ax = fig.add_axes([.1, .1, .8, .8])

    x = squeeze(dat.X)
    if bins is None:
        # Explicit floor division, clamped so that data sets with fewer
        # than 200 points do not request zero bins.
        bins = max(sh) // 200
        if bins < 1:
            bins = 1

    # NOTE(review): 'normed' is kept as in the original; newer matplotlib
    # versions call this option 'density' - confirm the targeted version.
    n, bins, patches = ax.hist(x,
                               bins=bins,
                               normed=1,
                               facecolor='blue',
                               alpha=0.8,
                               lw=0.0)
    bincenters = 0.5 * (bins[1:] + bins[:-1])
    y = squeeze(self.pdf(Data(bincenters)))
    ax.plot(bincenters, y, 'k--', linewidth=2)

    # The legend is attached to ax itself so that it ends up on the axes
    # that were passed in, not on whatever axes are current.
    if hasattr(self, 'cdf') and cdf:
        z = squeeze(self.cdf(Data(bincenters)))
        ax.plot(bincenters, z, 'k.-', linewidth=2)
        if plotlegend:
            ax.legend(('p.d.f.', 'c.d.f.', 'Histogram'), frameon=False)
    elif plotlegend:
        ax.legend(('p.d.f.', 'Histogram'), frameon=False)

    ax.set_xlabel('x')
    ax.set_ylabel('Probability')
    ax.set_xlim(min(x), max(x))
    ax.grid(True)
def loadnpz(path, varname=None, transpose=None):
    """
    Loads a npz file from the specified path. If no variable name is
    passed to the function it prints all variables and asks for user
    input on stdin.

    :param path: Path to the .npz file.
    :type path: string
    :param varname: Name of the variable to be loaded from the .npz file.
    :type varname: string
    :param transpose: Whether the transpose of the variable is loaded. If None (default), the variable is transposed when it has more rows than columns.
    :type transpose: bool
    :returns: Data object with the data from the specified file.
    :rtype: natter.DataModule.Data
    :raises ValueError: if the requested variable does not exist in the file.
    """
    fin = np.load(path)
    try:
        if varname is None:
            stdout.write('Variables in "%s":\n' % (path))
            for var in fin.keys():
                stdout.write(var + '\n')
            stdout.write('Which variable should be loaded: ')
            # Make sure the prompt is visible before blocking on input.
            stdout.flush()
            # Strip only the line terminator; slicing off the last
            # character would damage input that ends without a newline.
            varname = stdin.readline().rstrip('\r\n')

        if varname not in fin.keys():
            raise ValueError(
                'Given variable name "%s" does not exist in file "%s".' % (
                    varname, path))
        dat = atleast_2d(fin[varname])
    finally:
        # Release the file handle held by the loaded archive.
        if hasattr(fin, 'close'):
            fin.close()

    if transpose is None:
        transpose = dat.shape[0] > dat.shape[1]
    if transpose:
        return Data(dat.T, 'npz data from ' + path)
    return Data(dat, 'npz data from ' + path)
def sample(self, m):
    """
    Samples m samples from the current LpNestedSymmetric distribution.

    Directions are generated by _recsample from beta distributed values
    and then rescaled so that the value of the Lp-nested function of each
    sample equals a radius drawn from the radial distribution
    self.param['rp'].

    :param m: Number of samples to draw.
    :type m: int.
    :returns: A Data object containing the samples
    :rtype: natter.DataModule.Data
    """
    lpFunc = self.param['f']
    n = lpFunc.n[()]
    ret = zeros((n, m))
    r = beta(float(n), 1.0, (1, m))
    _recsample((), r, lpFunc, m, ret)

    ret = Data(ret, 'Samples from ' + self.name)
    radii = self.param['rp'].sample(m).X
    ret.scale(radii / lpFunc.f(ret).X)
    return ret
def test_LogDetRadialTransform(self):
    """
    Compares logDetJacobian of a RadialTransformation with the log
    absolute determinant of a 2x2 finite-difference Jacobian, built by
    perturbing the first and the second data dimension by 1e-8.
    """
    print("Testing logdet of radial transformation ... ")
    sys.stdout.flush()
    p = np.random.rand() * 3. + .5

    # source distribution
    psource = Distributions.LpSphericallySymmetric({'p': p})
    # target distribution with a random Gamma radial component
    radial = Distributions.Gamma({
        'u': np.random.rand() * 3.0,
        's': np.random.rand() * 2.0
    })
    ptarget = Distributions.LpSphericallySymmetric({'p': p, 'rp': radial})

    # filter mapping the source onto the target distribution
    F = NonlinearTransformFactory.RadialTransformation(psource, ptarget)

    dat = psource.sample(100)
    transformed = F * dat
    logDetJ = F.logDetJacobian(dat)
    logDetJ2 = 0 * logDetJ
    h = 1e-8

    # Finite-difference columns of the Jacobian, one per input dimension.
    shifted = Data(dat.X.copy())
    shifted.X[0, :] += h
    W1 = ((F * shifted).X - transformed.X) / h

    shifted = Data(dat.X.copy())
    shifted.X[1, :] += h
    W2 = ((F * shifted).X - transformed.X) / h

    for i in range(dat.numex()):
        determinant = W1[0, i] * W2[1, i] - W1[1, i] * W2[0, i]
        logDetJ2[i] = np.log(np.abs(determinant))

    worst = np.max(np.abs(logDetJ - logDetJ2))
    self.assertFalse(
        worst > self.detTol,
        'Log determinant of radial transformation deviates by more than ' + str(self.detTol) + '!')
def histogram(self, dat, cdf=False, ax=None, plotlegend=True):
    """
    Plots a histogram of the data points in dat. This works only for
    1-dimensional distributions. It also plots the pdf of the
    distribution.

    The bin edges are derived from self.param['b']: they are the
    midpoints between consecutive values, extended by half a gap at both
    ends.

    :param dat: data points that enter the histogram
    :type dat: natter.DataModule.Data
    :param cdf: boolean that indicates whether the cdf should be plotted or not (default: False)
    :param ax: axes object the histogram is plotted into if it is not None.
    :param plotlegend: boolean indicating whether a legend should be plotted (default: True)
    """
    # Float copy so that integer valued parameters are not truncated by
    # the in-place edge computation below.
    b = array(self.param['b'], dtype=float)
    d = (b[1:] - b[:-1]) / 2.0
    b[1:] = b[1:] - d
    b[0] -= d[0]
    b = hstack((b, b[-1] + 2.0 * d[-1]))

    widths = b[1:] - b[:-1]
    counts = histogram(squeeze(dat.X), bins=b)[0]
    # Convert the integer counts to float before normalizing; integer
    # division would turn the relative frequencies into zeros.
    h = array(counts, dtype=float) / sum(counts) / widths

    if ax is None:
        fig = figure()
        ax = fig.add_axes([.1, .1, .8, .8])

    # The bars start at the left bin edges, so the alignment is stated
    # explicitly instead of relying on the library default.
    ax.bar(b[:-1], h, width=widths, align='edge')

    bincenters = linspace(b[0], b[-1], 1000)
    y = squeeze(self.pdf(Data(bincenters)))
    ax.plot(bincenters, y, 'k--', linewidth=2)

    # The legend is attached to ax itself so that it ends up on the axes
    # that were passed in, not on whatever axes are current.
    if hasattr(self, 'cdf') and cdf:
        z = squeeze(self.cdf(Data(bincenters)))
        ax.plot(bincenters, z, 'k.-', linewidth=2)
        if plotlegend:
            ax.legend(('p.d.f.', 'c.d.f.', 'Histogram'))
    elif plotlegend:
        ax.legend(('p.d.f.', 'Histogram'))

    ax.set_xlabel('x')
    ax.set_ylabel('Probability')
    ax.grid(True)
def sample(self, m):
    '''
    Samples m examples from the distribution by drawing from
    stats.beta with the parameters self['alpha'] and self['beta'].

    :param m: number of examples to sample
    :type m: int
    :returns: Data object holding the m samples
    :rtype: natter.DataModule.Data
    '''
    draws = stats.beta.rvs(self['alpha'], self['beta'], size=(m, ))
    return Data(draws)
def objective(self, W, nargout,dat,q):
    """
    The objective function to be optimized with
    Auxiliary.Optimization.StGradient. It sums the log-likelihood of the
    data transformed by W under q and divides it by both entries of
    dat.size() and by log(2).

    :param W: current matrix W
    :type W: numpy.ndarray
    :param nargout: number of output arguments. If 1, returns objective, otherwise the derivative as well
    :param dat: data points at which the objective is evaluated
    :type dat: natter.DataModule.Data
    :param q: base distribution
    :type q: natter.Distributions.Distribution
    :returns: tuple with the value of the objective and the derivative w.r.t. W (if nargout != 1)
    """
    (n,m) = dat.size()

    def transformed():
        # fresh Data object holding W applied to the data
        return Data(array(dot(W,dat.X)))

    value = sum(q.loglik(transformed()))/m/n/log(2.)
    if nargout == 1:
        return (value,)

    gradient = dot(q.dldx(transformed()), dat.X.transpose())/m/n/log(2)
    return (value, gradient)
def ppf(self, X):
    '''
    Evaluates the percentile function (inverse c.d.f.) for a given array
    of quantiles. The passed values are wrapped into a Data object
    without being changed.

    :param X: Percentiles for which the ppf will be computed.
    :type X: numpy.array
    :returns: A Data object containing the values of the ppf.
    :rtype: natter.DataModule.Data
    '''
    quantiles = Data(X)
    return quantiles
def test_derivatives(self):
    """
    Compares MixtureOfGaussians.dldx with a forward finite difference of
    the log-likelihood (step 1e-7, tolerance 1e-6) on 100 samples.
    """
    print("Testing derivatives w.r.t. data ... ")
    sys.stdout.flush()
    p = Distributions.MixtureOfGaussians({'K': 5})
    dat = p.sample(100)
    h = 1e-7
    tol = 1e-6
    shifted = np.array(dat.X) + h
    analytic = p.dldx(dat)
    numeric = (p.loglik(Data(shifted)) - p.loglik(dat)) / h
    worst = np.max(np.abs(analytic-numeric))
    self.assertFalse(
        worst > tol,
        'Difference ' +str(worst)
        +' in derivative of log-likelihood for MixtureOfGaussians greater than '
        + str(tol))
def sample(self,m):
    """
    Samples m samples from the current LpSphericallySymmetric
    distribution.

    Gamma distributed values are raised to the power 1/p and given random
    signs, the result is normalized with dat.normalize(p) and finally
    scaled with samples from the radial distribution self.param['rp'].

    :param m: Number of samples to draw.
    :type m: int.
    :returns: A Data object containing the samples
    :rtype: natter.DataModule.Data
    """
    p = self.param['p']
    n = self.param['n']
    # 1.0/p instead of 1/p: with an integer p the latter is an integer
    # division under Python 2 and evaluates to zero.
    invP = 1.0/p

    # sample from a p-generalized normal with scale 1
    z = gamma(invP,1.0,(n,m))
    z = abs(z)**invP
    dat = Data(z * sign(randn(n,m)),'Samples from ' + self.name,
               ['sampled ' + str(m) + ' examples from Lp-generalized Normal'])

    # normalize the samples, then scale them with the sampled radii
    dat.normalize(p)
    r = self.param['rp'].sample(m)
    dat.scale(r)
    return dat
def test_LpEntropy(self):
    """
    Compares Entropy.LpEntropy on 20000 isotropic Gaussian samples with
    the value n*0.5*log(2*pi*e*s**2), up to self.tol.
    """
    print("Testing Lp-Entropy estimator")
    n = randint(5) + 1
    s = 10.0 * rand()
    samples = randn(n, 20000) * s
    expected = n * .5 * log(2.0 * pi * e * s**2)
    estimate = Entropy.LpEntropy(Data(samples))
    self.assertTrue(
        abs(expected - estimate) < self.tol,
        'Entropy estimates for LpEntropy differ by more than ' + str(self.tol))
def test_marginalEntropyEstimators(self):
    """
    Compares Entropy.marginalEntropy for the methods 'MLE', 'JK', 'CAE'
    and 'MM' on two Gaussian marginals with random scales against the
    value 0.5*log(2*pi*e*s**2), up to self.tol.
    """
    print("Testing marginal entropy estimation ...")
    stdout.flush()
    s = 10.0 * rand(2, 1)
    x = randn(2, 10000) * s
    h = .5 * log(2.0 * pi * e * s**2)
    dat = Data(x)
    for method in ['MLE', 'JK', 'CAE', 'MM']:
        h2 = Entropy.marginalEntropy(dat, method)
        # A space was added before 'differ' so that the method name does
        # not run into the rest of the message.
        self.assertTrue(
            max(abs(h - h2)) < self.tol,
            'Entropy estimates for ' + method + ' differ by more than ' + str(self.tol))
def sample(self, m):
    """
    Samples m samples from the current distribution by exponentiating
    standard normal values that were scaled by self.param['s'] and
    shifted by self.param['mu'].

    :param m: Number of samples to draw.
    :type m: int.
    :returns: A Data object containing the samples
    :rtype: natter.DataModule.Data
    """
    gaussian = randn(1, m) * self.param['s'] + self.param['mu']
    label = str(m) + ' samples from ' + self.name
    return Data(exp(gaussian), label)
def f(self, dat):
    """
    Computes the value of the Lp-nested function at the vectors in dat by
    calling computerec with the tree self.tree and the exponents self.p.
    Alternatively you can directly call the object on the data, i.e. use
    *L(dat)* instead of *L.f(dat)*.

    :param dat: Data on which the LpNestedFunction will be evaluated.
    :type dat: natter.DataModule.Data
    :returns: A Data object containing the function values
    :rtype: natter.DataModule.Data
    """
    values = computerec(self.tree, dat.X, self.p)
    return Data(values)
class TestDirichlet(unittest.TestCase):
    """
    Tests of the Dirichlet distribution: log-likelihood against stored
    reference values and parameter estimation from samples.
    """

    # Ten 3-dimensional reference points, one per column.
    X = Data(
        np.array([
            [0.1042605373, 0.0443097862, 0.0032503423, 0.0420286884,
             0.1194181369, 0.1848512638, 0.0906818056, 0.4223094329,
             0.4998465219, 0.0078395240],
            [0.7299213688, 0.5167476582, 0.4604688785, 0.4604338136,
             0.4221988687, 0.7307655970, 0.6077871086, 0.1683807824,
             0.4403800496, 0.7195288939],
            [0.1658180939, 0.4389425556, 0.5362807793, 0.4975374980,
             0.4583829944, 0.0843831391, 0.3015310858, 0.4093097847,
             0.0597734285, 0.2726315821],
        ]))
    # Reference log-likelihoods of the columns of X under alpha.
    LL = np.array([
        1.3689065138, 1.7564748726, 2.7472333803, 1.7160392427,
        1.1921277658, 0.8798846426, 1.4837080045, -0.2267826679,
        -0.1300239648, 2.5635963658
    ])
    alpha = np.array([0.6086919517, 1.9573600512, 1.3938315963])
    # Tolerances for the log-likelihood and the estimated parameters.
    Tol = 1e-7
    TolParam = 5 * 1e-2

    def test_loglik(self):
        """Compares Dirichlet.loglik on X with the reference values LL."""
        print("Testing log-likelihood of Dirichlet distribution ... ")
        sys.stdout.flush()
        p = Distributions.Dirichlet({'alpha': self.alpha})
        l = p.loglik(self.X)
        for k, expected in enumerate(self.LL):
            self.assertTrue(
                np.abs(l[k] - expected) < self.Tol,
                'Difference in log-likelihood for Dirichlet greater than ' + str(self.Tol))

    def test_estimate(self):
        """
        Samples from a Dirichlet with random alpha and checks that
        estimate() on a randomly initialized Dirichlet recovers alpha.
        """
        print("Testing parameter estimation of Dirichlet distribution ...")
        sys.stdout.flush()
        myalpha = 10.0 * np.random.rand(10)
        source = Distributions.Dirichlet({'alpha': myalpha})
        dat = source.sample(50000)
        fitted = Distributions.Dirichlet({'alpha': np.random.rand(10)})
        fitted.estimate(dat)
        estimated = fitted.param['alpha']
        self.assertTrue(
            np.max(np.abs(estimated - myalpha)) < self.TolParam,
            'Difference in alpha parameter for Dirichlet distribution greater than ' + str(self.TolParam))
def sample(self,m):
    """
    Samples m samples from the current TruncatedGaussian distribution
    using truncnorm.rvs. The boundaries self.param['a'] and
    self.param['b'] are standardized with mu and sigma before they are
    passed on.

    :param m: Number of samples to draw.
    :type m: int.
    :rtype: natter.DataModule.Data
    :returns: A Data object containing the samples
    """
    mu = self.param['mu']
    sigma = self.param['sigma']
    lower = (self.param['a']-mu)/sigma
    upper = (self.param['b']-mu)/sigma
    draws = truncnorm.rvs(lower,upper,loc=mu,scale=sigma,size=m)
    return Data(draws,'%i samples from %s' % (m,self.name))
def ppf(self,u):
    '''
    Evaluates the percentile function (inverse c.d.f.) for a given array
    of quantiles using truncnorm.ppf. The boundaries self.param['a'] and
    self.param['b'] are standardized with mu and sigma before they are
    passed on.

    :param u: Percentiles for which the ppf will be computed.
    :type u: numpy.array
    :returns: A Data object containing the values of the ppf.
    :rtype: natter.DataModule.Data
    '''
    mu = self.param['mu']
    sigma = self.param['sigma']
    lower = (self.param['a']-mu)/sigma
    upper = (self.param['b']-mu)/sigma
    values = truncnorm.ppf(u,lower,upper,loc=mu,scale=sigma)
    return Data(values, 'Percentiles from a %s' % (self.name,))
def ppf(self, U):
    '''
    Evaluates the percentile function (inverse c.d.f.) for a given array
    of quantiles: the gamma percentiles with shape self.param['u'] and
    scale self.param['s'] are raised to the power 1/p.

    :param U: Percentiles for which the ppf will be computed.
    :type U: numpy.array
    :returns: A Data object containing the values of the ppf.
    :rtype: natter.DataModule.Data
    '''
    # 1.0/p instead of 1/p: with an integer p the latter is an integer
    # division under Python 2 and evaluates to zero.
    exponent = 1.0 / self.param['p']
    return Data(
        gamma.ppf(U, self.param['u'], scale=self.param['s'])**exponent)
def ppf(self,u,bounds=None,maxiter=1000):
    '''
    Evaluates the percentile function (inverse c.d.f.) for a given array
    of quantiles by inverting the c.d.f. with the bisection method.

    NOTE: ppf works only for one dimensional mixture distributions.

    :param u: Percentiles for which the ppf will be computed.
    :type u: numpy.array
    :param bounds: a tuple of two arrays of the same size as u that specifies the initial lower and upper boundaries for the bisection method. If None, the parameters 'a' and 'b' of the first mixture component are used when both exist, otherwise -1e6 and 1e6.
    :type bounds: tuple of two numpy.array
    :param maxiter: maximum number of iterations
    :type maxiter: int
    :returns: A Data object containing the values of the ppf.
    :rtype: natter.DataModule.Data
    '''
    ret = Data(u,'Percentiles from ' + self.name)

    # initial bracketing interval
    if bounds is not None:
        lb = Data(bounds[0])
        ub = Data(bounds[1])
    elif 'a' in self.param['P'][0].param and 'b' in self.param['P'][0].param:
        warn("\tAssuming that the keys a=%.2g and b=%.2g in %s refer to boundaries. Using those..." % (self.param['P'][0]['a'],self.param['P'][0]['b'],self.param['P'][0].name,))
        lb = Data(0*u+self.param['P'][0]['a'])
        ub = Data(0*u+self.param['P'][0]['b'])
    else:
        lb = Data(u*0-1e6)
        ub = Data(u*0+1e6)

    def f(dat):
        # function whose root is searched: target quantile minus c.d.f.
        return u-self.cdf(dat)

    iterC = 0
    while max(ub.X-lb.X) > 5*1e-10 and iterC < maxiter:
        ret.X = (ub.X+lb.X)/2
        mf = f(ret)
        lf = f(lb)
        uf = f(ub)

        # Equal signs at both ends mean the interval no longer brackets
        # the root; those intervals are widened.
        # NOTE(review): where(...)[0] is used as a column index, which
        # presumes that f returns 1-D arrays - confirm.
        if any(lf*uf>0):
            warn("ppf lost the root! resetting boundaries")
            ind0 = where(lf*uf > 0)
            ub.X[0,ind0[0]] = 4*abs(ub.X[0,ind0[0]]+1)
            lb.X[0,ind0[0]] = -4*abs(lb.X[0,ind0[0]]+1)

        # Keep the half of each interval in which f changes its sign.
        ind0 = where(mf*lf < 0)
        ind1 = where(mf*uf < 0)
        ub.X[0,ind0[0]] = ret.X[0,ind0[0]]
        lb.X[0,ind1[0]] = ret.X[0,ind1[0]]
        iterC +=1

        # progress line, rewritten in place via the carriage return
        sys.stdout.write(80*" " + "\r\tFiniteMixtureDistribution.ppf maxdiff: %.4g, meandiff: %.4g" % (max(ub.X-lb.X),mean(ub.X-lb.X)))
        sys.stdout.flush()

    if iterC == maxiter:
        # The message is assembled from two adjacent literals so that it
        # no longer depends on a string running across a line break.
        warn("FiniteMixtureDistribution.ppf: Maxiter reached! Exiting. Bisection method might not have been converged. "
             "Maxdiff is %.10g. Mean diff is %.4g" % (max(ub.X-lb.X),mean(ub.X-lb.X)))
    return ret