def test_loglik(self):
    # Importance-sampling check: estimate the normalization constant of the
    # mixture density with a broad Gaussian proposal; it should be close to 1.
    nsamples = 1000000
    Gauss = Gaussian(n=1, mu=array([0]), sigma=array([[4]]))
    dat = Gauss.sample(nsamples)
    logWeights = self.mog.loglik(dat) - Gauss.loglik(dat)
    Z = logsumexp(logWeights) - log(nsamples)
    print "test_loglik: z: ", exp(Z)
    self.assertTrue(abs(exp(Z) - 1) < 1e-01)
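# The test above uses a standard importance-sampling check: for a proposal q
# with tractable samples and log-density, E_q[p(x)/q(x)] equals the integral of
# p, which is 1 iff p is normalized. Below is a minimal self-contained sketch
# of the same check using scipy.stats instead of natter (an illustrative
# substitution, not part of the library).
def _demo_normalization_check():
    import numpy as np
    from scipy.misc import logsumexp  # scipy.special.logsumexp in newer scipy
    from scipy import stats
    nsamples = 100000
    x = stats.norm(0, 2).rvs(nsamples)  # proposal q = N(0, 4)
    # log importance weights log p(x) - log q(x) for the target p = N(0, 1)
    logw = stats.norm(0, 1).logpdf(x) - stats.norm(0, 2).logpdf(x)
    Z = logsumexp(logw) - np.log(nsamples)  # log of the MC estimate of int p
    print "estimated normalization constant: ", np.exp(Z)  # close to 1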
def fill_dict_with_defaults(dic):
    """
    Helper function that fills in missing values of a given dictionary
    containing information about the distribution to test.

    :param dic: dictionary containing information about the distribution to test.
    :type dic: dictionary
    :returns: dictionary with missing values filled in.
    :rtype: dictionary
    """
    RD = cp(dic)
    # If a distribution class (rather than an instance) was passed, try to
    # instantiate it with default parameters.
    try:
        if isinstance(RD['dist'](), RD['dist']):
            RD['dist'] = RD['dist']()
    except TypeError:
        pass
    except:
        raise AssertionError('Distribution %s does not seem to support a default distribution (called with no arguments)' % RD['dist'])
    if RD['dist'].param.has_key('n'):
        n = RD['dist']['n']
    else:
        n = 1
    if 'nsamples' not in dic:
        RD['nsamples'] = 500000
    if 'tolerance' not in dic:
        RD['tolerance'] = 1e-01
    if 'support' not in dic:
        RD['support'] = (-np.inf, np.inf)
    # Default proposals are chosen to match the support: Uniform on bounded
    # supports, Gamma on the positive half-line, Gaussian otherwise.
    if 'proposal_low' not in dic:
        if RD['support'][1] - RD['support'][0] < np.inf:
            RD['proposal_low'] = Uniform({'n': n,
                                          'low': RD['support'][0],
                                          'high': RD['support'][1]})
        elif RD['support'][0] == 0 and RD['support'][1] == np.inf:
            RD['proposal_low'] = Gamma({'u': 1., 's': 2.})
        else:
            RD['proposal_low'] = Gaussian({'n': n, 'sigma': np.eye(n) * 0.8})
    if 'proposal_high' not in dic:
        if RD['support'][1] - RD['support'][0] < np.inf:
            RD['proposal_high'] = Uniform({'n': n,
                                           'low': RD['support'][0],
                                           'high': RD['support'][1]})
        elif RD['support'][0] == 0 and RD['support'][1] == np.inf:
            RD['proposal_high'] = Gamma({'u': 3., 's': 2.})
        else:
            RD['proposal_high'] = Gaussian({'n': n, 'sigma': np.eye(n) * 10})
    return RD
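# Usage sketch (hypothetical test specification, not part of the library):
# only 'dist' is given, so all remaining keys receive the defaults documented
# above. Assumes Gaussian supports a default constructor, as required by
# fill_dict_with_defaults.
def _demo_fill_dict_with_defaults():
    spec = fill_dict_with_defaults({'dist': Gaussian})
    print spec['nsamples']      # 500000
    print spec['tolerance']     # 0.1
    print spec['support']       # (-inf, inf)
    print spec['proposal_low']  # Gaussian proposal, since the support is unbounded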
def laplaceApproximation(listOfDist, initPoint=None):
    """
    Calculates the Laplace approximation to the product of the likelihood
    functions associated with the given list of distributions. It returns a
    Gaussian distribution whose mean is the mode of the product of the
    likelihoods and whose covariance is the inverse of the negative Hessian
    of that product at the mode.

    Note that the resulting distribution is a distribution over parameters,
    whereas the distributions in the input list are distributions over data
    points. Therefore, to include a prior distribution over parameters in the
    product with this implementation, a specially designed data distribution
    has to be implemented.

    The distributions in the list are assumed to implement the dldx and dldx2
    methods (first and second derivative).

    :param listOfDist: list of distributions; the product of the corresponding likelihood functions is approximated.
    :type listOfDist: list of natter.Distributions
    :param initPoint: initial point for the mode search (optional). If None, the parameters of the first distribution in the list are used.
    :type initPoint: numpy.ndarray
    :returns: Laplace-approximated distribution
    :rtype: natter.Distributions.Gaussian
    """
    # check that all distributions provide first and second derivatives
    for dist in listOfDist:
        if not (hasattr(dist, 'dldx') and hasattr(dist, 'dldx2')):
            raise ValueError('Distribution has not implemented dldx or dldx2')

    if initPoint is None:
        initPoint = listOfDist[0].primary2array()  # alternatively: listOfDist[0].sample(1).X.flatten()

    def f(x):
        # negative log-likelihood of the product
        fx = 0.0
        for dist in listOfDist:
            fx -= dist.loglik(Data(x))
        return fx

    def df(x):
        # negative gradient of the log-likelihood of the product
        grad = zeros(len(initPoint.flatten()))
        for dist in listOfDist:
            grad = grad - dist.dldx(Data(x))
        return grad

    def ddf(x):
        # negative Hessian of the log-likelihood of the product
        H = zeros((len(initPoint.flatten()), len(initPoint.flatten())))
        for dist in listOfDist:
            H = H - dist.dldx2(Data(x))
        return H

    # fmin_bfgs only returns the seven values unpacked here with full_output=True
    xmin, fopt, gopt, Hopt, func_calls, grad_calls, warnflag = \
        fmin_bfgs(f, initPoint, fprime=df, full_output=True)
    Hopt = ddf(xmin)  # use the exact Hessian at the mode, not the BFGS estimate
    laplaceApprox = Gaussian({'n': len(xmin.flatten())})
    laplaceApprox['mu'] = xmin
    laplaceApprox['sigma'] = inv(Hopt)
    return laplaceApprox
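# Usage sketch (hypothetical values, not part of the library): for a single
# univariate Gaussian likelihood the log-density is quadratic, so the Laplace
# approximation is exact and the returned mean and covariance should match mu
# and sigma. Assumes natter's Gaussian implements dldx and dldx2;
# laplaceApproximation raises a ValueError otherwise.
def _demo_laplace_approximation():
    from numpy import array
    lik = Gaussian(n=1, mu=array([1.0]), sigma=array([[2.0]]))
    approx = laplaceApproximation([lik], initPoint=array([0.0]))
    print "Laplace mean: ", approx['mu']     # close to [1.0]
    print "Laplace cov:  ", approx['sigma']  # close to [[2.0]]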
def MarginalHistogramEqualization(psource, ptarget=None):
    """
    Creates a non-linear filter that changes the marginal distribution of each
    single data dimension independently. To that end it takes two ISA models
    and performs a histogram equalization on each of the marginal
    distributions.

    *Important*: The ISA models must have one-dimensional subspaces!

    If ptarget is omitted, it is set to an N(0,I) Gaussian by default.

    :param psource: source distribution, which must be a natter.Distributions.ISA model with one-dimensional subspaces
    :type psource: natter.Distributions.ISA
    :param ptarget: target distribution, which must be a natter.Distributions.ISA model with one-dimensional subspaces
    :type ptarget: natter.Distributions.ISA
    :returns: a non-linear filter that changes the marginal distributions of the data from the respective psource marginals into the respective ptarget marginals
    :rtype: natter.Transforms.NonlinearTransform
    """
    from natter.Distributions import ISA, Gaussian

    if not isinstance(psource, ISA):
        raise TypeError('Transform.TransformFactory.MarginalHistogramEqualization: psource must be an ISA model')
    else:
        psource = psource.copy()
    if ptarget is not None and not isinstance(ptarget, ISA):
        raise TypeError('Transform.TransformFactory.MarginalHistogramEqualization: ptarget must be an ISA model')
    for ss in psource['S']:
        if len(ss) != 1:
            raise Errors.DimensionalityError('Transform.TransformFactory.MarginalHistogramEqualization: psource must have one-dimensional subspaces')

    if ptarget is None:
        ptarget = ISA(S=[(k,) for k in range(psource['n'])],
                      P=[Gaussian(n=1) for k in range(psource['n'])])
    else:
        ptarget = ptarget.copy()

    # Histogram equalization: push each marginal through its source cdf and
    # the target ppf, then stack the transformed dimensions back together.
    g = lambda dat: reduce(lambda x, y: x.stack(y),
                           [ptarget['P'][k].ppf(psource['P'][k].cdf(dat[k, :]))
                            for k in range(psource['n'])])
    # log-determinant of the Jacobian via the change-of-variables formula
    gdet = lambda y: psource.loglik(y) - ptarget.loglik(g(y))
    name = 'Marginal Histogram Equalization Transform: %s --> %s' \
           % (psource['P'][0].name, ptarget['P'][0].name)
    return NonlinearTransform(g, name, logdetJ=gdet)
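# Usage sketch (hypothetical values, not part of the library): gaussianize the
# marginals of an ISA source model with univariate Gaussian subspaces. The ISA
# construction mirrors the ptarget construction above; whether ISA requires
# further constructor arguments, and whether NonlinearTransform exposes an
# apply() method, are assumptions -- treat this as a sketch against the
# Transforms API.
def _demo_marginal_histogram_equalization():
    from numpy import array
    from natter.Distributions import ISA, Gaussian
    n = 3
    psource = ISA(S=[(k,) for k in range(n)],
                  P=[Gaussian(n=1, mu=array([float(k)]),
                              sigma=array([[k + 1.0]])) for k in range(n)])
    F = MarginalHistogramEqualization(psource)  # ptarget defaults to N(0,I)
    dat = psource.sample(1000)
    dat_eq = F.apply(dat)  # assumption: NonlinearTransform provides apply()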
def test_estimate(self):
    # Build a random two-component Gaussian mixture, perturb its parameters
    # slightly, and check that estimation recovers them.
    sigm1 = symrand(2)
    sigm1 = np.dot(sigm1, sigm1.T)
    sigm2 = symrand(2)
    sigm2 = np.dot(sigm2, sigm2.T)
    P = [Gaussian(n=2, mu=2 * randn(2), sigma=sigm1),
         Gaussian(n=2, mu=2 * randn(2), sigma=sigm2)]
    # P = [Gamma(n=1,u=3*rand(),s=3*rand()) for k in xrange(self.K)]
    # P = [TruncatedGaussian(a=0.1,b=10,mu=3*randn(1),sigma=3*rand(1,1)) for k in xrange(self.K)]
    alpha = rand(2)
    alpha = alpha / sum(alpha)
    mog = FiniteMixtureDistribution(P=P, alpha=alpha)
    mog.primary = ['alpha', 'P']
    dat = mog.sample(500)
    arr0 = mog.primary2array()
    print mog
    mog.array2primary(arr0 + np.random.rand(len(arr0)) * 1e-04)
    mog.estimate(dat, method='hybrid')
    err = np.sum(np.abs(arr0 - mog.primary2array()))
    print mog
    print "error: ", err
    self.assertTrue(err < 1.0)
def setUp(self):
    self.n = 1
    self.K = 3
    self.nsamples = 10000
    self.a = 0.1
    self.b = 10
    alpha = rand(self.K)
    alpha = alpha / sum(alpha)
    P = [Gaussian(n=1, mu=3 * randn(1), sigma=3 * rand(1, 1))
         for k in xrange(self.K)]
    # P = [TruncatedGaussian(a=self.a,b=self.b,mu=3*randn(1),sigma=3*rand(1,1)+1) for k in xrange(self.K)]
    # P = [Gamma(n=1,u=3*rand(),s=3*rand()) for k in xrange(self.K)]
    self.mog = FiniteMixtureDistribution(P=P, alpha=alpha)
    self.mog.primary = ['alpha', 'P']
    self.dat = self.mog.sample(self.nsamples)
class TestGaussian(unittest.TestCase):

    def setUp(self):
        self.Gauss = Gaussian({'n': 1})
        self.entropy = 0.5 * (1 + log(2 * pi))  # analytic entropy of N(0,1)
        self.Gauss2D = Gaussian({'n': 2})
        self.Gauss2D.primary = ['mu', 'sigma']

    def test_init(self):
        pass

    def test_sample(self):
        d = self.Gauss.sample(10)

    def test_loglik(self):
        # the sampled entropy must match the analytic entropy
        nsamples = 1000000
        dat = self.Gauss.sample(nsamples)
        lv = self.Gauss.loglik(dat)
        hs = sum(-lv) / nsamples  # sampled entropy
        self.assertTrue(abs(hs - self.entropy) <= 1e-02)

    def test_array2primary(self):
        # converting primary parameters to an array and back must not change them
        arr = self.Gauss.primary2array()
        pbefore = self.Gauss.param.copy()
        arrr = randn(len(arr))
        self.Gauss.array2primary(arrr)
        self.Gauss.array2primary(arr)
        pafter = self.Gauss.param.copy()
        diff = 0.0
        for key in pbefore.keys():
            diff += norm(pbefore[key] - pafter[key])
        self.assertTrue(diff <= 1e-05)

    def test_dldtheta(self):
        # compare the analytic parameter gradient against finite differences
        d = self.Gauss2D.sample(10)

        def f(X):
            self.Gauss2D.array2primary(X)
            lv = self.Gauss2D.loglik(d)
            return sum(lv)

        def df(X):
            self.Gauss2D.array2primary(X)
            gv = self.Gauss2D.dldtheta(d)
            return sum(gv, axis=1)

        theta0 = self.Gauss2D.primary2array()
        theta0 = abs(randn(len(theta0))) + 1
        err = check_grad(f, df, theta0)
        print "error in gradient: ", err
        self.assertTrue(err < 1e-02)

    def test_estimate(self):
        data = self.Gauss2D.sample(500)
        self.Gauss2D.primary = ['sigma']
        thetaOrig = self.Gauss2D.primary2array()
        theta0 = abs(randn(len(thetaOrig)))
        self.Gauss2D.array2primary(theta0)
        self.Gauss2D.estimate(data, method="gradient")
        thetaOpt = self.Gauss2D.primary2array()
        err1 = thetaOpt - thetaOrig
        print "Error in thetas with gradient: ", norm(err1)

        data = self.Gauss2D.sample(1000000)
        theta0 = abs(randn(len(thetaOrig)))
        self.Gauss2D.array2primary(theta0)
        self.Gauss2D.estimate(data, method="analytic")
        thetaOpt = self.Gauss2D.primary2array()
        err2 = thetaOpt - thetaOrig
        print "Error in thetas with maximum likelihood: ", norm(err2)
        self.assertTrue(norm(err2) < 1e-01)
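# test_dldtheta above relies on scipy.optimize.check_grad, which compares an
# analytic gradient against a finite-difference approximation and returns the
# norm of the difference. A minimal sketch of the same pattern on a simple
# quadratic (illustrative, independent of natter):
def _demo_check_grad():
    import numpy as np
    from scipy.optimize import check_grad
    f = lambda x: np.sum(x ** 2)   # objective
    df = lambda x: 2 * x           # its analytic gradient
    err = check_grad(f, df, np.array([1.0, -2.0, 3.0]))
    print "finite-difference error: ", err  # should be small, around 1e-06 or less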
class TestEllipticallyContourGamma(unittest.TestCase):

    def setUp(self):
        self.n = 2
        self.W = LinearTransform(eye(self.n))
        self.ECG = EllipticallyContourGamma(n=self.n, W=self.W)
        self.Gaussian = Gaussian({'n': self.n})
        self.data = self.Gaussian.sample(1000)

    def test_init(self):
        pass

    def test_array2primary(self):
        abefore = self.ECG.primary2array()
        # arrr = randn(len(abefore))
        # abefore = arrr
        # self.ECG.array2primary(arrr)
        # aafter = self.ECG.primary2array()
        # diff = norm(abefore - aafter)
        # self.assertTrue(diff <= 1e-05)

    def test_loglik(self):
        # importance-sampling check that the ECG density is normalized
        nsamples = 100000
        dataImportance = self.Gaussian.sample(nsamples)
        logweights = self.ECG.loglik(dataImportance) - self.Gaussian.loglik(dataImportance)
        Z = logsumexp(logweights) - log(nsamples)
        self.assertTrue(abs(exp(Z) - 1) < 1e-01)

    def test_sample(self):
        # the same check in the other direction: sample from the ECG and
        # weight with the Gaussian density
        nsamples = 10000
        data = self.ECG.sample(nsamples)
        logWeights = self.Gaussian.loglik(data) - self.ECG.loglik(data)
        Z = logsumexp(logWeights) - log(nsamples)
        self.assertTrue(abs(exp(Z) - 1) < 1e-01)

    def test_dldtheta(self):
        # gradient check with respect to the 'q' parameters ...
        self.ECG.primary = ['q']

        def f(X):
            self.ECG.array2primary(X)
            return sum(self.ECG.loglik(self.data))

        def df(X):
            self.ECG.array2primary(X)
            return sum(self.ECG.dldtheta(self.data), axis=1)

        theta0 = self.ECG.primary2array()
        theta0 = abs(randn(len(theta0))) + 1
        err = check_grad(f, df, theta0)
        print "error in gradient: ", err

        # ... and with respect to the filter 'W'
        self.ECG.primary = ['W']

        def f2(X):
            self.ECG.array2primary(X)
            return sum(self.ECG.loglik(self.data))

        def df2(X):
            self.ECG.array2primary(X)
            return sum(self.ECG.dldtheta(self.data), axis=1)

        theta0 = self.ECG.primary2array()
        theta0 = abs(randn(len(theta0))) + 1
        err = check_grad(f2, df2, theta0)
        print "error in gradient: ", err
        self.assertTrue(err < 1e-02)