def _simple_logistic_regression(x,y,beta_start=None,verbose=False,
                               CONV_THRESH=1.e-3,MAXIT=500):
    """
 Faster than logistic_regression when there is only one predictor.
    """
    if len(x) != len(y):
        raise ValueError, "x and y should be the same length!"
    if beta_start is None:
        beta_start = NA.zeros(2,x.typecode())
    iter = 0; diff = 1.; beta = beta_start  # initial values
    if verbose:
        print 'iteration  beta log-likelihood |beta-beta_old|'
    while iter < MAXIT:
        beta_old = beta 
        p = NA.exp(beta[0]+beta[1]*x)/(1.+NA.exp(beta[0]+beta[1]*x))
        l = NA.sum(y*NA.log(p) + (1.-y)*NA.log(1.-p)) # log-likelihood
        s = NA.array([NA.sum(y-p), NA.sum((y-p)*x)])  # scoring function
        # information matrix
        J_bar = NA.array([[NA.sum(p*(1-p)),NA.sum(p*(1-p)*x)],
                          [NA.sum(p*(1-p)*x),NA.sum(p*(1-p)*x*x)]])
        beta = beta_old + NA.dot(LA.inverse(J_bar),s) # new value of beta
        diff = NA.sum(NA.fabs(beta-beta_old)) # sum of absolute differences
        if verbose:
            print iter+1, beta, l, diff
        if diff <= CONV_THRESH: break
        iter = iter + 1
    return beta, J_bar, l
def _simple_logistic_regression(x,y,beta_start=None,verbose=False,
                               CONV_THRESH=1.e-3,MAXIT=500):
    """
 Faster than logistic_regression when there is only one predictor.
    """
    if len(x) != len(y):
        raise ValueError, "x and y should be the same length!"
    if beta_start is None:
        beta_start = NA.zeros(2,x.dtype.char)
    iter = 0; diff = 1.; beta = beta_start  # initial values
    if verbose:
        print 'iteration  beta log-likelihood |beta-beta_old|'
    while iter < MAXIT:
        beta_old = beta 
        p = NA.exp(beta[0]+beta[1]*x)/(1.+NA.exp(beta[0]+beta[1]*x))
        l = NA.sum(y*NA.log(p) + (1.-y)*NA.log(1.-p)) # log-likelihood
        s = NA.array([NA.sum(y-p), NA.sum((y-p)*x)])  # scoring function
        # information matrix
        J_bar = NA.array([[NA.sum(p*(1-p)),NA.sum(p*(1-p)*x)],
                          [NA.sum(p*(1-p)*x),NA.sum(p*(1-p)*x*x)]])
        beta = beta_old + NA.dot(LA.inverse(J_bar),s) # new value of beta
        diff = NA.sum(NA.fabs(beta-beta_old)) # sum of absolute differences
        if verbose:
            print iter+1, beta, l, diff
        if diff <= CONV_THRESH: break
        iter = iter + 1
    return beta, J_bar, l
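
A minimal usage sketch for either variant above (a hedged example: the module's own imports are not shown here, so numarray and its linear_algebra module are assumed as the NA/LA bindings, and the data are illustrative):

import numarray as NA
import numarray.linear_algebra as LA

# Hypothetical data: one predictor, non-separable binary outcomes.
x = NA.arange(20, type=NA.Float)
y = NA.array([0., 0., 0., 1., 0., 0., 1., 0., 1., 1.,
              0., 1., 1., 1., 0., 1., 1., 1., 1., 1.])
beta, J_bar, l = _simple_logistic_regression(x, y, verbose=True)
print beta   # [intercept, slope]
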
def _loglikelihood(self, vectors, priors, means, covariances):
    llh = 0.0
    for vector in vectors:
        p = 0
        for j in range(len(priors)):
            p += priors[j] * \
                 self._gaussian(means[j], covariances[j], vector)
        llh += numarray.log(p)
    return llh
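
self._gaussian is not shown in this example; a hedged standalone sketch of the multivariate normal density it presumably evaluates:

import math
import numarray
import numarray.linear_algebra as la

def gaussian_density(mean, covariance, vector):
    # Illustrative stand-in for the _gaussian method: the density
    # N(vector; mean, covariance) with a full covariance matrix.
    # mean and vector are assumed to be rank-1 numarray arrays.
    d = len(mean)
    diff = vector - mean
    quad = numarray.dot(diff, numarray.dot(la.inverse(covariance), diff))
    norm = (2.0 * math.pi)**(d / 2.0) * math.sqrt(la.determinant(covariance))
    return math.exp(-0.5 * quad) / norm
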
Example 4
def __call__(self, data):
    state = self._state = UniTable()
    state['data'] = data
    state['nullmodel'] = self.nullmodel(state['data'])
    state['altmodel'] = self.altmodel(state['data'])
    state['odds'] = state['altmodel']/state['nullmodel']
    state['log_odds'] = na.log(state['odds'])
    state['cusum'] = list(gen_cusum(state['log_odds'], self.reset_value))
    state['score'] = state['cusum'] > self.threshold
    return state['score'][-1]
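
gen_cusum is likewise not shown in this example. A hedged sketch of a generator with the shape the call implies (a running sum of log odds floored at reset_value, the classic one-sided CUSUM):

def gen_cusum(log_odds, reset_value):
    # Illustrative only: accumulate evidence for the alternative model,
    # never letting the sum drop below reset_value so that a long null
    # stretch cannot mask a later shift toward the alternative.
    total = 0.0
    for value in log_odds:
        total = max(reset_value, total + value)
        yield total
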
Example 5
def vline(x=None, color='black'):
    if x is None:
        pt = Canvas().mouseData()
        x = pt[0]
        print x
    x = numarray.zeros(100) + x
    display = Canvas().getDisplay()
    yr = list(display.getRange("y"))
    if display.getBinWidth('x') > 0 and display.getBinWidth('y') < 0:
        yr[0] /= display.getBinWidth('x')
        yr[1] /= display.getBinWidth('x')
    ylog = display.getLog("y")
    if ylog:
        yr = ( numarray.log(yr[0]), numarray.log(yr[1]) )
    y = numarray.arange(100)/99.*(yr[1]-yr[0]) + yr[0]
    if ylog:
        y = numarray.exp(y)
    display.setAutoRanging("x", 0)
    display.setAutoRanging("y", 0)
    nt = newNTuple( (x, y), ('x', 'y'), register=0 )
    Scatter(nt, 'x', 'y', pointRep="Line", oplot=1, lineStyle='Dot',
            color=color)
Example 6
def plotLine(display, slope, intercept, xlog, ylog, lineStyle='Dot',
             color='black'):
    f = lambda x: slope*x + intercept
    xr = display.getRange("x")
    if xlog:
        xr = ( numarray.log(xr[0]), numarray.log(xr[1]) )
    xx = numarray.arange(100)/99.*(xr[1] - xr[0]) + xr[0]
    yy = numarray.array([f(x) for x in xx])
    if ylog:
        yy = numarray.exp(yy)
    if xlog:
        xx = numarray.exp(xx)
    ylabel = display.getLabel("y")
    xBinWidth = display.getDataRep().getBinWidth("x")
    if xBinWidth and ylabel == "Entries / bin":
        yy = yy/xBinWidth
    Canvas().selectDisplay( display )
    display.setAutoRanging("x", 0)
    display.setAutoRanging("y", 0)
    nt = newNTuple( (xx, yy), ('x', 'y'), register=0 )
    Scatter(nt, 'x', 'y', pointRep="Line", oplot=1, lineStyle=lineStyle,
            color=color)
Example 7
File: fit.py Project: certik/chemev
def simul(isodir):
    params = parameters()
    eps = 0.01
    # [Fe/H] (metallicity)
    params.set("m0y", 0.9, -1, 1, True)
    params.set("m0cphi", -0.001, -pi / 2 + eps, 0, True)
    params.set("m0cr", 0.01, 0, 2, True)
    params.set("m1y", -0.9, -1, 1, True)
    params.set("m1cphi", 1.67, pi / 2 + eps, pi, True)
    params.set("m1cr", 1.06, 0, 2, True)

    params.set("sigma", 0.6, 0, 1, True)
    # SFR (star formation rate)
    params.set("s0y", 1.11, 0.0, 2, True)
    params.set("s0cphi", 0.0003, 0, pi / 2 - eps, True)
    params.set("s0cr", 1, 0, 1, True)
    params.set("s1x", 9.9, 8, 10, True)
    params.set("s1y", 5.9, 1.5, 10, True)
    params.set("s1cphi", 4.61, pi / 2 + eps, pi * 3 / 2 - eps, True)
    params.set("s1cr", 0.49, 0, 0.5, True)
    params.set("s2y", 0.89, 0, 2, True)
    params.set("s2cphi", 3.04, pi / 2 + eps, pi - eps, True)
    params.set("s2cr", 0.001, 0, 2, True)
    if len(sys.argv) == 2:  # run with an argument to start from the beginning
        params.save()
    params.load()

    data = iso.readfits(isodir + "/datarr.fits")
    isos = iso.readisos(isodir)
    t = utils.frange(8, 10.25, 0.001)

    def f(par):
        params.setvalues(par)
        w = utils.calculateweights(t, sfr(t, params))
        #isow=iso.getisosweights(w,10.**t,metallicity(t,params),isos)
        isow = iso.getisosweights_gauss(w, 10.**t, metallicity(t, params),
                                        isos, params.sigma)
        m = iso.computeCMD(isow, isos)
        m = utils.normalize(m, sum(data.flat))
        return utils.loglikelihood(m, data)

    d = numarray.maximum(data, 1e-20)
    llhC = sum((d * numarray.log(d)).flat)

    def b(par, value, iter):
        params.setvalues(par)
        params.save()
        print "henry:", value, "tom:", 2.0 * (value + llhC), "iter:", iter

    optimization.minmax(optimization.fmin_simplex, f, params.getvalues(),
                        params.min(), params.max(), b)
Example 8
File: fit.py Project: certik/chemev
def simul(isodir):
    params=parameters()
    eps=0.01
    # [Fe/H] (metallicity)
    params.set("m0y"   ,0.9, -1,1,True)
    params.set("m0cphi",-0.001,  -pi/2+eps,0,True)
    params.set("m0cr"  ,0.01,  0,2,True)
    params.set("m1y"   ,-0.9,  -1,1,True)
    params.set("m1cphi",1.67,  pi/2+eps,pi,True)
    params.set("m1cr"  ,1.06,  0,2,True)

    params.set("sigma" ,0.6,  0,1, True)
    # SFR (star formation rate)
    params.set("s0y"   ,1.11,   0.0,2,True)
    params.set("s0cphi",0.0003,   0,pi/2-eps,True)
    params.set("s0cr"  ,1,   0,1,True)
    params.set("s1x"   ,9.9,   8,10,True)
    params.set("s1y"   ,5.9, 1.5,10,True)
    params.set("s1cphi",4.61,   pi/2+eps,pi*3/2-eps,True)
    params.set("s1cr"  ,0.49,   0,0.5,True)
    params.set("s2y"   ,0.89, 0,2,True)
    params.set("s2cphi",3.04,   pi/2+eps,pi-eps,True)
    params.set("s2cr"  ,0.001,   0,2,True)
    if len(sys.argv) == 2:  # run with an argument to start from the beginning
        params.save()
    params.load()

    data=iso.readfits(isodir+"/datarr.fits")
    isos = iso.readisos(isodir)
    t=utils.frange(8,10.25,0.001)
    def f(par):
        params.setvalues(par)
        w=utils.calculateweights(t,sfr(t,params))
        #isow=iso.getisosweights(w,10.**t,metallicity(t,params),isos)
        isow=iso.getisosweights_gauss(w,10.**t,metallicity(t,params),isos,
                params.sigma)
        m=iso.computeCMD(isow,isos)
        m=utils.normalize(m,sum(data.flat))
        return utils.loglikelihood(m,data)

    d = numarray.maximum(data,1e-20)
    llhC=sum( (d*numarray.log(d)).flat )
    def b(par,value,iter):
        params.setvalues(par)
        params.save()
        print "henry:",value,"tom:",2.0*(value+llhC),"iter:",iter

    optimization.minmax(optimization.fmin_simplex,f,
            params.getvalues(),params.min(),params.max(),b)
Example 9
def plot_rspgenIntegral(self, energy, inclination, phi=0, nsamp=2000):
    rmin = 1e-2
    rmax = 30.
    npts = 20
    rstep = num.log(rmax / rmin) / (npts - 1)
    radii = rmin * num.exp(rstep * num.arange(npts))
    self._setPsf(energy, inclination, phi)
    seps = []
    srcDir = SkyDir(180, 0)
    for i in range(nsamp):
        appDir = self.psf.appDir(energy, srcDir, self.scZAxis,
                                 self.scXAxis)
        seps.append(appDir.difference(srcDir) * 180. / num.pi)
    seps.sort()
    fraction = num.arange(nsamp, type=num.Float) / nsamp
    disp = plot.scatter(seps,
                        fraction,
                        xlog=1,
                        xname='ROI radius',
                        yname='enclosed Psf fraction',
                        pointRep='Line',
                        color='red')
    disp.setTitle("%s: %i MeV, %.1f deg" %
                  (self.irfs, energy, inclination))
    npred = []
    resids = []
    for radius in radii:
        npred.append(
            self.psf.angularIntegral(energy, inclination, phi, radius))
        resids.append(
            num.abs(
                (self._interpolate(seps, fraction, radius) - npred[-1]) /
                npred[-1]))
    plot.scatter(radii, npred, pointRep='Line', oplot=1)
    residplot = plot.scatter(radii,
                             resids,
                             'ROI radius',
                             yname='abs(sim - npred)/npred',
                             xlog=1,
                             ylog=1)
    #        Npred = Interpolator(radii, npred)
    ks_prob = ks2(npred, seps)
    plot.hline(0)
    residplot.setTitle("%s: %i MeV, %.1f deg\n ks prob=%.2e" %
                       (self.irfs, energy, inclination, ks_prob[1]))
    return energy, inclination, ks_prob[1]
Example 10
def compute_model(t,p,isos,data):
    """ Returns m,s,w,isow,model """
    m=fit.metallicity(t,p)
    s=fit.sfr(t,p)
    w=utils.calculateweights(t,s)
    if not p.pars.has_key('dsigmadlogt'):
        p.set('dsigmadlogt',0,0,False)
    if p.sigma > 0.:
        if p.dsigmadlogt == 0.:
            isow=iso.getisosweights_gauss(w,10.**t,m,isos,p.sigma)
        if p.dsigmadlogt != 0.:
            print "Gaussian sigma, ds/dlogt ",p.sigma,p.dsigmadlogt
            isow=iso.getisosweights_vgauss(w,10.**t,m,isos,p.sigma,p.dsigmadlogt)
    else:
        isow=iso.getisosweights(w,10.**t,m,isos)
    model=iso.computeCMD(isow,isos)
    model=utils.normalize(model,sum(data.flat))
    d = numarray.maximum(data,1e-20)
    llhC=sum( (d*numarray.log(d)).flat )
    value=utils.loglikelihood(model,data)
    print "henry:",value,"tom:",2.0*(value+llhC)
    return m,s,w,isow,model
Example 11
def compute_model(t, p, isos, data):
    """ Returns m,s,w,isow,model """
    m = fit.metallicity(t, p)
    s = fit.sfr(t, p)
    w = utils.calculateweights(t, s)
    if not p.pars.has_key('dsigmadlogt'):
        p.set('dsigmadlogt', 0, 0, False)
    if p.sigma > 0.:
        if p.dsigmadlogt == 0.:
            isow = iso.getisosweights_gauss(w, 10.**t, m, isos, p.sigma)
        if p.dsigmadlogt != 0.:
            print "Gaussian sigma, ds/dlogt ", p.sigma, p.dsigmadlogt
            isow = iso.getisosweights_vgauss(w, 10.**t, m, isos, p.sigma,
                                             p.dsigmadlogt)
    else:
        isow = iso.getisosweights(w, 10.**t, m, isos)
    model = iso.computeCMD(isow, isos)
    model = utils.normalize(model, sum(data.flat))
    d = numarray.maximum(data, 1e-20)
    llhC = sum((d * numarray.log(d)).flat)
    value = utils.loglikelihood(model, data)
    print "henry:", value, "tom:", 2.0 * (value + llhC)
    return m, s, w, isow, model
Example 12
def log_array(npts, xmin, xmax):
    xstep = num.log(xmax / xmin) / (npts - 1)
    return xmin * num.exp(num.arange(npts, type=num.Float) * xstep)
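
A quick check of the spacing (illustrative; num is assumed to be numarray, as in the neighboring examples): five points from 1 to 10**4 land exactly on the decades.

grid = log_array(5, 1.0, 1.0e4)
print grid   # ~ [1., 10., 100., 1000., 10000.]
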
Example 13
def f(e, e1=300, a=1, b=2, k=1):
    return k * (e / e1)**(-(a + b * num.log(e / e1)))
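
This is the log-parabolic spectral form: with u = log(e/e1), log f = log k - a*u - b*u**2, so the local spectral index steepens as a + 2*b*u. A small usage sketch, reusing log_array from the previous example (illustrative values):

energies = log_array(50, 30., 3000.)   # log-spaced grid around e1 = 300
flux = f(energies)
print flux[0], flux[-1]
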
Example 14
def estimate_mixture(models, seqs, max_iter, eps, alpha=None):
    """ Given a Python-list of models and a SequenceSet seqs
    perform a nested EM to estimate maximum-likelihood
    parameters for the models and the mixture coefficients.
    The iteration stops after max_iter steps or if the
    improvement in log-likelihood is less than eps.

    alpha is a numarray of dimension len(models) containing
    the mixture coefficients. If alpha is not given, uniform
    values will be chosen.
        
    Result: The models are changed in place. Return value
    is (l, alpha, P) where l is the final log likelihood of
    seqs under the mixture, alpha is a numarray of
    dimension len(models) containing the mixture coefficients
    and P is a (|sequences| x |models|)-matrix containing
    P[model j| sequence i]
        
    """
    done = 0
    iter = 1
    last_mixture_likelihood = -99999999.99
    # The (nr of seqs x nr of models)-matrix holding the likelihoods
    l = numarray.zeros((len(seqs), len(models)), numarray.Float)
    if alpha is None: # Uniform alpha
        logalpha = numarray.ones(len(models), numarray.Float) * \
                   math.log(1.0/len(models))
    else:
        logalpha = numarray.log(alpha)
    print logalpha, numarray.exp(logalpha)
    log_nrseqs = math.log(len(seqs))

    while 1:
        # Score all sequences with all models
        for i, m in enumerate(models):
            loglikelihood = m.loglikelihoods(seqs)
            # numarray slices: l[:,i] is the i-th column of l
            l[:,i] = numarray.array(loglikelihood)

        #print l
        for i in xrange(len(seqs)):
            l[i] += logalpha # l[i] = ( log( a_k * P[seq i| model k]) )
        #print l
        mixture_likelihood = numarray.sum(numarray.sum(l))
        print "# iter %s joint likelihood = %f" % (iter, mixture_likelihood) 

        improvement = mixture_likelihood - last_mixture_likelihood
        if iter > max_iter or improvement < eps:
            break

        # Compute P[model j| seq i]
        for i in xrange(len(seqs)):
            seq_logprob = sumlogs(l[i]) # \sum_{k} a_k P[seq i| model k]
            l[i] -= seq_logprob # l[i] = ( log P[model j | seq i] )

        #print l
        l_exp = numarray.exp(l) # XXX Use approx with table lookup
        #print "exp(l)", l_exp
        #print numarray.sum(numarray.transpose(l_exp)) # Print row sums

        # Compute priors alpha
        for i in xrange(len(models)):
            logalpha[i] = sumlogs(l[:,i]) - log_nrseqs

        #print "logalpha", logalpha, numarray.exp(logalpha)

        for j, m in enumerate(models):
            # Set the sequence weight for sequence i under model m to P[m| i]
            for i in xrange(len(seqs)):
                seqs.setWeight(i,l_exp[i,j])
            m.baumWelch(seqs, 10, 0.0001)

        iter += 1
        last_mixture_likelihood = mixture_likelihood

    return (mixture_likelihood, numarray.exp(logalpha), l_exp)
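
A hedged usage sketch: the loop above requires models exposing loglikelihoods() and baumWelch() and a seqs object exposing setWeight() (a ghmm-style interface); the names below are illustrative.

# Hypothetical call with two candidate HMMs and a uniform starting mixture.
models = [hmm_a, hmm_b]          # objects providing loglikelihoods()/baumWelch()
logL, alpha, P = estimate_mixture(models, seqs, max_iter=50, eps=1e-4)
print "mixture weights:", alpha  # numarray of length len(models)
print "P[model j | seq i]:", P   # (|seqs| x |models|) matrix
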
Example 15
import hippo

app = hippo.HDApp()
canvas = app.canvas()

plot = Display("Color Plot", darray, ("GLON", "GLAT", "nil", "nil"))
canvas.addDisplay(plot)


#
# Calculate Log(energy) and add it as new column
#
import numarray

darray["LogE"] = numarray.log(darray["energy"])

lplot = Display("Histogram", darray, ("LogE",))
lplot.setLog("y", True)
canvas.addDisplay(lplot)

#
# Compare it with logarithmic binning
#
clplot = Display("Histogram", darray, ("energy",))
canvas.addDisplay(clplot)
clplot.setLog("x", True)
clplot.setLog("y", True)

#
# Apply a cut to the displays
Example 16
def simul(isodir):
    """ Read in parameters, data and isochrones. Create callback functions
    for the optimization routine, one of which will return the log(likelihood)
    and the other of which will print the best-fit parameter values. Having
    done this, call the optimization routine to minimize log(L).
    """
    log_tmax = math.log10(13.7e9)
    params=parameters()
    eps=0.01
    # [Fe/H] (metallicity)
    params.set("m0y"   ,1.0, 0,2.5,True)
    params.set("m0cphi",-0.001,  -pi/2+eps,0,True)
    params.set("m0cr"  ,0.01,  0,2,True)
    params.set("m1y"   ,-2.5,  -2.5,0.9,True)
    params.set("m1cphi",1.67,  pi/2+eps,pi,True)
    params.set("m1cr"  ,1.06,  0,2,True)
    params.set("sigma" ,0.2,  0,1, True)
    params.set("dsigmadlogt" ,0.2,  -1,1, True)

    # SFR (star formation rate)
    params.set("s0x"   ,8.0,   8.0,9.0,True)
    params.set("s0y"   ,0.5,   0.0,1,True)
    params.set("s0tx"  ,0.1,   0.,1.,True)
    params.set("s0ty"  ,0.1,   0,1,True)
    params.set("s1tx"  ,0.1,   0.,1.,True)
    params.set("s1ty"  ,0.1,   -1,1,True)
    params.set("s1x"   ,0.5,   0,1,True)
    params.set("s1y"   ,1.0,   0.0,1.0,False)
    params.set("s2x"   ,log_tmax,  9.5,10.25,True)
    params.set("s2y"   ,0.1,   0,1.0,True)
    params.set("s2tx"  ,0.1,   0.,1.,True)
    params.set("s2ty"  ,0.1,   0.,1.,True)

    if len(sys.argv) == 2:
        if sys.argv[1] == "start": #run with a param to start from the beginning
            params.save()
    params.load()
    if not params.pars.has_key('dsigmadlogt'):
        params.set('dsigmadlogt',0.,0,False)
    if not params.pars.has_key('dsigmadlogs'):  # Hook for SFR-dependent spread; not fully implemented
        params.set('dsigmadlogs',0.,0,False)
    if len(sys.argv) == 2:
        if sys.argv[1] == "nudge": #Tweak the values near their limits
             print "Nudging parameters near the limits"
             p1 = params.getl()
             utils.nudge(params)
             p2 = params.getl()
             for pp1,pp2 in zip(p1,p2):
                 if pp1[1] != pp2[1]:
                      print "%s %.8f -> %.8f" % (pp1[0],pp1[1],pp2[1])

    data=iso.readfits(isodir+"/datarr.fits")
    isos = iso.readisos(isodir)
    t=utils.frange(8,log_tmax,0.001)
    def f(par):
        params.setvalues(par)
        p = params
        w=utils.calculateweights(t,sfr(t,params))
        # isow=iso.getisosweights(w,10.**t,metallicity(t,params),isos)
        if p.sigma > 0.:
            if p.dsigmadlogt == 0.:
                isow=iso.getisosweights_gauss(w,10.**t,metallicity(t,p),isos,p.sigma)
            if p.dsigmadlogt != 0.:
#               print "Gaussian sigma, ds/dlogt ",p.sigma,p.dsigmadlogt
                isow=iso.getisosweights_vgauss(w,10.**t,metallicity(t,p),isos,p.sigma,p.dsigmadlogt)
            if p.dsigmadlogs != 0.: # Hook for SFR-dependent spread; not fully implemented
                isow=iso.getisosweights_sgauss(w,10.**t,sfr(t,params),metallicity(t,p),
                   isos,p.sigma,p.dsigmadlogs)
        else:
            isow=iso.getisosweights(w,10.**t,metallicity(t,p),isos)

        m=iso.computeCMD(isow,isos)
        m=utils.normalize(m,sum(data.flat))
        return utils.loglikelihood(m,data)

    d = numarray.maximum(data,1e-20)
    llhC=sum( (d*numarray.log(d)).flat )
    def b(par,value,iter):
        params.setvalues(par)
        params.save()
        print "henry:",value,"tom:",2.0*(value+llhC),"iter:",iter,time.ctime()
        sys.stdout.flush()

    optimization.minmax(optimization.fmin_simplex,f,
            params.getvalues(),params.min(),params.max(),b)
def logistic_regression(x,
                        y,
                        beta_start=None,
                        verbose=False,
                        CONV_THRESH=1.e-3,
                        MAXIT=500):
    """
 Uses the Newton-Raphson algorithm to calculate a maximum-likelihood
 estimate of a logistic regression.
 The algorithm is known as 'iteratively re-weighted least squares', or IRLS.

 x - rank-1 or rank-2 array of predictors. If x is rank-2,
     the number of predictors = x.shape[0] = N.  If x is rank-1,
     it is assumed N=1.
     
 y - binary outcomes (if N>1 len(y) = x.shape[1], if N=1 len(y) = len(x))
 
 beta_start - initial beta vector (default zeros(N+1,x.dtype.char))
 
 if verbose=True, diagnostics are printed at each iteration (default False).
 
 MAXIT - max number of iterations (default 500)
 
 CONV_THRESH - convergence threshold (sum of absolute differences
  of beta-beta_old, default 0.001)

 returns beta (the logistic regression coefficients, an N+1 element vector),
 J_bar (the (N+1)x(N+1) information matrix), and l (the log-likelihood).
 
 J_bar can be used to estimate the covariance matrix and the standard
 error for beta.
 
 l can be used for a chi-squared significance test.

 covmat = inverse(J_bar)     --> covariance matrix of coefficients (beta)
 stderr = sqrt(diag(covmat)) --> standard errors for beta
 deviance = -2l              --> scaled deviance statistic
 the chi-squared value for -2l gives the model chi-squared test.
    """
    if x.shape[-1] != len(y):
        raise ValueError, "x.shape[-1] and y should be the same length!"
    try:
        N, npreds = x.shape[1], x.shape[0]
    except IndexError:  # single predictor, use simple logistic regression routine.
        return _simple_logistic_regression(x,
                                           y,
                                           beta_start=beta_start,
                                           CONV_THRESH=CONV_THRESH,
                                           MAXIT=MAXIT,
                                           verbose=verbose)
    if beta_start is None:
        beta_start = NA.zeros(npreds + 1, x.dtype.char)
    X = NA.ones((npreds + 1, N), x.dtype.char)
    X[1:, :] = x
    Xt = NA.transpose(X)
    iter = 0
    diff = 1.
    beta = beta_start  # initial values
    if verbose:
        print 'iteration  beta log-likelihood |beta-beta_old|'
    while iter < MAXIT:
        beta_old = beta
        ebx = NA.exp(NA.dot(beta, X))
        p = ebx / (1. + ebx)
        l = NA.sum(y * NA.log(p) +
                   (1. - y) * NA.log(1. - p))  # log-likelihood
        s = NA.dot(X, y - p)  # scoring function
        J_bar = NA.dot(X * p * (1. - p), Xt)  # information matrix X*diag(p*(1-p))*Xt
        beta = beta_old + NA.dot(LA.inverse(J_bar), s)  # new value of beta
        diff = NA.sum(NA.fabs(beta - beta_old))  # sum of absolute differences
        if verbose:
            print iter + 1, beta, l, diff
        if diff <= CONV_THRESH: break
        iter = iter + 1
    if iter == MAXIT and diff > CONV_THRESH:
        print 'warning: convergence not achieved with threshold of %s in %s iterations' % (
            CONV_THRESH, MAXIT)
    return beta, J_bar, l
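
A minimal multi-predictor sketch using the covariance recipe from the docstring (NA and LA are assumed to be the same array and linear-algebra bindings the function itself uses; the data are illustrative):

# Hypothetical data: two predictors, eight cases, so x.shape == (2, 8).
x = NA.array([[0., 1., 2., 3., 4., 5., 6., 7.],
              [1., 0., 1., 0., 1., 0., 1., 0.]])
y = NA.array([0., 0., 1., 1., 0., 1., 0., 1.])
beta, J_bar, l = logistic_regression(x, y)
covmat = LA.inverse(J_bar)               # covariance matrix of beta
stderr = NA.sqrt(NA.diagonal(covmat))    # standard errors for beta
print beta, stderr
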
Example 18
print darray.getLabels()

print "Number of rows = ", darray.rows

import hippo
app = hippo.HDApp()
canvas = app.canvas()

plot = Display("Color Plot", darray, ('GLON', 'GLAT', 'nil', 'nil'))
canvas.addDisplay(plot)

#
# Calculate Log(energy) and add it as new column
#
import numarray
darray['LogE'] = numarray.log(darray['energy'])

lplot = Display('Histogram', darray, ('LogE', ))
lplot.setLog('y', True)
canvas.addDisplay(lplot)

#
# Compare it with logarithmic binning
#
clplot = Display('Histogram', darray, ('energy', ))
canvas.addDisplay(clplot)
clplot.setLog('x', True)
clplot.setLog('y', True)

#
# Apply a cut to the displays
def estimate_mixture(models, seqs, max_iter, eps, alpha=None):
    """ Given a Python-list of models and a SequenceSet seqs
    perform a nested EM to estimate maximum-likelihood
    parameters for the models and the mixture coefficients.
    The iteration stops after max_iter steps or if the
    improvement in log-likelihood is less than eps.

    alpha is a numarray of dimension len(models) containing
    the mixture coefficients. If alpha is not given, uniform
    values will be chosen.
        
    Result: The models are changed in place. Return value
    is (l, alpha, P) where l is the final log likelihood of
    seqs under the mixture, alpha is a numarray of
    dimension len(models) containing the mixture coefficients
    and P is a (|sequences| x |models|)-matrix containing
    P[model j| sequence i]
        
    """
    done = 0
    iter = 1
    last_mixture_likelihood = -99999999.99
    # The (nr of seqs x nr of models)-matrix holding the likelihoods
    l = numarray.zeros((len(seqs), len(models)), numarray.Float)
    if alpha is None:  # Uniform alpha
        logalpha = numarray.ones(len(models), numarray.Float) * \
                   math.log(1.0/len(models))
    else:
        logalpha = numarray.log(alpha)
    print logalpha, numarray.exp(logalpha)
    log_nrseqs = math.log(len(seqs))

    while 1:
        # Score all sequences with all models
        for i, m in enumerate(models):
            loglikelihood = m.loglikelihoods(seqs)
            # numarray slices: l[:,i] is the i-th column of l
            l[:, i] = numarray.array(loglikelihood)

        #print l
        for i in xrange(len(seqs)):
            l[i] += logalpha  # l[i] = ( log( a_k * P[seq i| model k]) )
        #print l
        mixture_likelihood = numarray.sum(numarray.sum(l))
        print "# iter %s joint likelihood = %f" % (iter, mixture_likelihood)

        improvement = mixture_likelihood - last_mixture_likelihood
        if iter > max_iter or improvement < eps:
            break

        # Compute P[model j| seq i]
        for i in xrange(len(seqs)):
            seq_logprob = sumlogs(l[i])  # \sum_{k} a_k P[seq i| model k]
            l[i] -= seq_logprob  # l[i] = ( log P[model j | seq i] )

        #print l
        l_exp = numarray.exp(l)  # XXX Use approx with table lookup
        #print "exp(l)", l_exp
        #print numarray.sum(numarray.transpose(l_exp)) # Print row sums

        # Compute priors alpha
        for i in xrange(len(models)):
            logalpha[i] = sumlogs(l[:, i]) - log_nrseqs

        #print "logalpha", logalpha, numarray.exp(logalpha)

        for j, m in enumerate(models):
            # Set the sequence weight for sequence i under model m to P[m| i]
            for i in xrange(len(seqs)):
                seqs.setWeight(i, l_exp[i, j])
            m.baumWelch(seqs, 10, 0.0001)

        iter += 1
        last_mixture_likelihood = mixture_likelihood

    return (mixture_likelihood, numarray.exp(logalpha), l_exp)
def logistic_regression(x,y,beta_start=None,verbose=False,CONV_THRESH=1.e-3,
                        MAXIT=500):
    """
 Uses the Newton-Raphson algorithm to calculate a maximum-likelihood
 estimate of a logistic regression.
 The algorithm is known as 'iteratively re-weighted least squares', or IRLS.

 x - rank-1 or rank-2 array of predictors. If x is rank-2,
     the number of predictors = x.shape[0] = N.  If x is rank-1,
     it is assumed N=1.
     
 y - binary outcomes (if N>1 len(y) = x.shape[1], if N=1 len(y) = len(x))
 
 beta_start - initial beta vector (default zeros(N+1,x.dtype.char))
 
 if verbose=True, diagnostics are printed at each iteration (default False).
 
 MAXIT - max number of iterations (default 500)
 
 CONV_THRESH - convergence threshold (sum of absolute differences
  of beta-beta_old, default 0.001)

 returns beta (the logistic regression coefficients, an N+1 element vector),
 J_bar (the (N+1)x(N+1) information matrix), and l (the log-likelihood).
 
 J_bar can be used to estimate the covariance matrix and the standard
 error for beta.
 
 l can be used for a chi-squared significance test.

 covmat = inverse(J_bar)     --> covariance matrix of coefficients (beta)
 stderr = sqrt(diag(covmat)) --> standard errors for beta
 deviance = -2l              --> scaled deviance statistic
 the chi-squared value for -2l gives the model chi-squared test.
    """
    if x.shape[-1] != len(y):
        raise ValueError, "x.shape[-1] and y should be the same length!"
    try:
        N, npreds = x.shape[1], x.shape[0]
    except IndexError:  # single predictor, use simple logistic regression routine.
        return _simple_logistic_regression(x,y,beta_start=beta_start,
               CONV_THRESH=CONV_THRESH,MAXIT=MAXIT,verbose=verbose)
    if beta_start is None:
        beta_start = NA.zeros(npreds+1,x.dtype.char)
    X = NA.ones((npreds+1,N), x.dtype.char)
    X[1:, :] = x
    Xt = NA.transpose(X)
    iter = 0; diff = 1.; beta = beta_start  # initial values
    if verbose:
        print 'iteration  beta log-likelihood |beta-beta_old|'
    while iter < MAXIT:
        beta_old = beta 
        ebx = NA.exp(NA.dot(beta, X))
        p = ebx/(1.+ebx)
        l = NA.sum(y*NA.log(p) + (1.-y)*NA.log(1.-p)) # log-likelihood
        s = NA.dot(X, y-p)                            # scoring function
        J_bar = NA.dot(X*p*(1.-p),Xt)                 # information matrix X*diag(p*(1-p))*Xt
        beta = beta_old + NA.dot(LA.inverse(J_bar),s) # new value of beta
        diff = NA.sum(NA.fabs(beta-beta_old)) # sum of absolute differences
        if verbose:
            print iter+1, beta, l, diff
        if diff <= CONV_THRESH: break
        iter = iter + 1
    if iter == MAXIT and diff > CONV_THRESH: 
        print 'warning: convergence not achieved with threshold of %s in %s iterations' % (CONV_THRESH,MAXIT)
    return beta, J_bar, l
Example 21
def vc_v200_nfw(x, c):
    top = N.log(1.0 + c*x) - c*x / (1.0 + c*x)
    bottom = N.log(1.0 + c) - c / (1.0 + c)
    vc2 = top / (x * bottom)
    vc = N.sqrt(vc2)
    return vc
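
A quick numeric check (illustrative; N is assumed bound to numarray, matching the function body): for an NFW profile the rotation curve peaks near x = 2.16/c, so around x = 0.22 for c = 10.

import numarray as N

xs = N.arange(1, 101, type=N.Float) / 50.   # r/r200 from 0.02 to 2.0
vc = vc_v200_nfw(xs, 10.)
print xs[N.argmax(vc)], vc[N.argmax(vc)]    # peak radius and v_c/v200 there
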
Example 22
def log_array(xmin, xmax, npts):
    return xmin * num.exp(
        num.arange(npts, type=num.Float) / (npts - 1) * num.log(xmax / xmin))