def _simple_logistic_regression(x, y, beta_start=None, verbose=False,
                                CONV_THRESH=1.e-3, MAXIT=500):
    """
    Faster than logistic_regression when there is only one predictor.
    """
    if len(x) != len(y):
        raise ValueError, "x and y should be the same length!"
    if beta_start is None:
        beta_start = NA.zeros(2, x.dtype.char)
    iter = 0; diff = 1.; beta = beta_start  # initial values
    if verbose:
        print 'iteration  beta log-likelihood |beta-beta_old|'
    while iter < MAXIT:
        beta_old = beta
        p = NA.exp(beta[0] + beta[1]*x)/(1. + NA.exp(beta[0] + beta[1]*x))
        l = NA.sum(y*NA.log(p) + (1. - y)*NA.log(1. - p))  # log-likelihood
        s = NA.array([NA.sum(y - p), NA.sum((y - p)*x)])   # scoring function
        # information matrix
        J_bar = NA.array([[NA.sum(p*(1 - p)),   NA.sum(p*(1 - p)*x)],
                          [NA.sum(p*(1 - p)*x), NA.sum(p*(1 - p)*x*x)]])
        beta = beta_old + NA.dot(LA.inverse(J_bar), s)  # new value of beta
        diff = NA.sum(NA.fabs(beta - beta_old))  # sum of absolute differences
        if verbose:
            print iter + 1, beta, l, diff
        if diff <= CONV_THRESH:
            break
        iter = iter + 1
    return beta, J_bar, l
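#
# Minimal usage sketch for the routine above, with made-up data. NA and LA
# are assumed to be the array and linear-algebra modules this file already
# imports (e.g. numarray and numarray.linear_algebra). The y values are
# deliberately not perfectly separable, so the MLE exists and IRLS converges.
#
x = NA.array([0., 1., 2., 3., 4., 5.])
y = NA.array([0., 0., 1., 0., 1., 1.])
beta, J_bar, l = _simple_logistic_regression(x, y, verbose=True)
print beta  # fitted [intercept, slope]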
def _loglikelihood(self, vectors, priors, means, covariances):
    # Log-likelihood of the data under the Gaussian mixture:
    # sum_i log( sum_j priors[j] * N(vector_i | means[j], covariances[j]) )
    llh = 0.0
    for vector in vectors:
        p = 0
        for j in range(len(priors)):
            p += priors[j] * \
                 self._gaussian(means[j], covariances[j], vector)
        llh += numarray.log(p)
    return llh
def __call__(self, data):
    state = self._state = UniTable()
    state['data'] = data
    state['nullmodel'] = self.nullmodel(state['data'])
    state['altmodel'] = self.altmodel(state['data'])
    state['odds'] = state['altmodel']/state['nullmodel']
    state['log_odds'] = na.log(state['odds'])
    state['cusum'] = list(gen_cusum(state['log_odds'], self.reset_value))
    state['score'] = state['cusum'] > self.threshold
    return state['score'][-1]
def vline(x=None, color='black'):
    if x is None:
        pt = Canvas().mouseData()
        x = pt[0]
        print x
    x = numarray.zeros(100) + x
    display = Canvas().getDisplay()
    yr = list(display.getRange("y"))
    if display.getBinWidth('x') > 0 and display.getBinWidth('y') < 0:
        yr[0] /= display.getBinWidth('x')
        yr[1] /= display.getBinWidth('x')
    ylog = display.getLog("y")
    if ylog:
        yr = (numarray.log(yr[0]), numarray.log(yr[1]))
    y = numarray.arange(100)/99.*(yr[1] - yr[0]) + yr[0]
    if ylog:
        y = numarray.exp(y)
    display.setAutoRanging("x", 0)
    display.setAutoRanging("y", 0)
    nt = newNTuple((x, y), ('x', 'y'), register=0)
    Scatter(nt, 'x', 'y', pointRep="Line", oplot=1, lineStyle='Dot',
            color=color)
def plotLine(display, slope, intercept, xlog, ylog, lineStyle='Dot',
             color='black'):
    f = lambda x: slope*x + intercept
    xr = display.getRange("x")
    if xlog:
        xr = (numarray.log(xr[0]), numarray.log(xr[1]))
    xx = numarray.arange(100)/99.*(xr[1] - xr[0]) + xr[0]
    yy = numarray.array([f(x) for x in xx])
    if ylog:
        yy = numarray.exp(yy)
    if xlog:
        xx = numarray.exp(xx)
    ylabel = display.getLabel("y")
    xBinWidth = display.getDataRep().getBinWidth("x")
    if xBinWidth and ylabel == "Entries / bin":
        yy = yy/xBinWidth
    Canvas().selectDisplay(display)
    display.setAutoRanging("x", 0)
    display.setAutoRanging("y", 0)
    nt = newNTuple((xx, yy), ('x', 'y'), register=0)
    Scatter(nt, 'x', 'y', pointRep="Line", oplot=1, lineStyle=lineStyle,
            color=color)
def simul(isodir):
    params = parameters()
    eps = 0.01
    # feh
    params.set("m0y", 0.9, -1, 1, True)
    params.set("m0cphi", -0.001, -pi/2 + eps, 0, True)
    params.set("m0cr", 0.01, 0, 2, True)
    params.set("m1y", -0.9, -1, 1, True)
    params.set("m1cphi", 1.67, pi/2 + eps, pi, True)
    params.set("m1cr", 1.06, 0, 2, True)
    params.set("sigma", 0.6, 0, 1, True)
    # sfr
    params.set("s0y", 1.11, 0.0, 2, True)
    params.set("s0cphi", 0.0003, 0, pi/2 - eps, True)
    params.set("s0cr", 1, 0, 1, True)
    params.set("s1x", 9.9, 8, 10, True)
    params.set("s1y", 5.9, 1.5, 10, True)
    params.set("s1cphi", 4.61, pi/2 + eps, pi*3/2 - eps, True)
    params.set("s1cr", 0.49, 0, 0.5, True)
    params.set("s2y", 0.89, 0, 2, True)
    params.set("s2cphi", 3.04, pi/2 + eps, pi - eps, True)
    params.set("s2cr", 0.001, 0, 2, True)
    if len(sys.argv) == 2:  # run with a param to start from the beginning
        params.save()
    params.load()
    data = iso.readfits(isodir + "/datarr.fits")
    isos = iso.readisos(isodir)
    t = utils.frange(8, 10.25, 0.001)

    def f(par):
        params.setvalues(par)
        w = utils.calculateweights(t, sfr(t, params))
        #isow = iso.getisosweights(w, 10.**t, metallicity(t, params), isos)
        isow = iso.getisosweights_gauss(w, 10.**t, metallicity(t, params),
                                        isos, params.sigma)
        m = iso.computeCMD(isow, isos)
        m = utils.normalize(m, sum(data.flat))
        return utils.loglikelihood(m, data)

    d = numarray.maximum(data, 1e-20)
    llhC = sum((d*numarray.log(d)).flat)

    def b(par, value, iter):
        params.setvalues(par)
        params.save()
        print "henry:", value, "tom:", 2.0*(value + llhC), "iter:", iter

    optimization.minmax(optimization.fmin_simplex, f, params.getvalues(),
                        params.min(), params.max(), b)
def plot_rspgenIntegral(self, energy, inclination, phi=0, nsamp=2000):
    rmin = 1e-2
    rmax = 30.
    npts = 20
    rstep = num.log(rmax/rmin)/(npts - 1)
    radii = rmin*num.exp(rstep*num.arange(npts))
    self._setPsf(energy, inclination, phi)
    seps = []
    srcDir = SkyDir(180, 0)
    for i in range(nsamp):
        appDir = self.psf.appDir(energy, srcDir, self.scZAxis, self.scXAxis)
        seps.append(appDir.difference(srcDir)*180./num.pi)
    seps.sort()
    fraction = num.arange(nsamp, type=num.Float)/nsamp
    disp = plot.scatter(seps, fraction, xlog=1, xname='ROI radius',
                        yname='enclosed Psf fraction', pointRep='Line',
                        color='red')
    disp.setTitle("%s: %i MeV, %.1f deg" % (self.irfs, energy, inclination))
    npred = []
    resids = []
    for radius in radii:
        npred.append(self.psf.angularIntegral(energy, inclination, phi,
                                              radius))
        resids.append(num.abs((self._interpolate(seps, fraction, radius)
                               - npred[-1])/npred[-1]))
    plot.scatter(radii, npred, pointRep='Line', oplot=1)
    residplot = plot.scatter(radii, resids, 'ROI radius',
                             yname='abs(sim - npred)/npred', xlog=1, ylog=1)
    # Npred = Interpolator(radii, npred)
    ks_prob = ks2(npred, seps)
    plot.hline(0)
    residplot.setTitle("%s: %i MeV, %.1f deg\n ks prob=%.2e"
                       % (self.irfs, energy, inclination, ks_prob[1]))
    return energy, inclination, ks_prob[1]
def compute_model(t, p, isos, data):
    """ Returns m,s,w,isow,model """
    m = fit.metallicity(t, p)
    s = fit.sfr(t, p)
    w = utils.calculateweights(t, s)
    if not p.pars.has_key('dsigmadlogt'):
        p.set('dsigmadlogt', 0, 0, False)
    if p.sigma > 0.:
        if p.dsigmadlogt == 0.:
            isow = iso.getisosweights_gauss(w, 10.**t, m, isos, p.sigma)
        if p.dsigmadlogt != 0.:
            print "Gaussian sigma, ds/dlogt ", p.sigma, p.dsigmadlogt
            isow = iso.getisosweights_vgauss(w, 10.**t, m, isos, p.sigma,
                                             p.dsigmadlogt)
    else:
        isow = iso.getisosweights(w, 10.**t, m, isos)
    model = iso.computeCMD(isow, isos)
    model = utils.normalize(model, sum(data.flat))
    d = numarray.maximum(data, 1e-20)
    llhC = sum((d*numarray.log(d)).flat)
    value = utils.loglikelihood(model, data)
    print "henry:", value, "tom:", 2.0*(value + llhC)
    return m, s, w, isow, model
def log_array(npts, xmin, xmax):
    xstep = num.log(xmax/xmin)/(npts - 1)
    return xmin*num.exp(num.arange(npts, type=num.Float)*xstep)
def f(e, e1=300, a=1, b=2, k=1):
    return k*(e/e1)**(-(a + b*num.log(e/e1)))
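#
# f above is a log-parabola: log f = log k - (a + b*log(e/e1))*log(e/e1),
# i.e. a power law whose index steepens with log energy, normalized so that
# f(e1) = k at the pivot e1. Quick check with the defaults (num as imported
# by the surrounding module):
#
print f(300.)  # 1.0, since log(300/300) = 0 and k = 1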
def estimate_mixture(models, seqs, max_iter, eps, alpha=None):
    """ Given a Python-list of models and a SequenceSet seqs
    perform a nested EM to estimate maximum-likelihood
    parameters for the models and the mixture coefficients.
    The iteration stops after max_iter steps or if the
    improvement in log-likelihood is less than eps.

    alpha is a numarray of dimension len(models) containing the mixture
    coefficients. If alpha is not given, uniform values will be chosen.

    Result: The models are changed in place. Return value is
    (l, alpha, P) where l is the final log-likelihood of
    seqs under the mixture, alpha is a numarray of dimension
    len(models) containing the mixture coefficients and P is a
    (|sequences| x |models|)-matrix containing P[model j | sequence i].
    """
    done = 0
    iter = 1
    last_mixture_likelihood = -99999999.99
    # The (nr of seqs x nr of models)-matrix holding the likelihoods
    l = numarray.zeros((len(seqs), len(models)), numarray.Float)
    if alpha is None:  # uniform alpha
        logalpha = numarray.ones(len(models), numarray.Float) * \
                   math.log(1.0/len(models))
    else:
        logalpha = numarray.log(alpha)
    print logalpha, numarray.exp(logalpha)
    log_nrseqs = math.log(len(seqs))

    while 1:
        # Score all sequences with all models
        for i, m in enumerate(models):
            loglikelihood = m.loglikelihoods(seqs)
            # numarray slices: l[:,i] is the i-th column of l
            l[:, i] = numarray.array(loglikelihood)

        for i in xrange(len(seqs)):
            l[i] += logalpha  # l[i] = ( log( a_k * P[seq i| model k]) )

        mixture_likelihood = numarray.sum(numarray.sum(l))
        print "# iter %s joint likelihood = %f" % (iter, mixture_likelihood)

        improvement = mixture_likelihood - last_mixture_likelihood
        if iter > max_iter or improvement < eps:
            break

        # Compute P[model j | seq i]
        for i in xrange(len(seqs)):
            seq_logprob = sumlogs(l[i])  # \sum_{k} a_k P[seq i| model k]
            l[i] -= seq_logprob  # l[i] = ( log P[model j | seq i] )

        l_exp = numarray.exp(l)  # XXX Use approx with table lookup

        # Compute priors alpha
        for i in xrange(len(models)):
            logalpha[i] = sumlogs(l[:, i]) - log_nrseqs

        for j, m in enumerate(models):
            # Set the sequence weight for sequence i under model m to P[m| i]
            for i in xrange(len(seqs)):
                seqs.setWeight(i, l_exp[i, j])
            m.baumWelch(seqs, 10, 0.0001)

        iter += 1
        last_mixture_likelihood = mixture_likelihood

    return (mixture_likelihood, numarray.exp(logalpha), l_exp)
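#
# The E-step above normalizes in log space: P[model j | seq i] is
# exp(l[i,j] - sumlogs(l[i])). sumlogs is the usual log-sum-exp; a minimal
# stand-in is sketched below, shown only to document the contract this code
# assumes of it (the real sumlogs comes from the surrounding library):
#
import math

def _sumlogs_sketch(logvals):
    m = max(logvals)  # factor out the largest term for numerical stability
    return m + math.log(sum(math.exp(v - m) for v in logvals))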
def simul(isodir): """ Read in parameters, data and isochrones. Create callback functions for the optimization routine, one of which will return the log(likelihood) and the other of which will print the best-fit parameter values. Having done this, call the optimization routine to minimize log(L). """ log_tmax = math.log10(13.7e9) params=parameters() eps=0.01 #feh params.set("m0y" ,1.0, 0,2.5,True) params.set("m0cphi",-0.001, -pi/2+eps,0,True) params.set("m0cr" ,0.01, 0,2,True) params.set("m1y" ,-2.5, -2.5,0.9,True) params.set("m1cphi",1.67, pi/2+eps,pi,True) params.set("m1cr" ,1.06, 0,2,True) params.set("sigma" ,0.2, 0,1, True) params.set("dsigmadlogt" ,0.2, -1,1, True) #sfr params.set("s0x" ,8.0, 8.0,9.0,True) params.set("s0y" ,0.5, 0.0,1,True) params.set("s0tx" ,0.1, 0.,1.,True) params.set("s0ty" ,0.1, 0,1,True) params.set("s1tx" ,0.1, 0.,1.,True) params.set("s1ty" ,0.1, -1,1,True) params.set("s1x" ,0.5, 0,1,True) params.set("s1y" ,1.0, 0.0,1.0,False) params.set("s2x" ,log_tmax, 9.5,10.25,True) params.set("s2y" ,0.1, 0,1.0,True) params.set("s2tx" ,0.1, 0.,1.,True) params.set("s2ty" ,0.1, 0.,1.,True) if len(sys.argv) == 2: if sys.argv[1] == "start": #run with a param to start from the beginning params.save() params.load() if not params.pars.has_key('dsigmadlogt'): params.set('dsigmadlogt',0.,0,False) if not params.pars.has_key('dsigmadlogs'): # Hook for SFR-depenedent spread; not fully implemented params.set('dsigmadlogs',0.,0,False) if len(sys.argv) == 2: if sys.argv[1] == "nudge": #Tweak the values near their limits print "Nudging parameters near the limits" p1 = params.getl() utils.nudge(params) p2 = params.getl() for pp1,pp2 in zip(p1,p2): if pp1[1] != pp2[1]: print "%s %.8f -> %.8f" % (pp1[0],pp1[1],pp2[1]) data=iso.readfits(isodir+"/datarr.fits") isos = iso.readisos(isodir) t=utils.frange(8,log_tmax,0.001) def f(par): params.setvalues(par) p = params w=utils.calculateweights(t,sfr(t,params)) # isow=iso.getisosweights(w,10.**t,metallicity(t,params),isos) if p.sigma > 0.: if p.dsigmadlogt == 0.: isow=iso.getisosweights_gauss(w,10.**t,metallicity(t,p),isos,p.sigma) if p.dsigmadlogt != 0.: # print "Gaussian sigma, ds/dlogt ",p.sigma,p.dsigmadlogt isow=iso.getisosweights_vgauss(w,10.**t,metallicity(t,p),isos,p.sigma,p.dsigmadlogt) if p.dsigmadlogs != 0.: # Hook for SFR-depenedent spread; not fully implemented isow=iso.getisosweights_sgauss(w,10.**t,sfr(t,params),metallicity(t,p), isos,p.sigma,p.dsigmadlogs) else: isow=iso.getisosweights(w,10.**t,metallicity(t,p),isos) m=iso.computeCMD(isow,isos) m=utils.normalize(m,sum(data.flat)) return utils.loglikelihood(m,data) d = numarray.maximum(data,1e-20) llhC=sum( (d*numarray.log(d)).flat ) def b(par,value,iter): params.setvalues(par) params.save() print "henry:",value,"tom:",2.0*(value+llhC),"iter:",iter,time.ctime() sys.stdout.flush() optimization.minmax(optimization.fmin_simplex,f, params.getvalues(),params.min(),params.max(),b)
def logistic_regression(x, y, beta_start=None, verbose=False,
                        CONV_THRESH=1.e-3, MAXIT=500):
    """
    Uses the Newton-Raphson algorithm to calculate a maximum-likelihood
    estimate for logistic regression. The algorithm is known as
    'iteratively re-weighted least squares', or IRLS.

    x - rank-1 or rank-2 array of predictors. If x is rank-2,
        the number of predictors = x.shape[0] = N.
        If x is rank-1, it is assumed N=1.
    y - binary outcomes (if N>1 len(y) = x.shape[1], if N=1 len(y) = len(x))
    beta_start - initial beta vector (default zeros(N+1,x.dtype.char))
    if verbose=True, diagnostics printed for each iteration (default False).
    MAXIT - max number of iterations (default 500)
    CONV_THRESH - convergence threshold (sum of absolute differences of
                  beta-beta_old, default 0.001)

    returns beta (the logistic regression coefficients, an N+1 element
    vector), J_bar (the (N+1)x(N+1) information matrix), and l (the
    log-likelihood).

    J_bar can be used to estimate the covariance matrix and the standard
    error for beta. l can be used for a chi-squared significance test.

    covmat = inverse(J_bar)     --> covariance matrix of coefficients (beta)
    stderr = sqrt(diag(covmat)) --> standard errors for beta
    deviance = -2l              --> scaled deviance statistic
    chi-squared value for -2l is the model chi-squared test.
    """
    if x.shape[-1] != len(y):
        raise ValueError, "x.shape[-1] and y should be the same length!"
    try:
        N, npreds = x.shape[1], x.shape[0]
    except:  # single predictor, use simple logistic regression routine.
        return _simple_logistic_regression(x, y, beta_start=beta_start,
                                           CONV_THRESH=CONV_THRESH,
                                           MAXIT=MAXIT, verbose=verbose)
    if beta_start is None:
        beta_start = NA.zeros(npreds + 1, x.dtype.char)
    X = NA.ones((npreds + 1, N), x.dtype.char)
    X[1:, :] = x
    Xt = NA.transpose(X)
    iter = 0; diff = 1.; beta = beta_start  # initial values
    if verbose:
        print 'iteration  beta log-likelihood |beta-beta_old|'
    while iter < MAXIT:
        beta_old = beta
        ebx = NA.exp(NA.dot(beta, X))
        p = ebx/(1. + ebx)
        l = NA.sum(y*NA.log(p) + (1. - y)*NA.log(1. - p))  # log-likelihood
        s = NA.dot(X, y - p)                               # scoring function
        J_bar = NA.dot(X*p, Xt)                            # information matrix
        beta = beta_old + NA.dot(LA.inverse(J_bar), s)     # new value of beta
        diff = NA.sum(NA.fabs(beta - beta_old))  # sum of absolute differences
        if verbose:
            print iter + 1, beta, l, diff
        if diff <= CONV_THRESH:
            break
        iter = iter + 1
    if iter == MAXIT and diff > CONV_THRESH:
        print 'warning: convergence not achieved with threshold of %s in %s iterations' % (CONV_THRESH, MAXIT)
    return beta, J_bar, l
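#
# The post-fit statistics described in the docstring above, as a hedged
# sketch. NA and LA are assumed to be the array and linear-algebra modules
# this file already imports; beta, J_bar, l come from a call such as:
#
# beta, J_bar, l = logistic_regression(x, y)
covmat = LA.inverse(J_bar)             # covariance matrix of beta
stderr = NA.sqrt(NA.diagonal(covmat))  # standard errors for beta
deviance = -2.*l                       # scaled deviance statistic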
print darray.getLabels()
print "Number of rows = ", darray.rows

import hippo
app = hippo.HDApp()
canvas = app.canvas()
plot = Display("Color Plot", darray, ('GLON', 'GLAT', 'nil', 'nil'))
canvas.addDisplay(plot)
#
# Calculate Log(energy) and add it as a new column
#
import numarray
darray['LogE'] = numarray.log(darray['energy'])
lplot = Display('Histogram', darray, ('LogE', ))
lplot.setLog('y', True)
canvas.addDisplay(lplot)
#
# Compare it with logarithmic binning
#
clplot = Display('Histogram', darray, ('energy', ))
canvas.addDisplay(clplot)
clplot.setLog('x', True)
clplot.setLog('y', True)
#
# Apply a cut to the displays
def vc_v200_nfw(x, c):
    top = N.log(1.0 + c*x) - c*x/(1.0 + c*x)
    bottom = N.log(1.0 + c) - c/(1.0 + c)
    vc2 = top/(x*bottom)
    vc = N.sqrt(vc2)
    return vc
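#
# Sanity check for the NFW rotation curve above, with x = r/r200 and
# concentration c: at r = r200 (x = 1) the numerator equals the
# denominator, so vc = v200 exactly for any c (N as imported above).
#
print vc_v200_nfw(1.0, 5.0), vc_v200_nfw(1.0, 20.0)  # both print 1.0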
def log_array(xmin, xmax, npts):
    return xmin * num.exp(num.arange(npts, type=num.Float)/(npts - 1)
                          * num.log(xmax/xmin))
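#
# Example: an 11-point logarithmic grid from 100 to 10000, endpoints
# included (num as imported above, matching the type=num.Float usage).
#
grid = log_array(100., 10000., 11)
print grid[0], grid[-1]  # 100.0 10000.0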