def testMarginalise(self):
    def factorial(n):
        if n == 1: return 1
        return factorial(n - 1) * n
    var = set('c')
    b = self.a.Marginalise(var)
    var2 = set(['c','a'])
    c = self.a.Marginalise(var2)
    d = DiscretePotential(['b','c'], [3,4], na.arange(12))

    # extended test
    a = DiscretePotential('a b c d e f'.split(), [2,3,4,5,6,7], \
                          na.arange(factorial(7)))
    aa = a.Marginalise('c f a'.split())

    assert(b.names == self.a.names - var and \
           b[0,1] == na.sum(self.a[0,1]) and \
           c.names == self.a.names - var2 and \
           na.alltrue(c.cpt.flat == na.sum(na.sum(self.a.cpt,axis=2), axis=0)) and
           aa.shape == (3,5,6) and \
           aa.names_list == 'b d e'.split() and \
           aa[2,4,3] == na.sum(a[:,2,:,4,3,:].flat)), \
           " Marginalisation doesn't work"
def PlotTurbulenceIllustr(a):
    """
    Can generate the grid with
    g=kolmogorovutils.GenerateKolmogorov3D( 1025, 129, 129)
    a=kolmogorovutils.GridToNumarray(g)
    """
    for x in [1,10,100]:
        suba = numarray.sum(a[:,:,0:x], axis=2)
        suba.transpose()
        pylab.clf()
        pylab.matshow(suba)
        pylab.savefig("temp/turb3d-sum%03i.eps" % x)
    for x in [1,10,100]:
        for j in [0,1,2]:
            suba = numarray.sum(a[:,:200,j*x:(j+1)*x], axis=2)
            suba.transpose()
            pylab.clf()
            pylab.matshow(suba)
            pylab.savefig("temp/turb3d-sum%03i-s%i.eps" % (x,j))
def stderr(x, s, m):
    # calculates the standard error of x with errors s about mean m
    w = 1.0/s**2
    sumw = N.sum(w)
    sumw2 = N.sum(w*w)
    sumdx2 = N.sum(w * (x - m)**2)
    return sqrt(sumdx2/sumw2)
def calc_scatter(x, y, sig, int_scat, a, b):
    if int_scat is None:
        int_scat = 0.0
    w = 1.0 / (sig**2 + int_scat**2)
    wSum = N.sum(w)
    varw = N.sum((y - a - b*x)**2 * w) / wSum
    #var = N.sum((y - a - b*x)**2) / len(x)
    stdev = sqrt(varw)
    return stdev
def mean(x, s=None):
    # calculates the weighted mean of x with errors s
    if s is None:
        return N.sum(x) / len(x)
    else:
        w = 1.0/s**2
        sumw = N.sum(w)
        sumx = N.sum(w*x)
        return sumx/sumw
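# A quick self-check of the inverse-variance weighting used in mean() above.
# This is a minimal sketch, assuming numpy as a stand-in for the N/numarray
# module; the data values are made up for illustration.
import numpy as np

x = np.array([10.0, 12.0, 11.0])
s = np.array([1.0, 2.0, 0.5])        # 1-sigma errors on each point
w = 1.0 / s**2
print(np.sum(w * x) / np.sum(w))     # ~10.86, pulled towards the best-measured point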
def draw_hour(self): months = self.get_month_range() elem = self.element_w.getvalue() Busy.Manager.busy() self.update_idletasks() g = self.graphics['hour'] # clear display g['ax'].cla() # title if len(months) == 1: title = '%s %s %s (%d-%d)' % ((self.id_, \ self.Element.get(elem, ''), self.Month[months[0]]) + \ tuple(self.data['years'])) else: title = '%s %s %s-%s (%d-%d)' % ((self.id_, \ self.Element.get(elem, ''), self.Month[months[0]], \ self.Month[months[-1]]) + tuple(self.data['years'])) g['ax'].set_title(title) cell_text = [] col_labels = ['%02d' % x for x in range(24)] xvals = na.arange(24) + 0.2 # the bottom values for stacked bar chart yoff = na.zeros(len(col_labels), na.Float32) num_cat = len(ClimLib.FlightCats) bar = [None]*num_cat widths = [0.6]*24 # stacked bars g['ax'].xaxis.set_major_locator(multipleLocator) # sum over selected range of months and wind directions tmp = na.sum(na.sum(na.take(self.data[elem], months, 0), 0), 1) # sum over all categories total = na.sum(tmp, -1) for row in range(num_cat): yvals = 100.0*tmp[:,row]/total bar[row] = g['ax'].bar(xvals, yvals, widths, bottom=yoff, color=self.colors[num_cat-row-1]) yoff += yvals cell_row = ['%.0f' % yvals[n] for n in range(24)] cell_text.append(cell_row) cell_text.reverse() g['ax'].table(cellText=cell_text, rowLabels=self.RowLabels, rowColours=self.colors, colLabels=col_labels, loc='bottom') # legend legend_bars = [x[0] for x in bar] legend_bars.reverse() g['ax'].legend(legend_bars, self.RowLabels) # axes g['ax'].set_ylabel('Percent occurrence') g['ax'].set_xticks([]) ymax, delta = self.set_yticks(elem, 'hour') g['ax'].set_yticks(na.arange(0, ymax, delta)) g['ax'].grid(True) g['canvas'].draw() Busy.Manager.notbusy()
def variance(x, s=None, m=None):
    # calculates the weighted variance of x with errors s and mean m
    if m is None:
        m = mean(x, s)
    if s is None:
        sumdx2 = N.sum((x - m)**2)
        return sumdx2/len(x)
    else:
        w = 1.0/s**2
        sumw = N.sum(w)
        sumdx2 = N.sum(w * (x - m)**2)
        return sumdx2/sumw
def Print(self):
    for c in self.v.values():
        print c
        print c.cpt
        print c.cpt.shape
        print na.sum(c.cpt.flat)
    for c in self.e.values():
        print c
        print c.cpt
        print c.cpt.shape
        print na.sum(c.cpt.flat)
def setCounts(self):
    """ set the distribution's underlying parameters (mu, sigma)
    to match the samples
    """
    assert(self.samples), "No samples given..."
    samples = na.array(self.samples, type='Float32')
    self.mean = na.sum(samples) / len(samples)
    deviation = samples - self.mean
    squared_deviation = deviation*deviation
    sum_squared_deviation = na.sum(squared_deviation)
    self.sigma = (sum_squared_deviation / (len(samples)-1.0)) ** 0.5
def _simple_logistic_regression(x,y,beta_start=None,verbose=False, CONV_THRESH=1.e-3,MAXIT=500): """ Faster than logistic_regression when there is only one predictor. """ if len(x) != len(y): raise ValueError, "x and y should be the same length!" if beta_start is None: beta_start = NA.zeros(2,x.dtype.char) iter = 0; diff = 1.; beta = beta_start # initial values if verbose: print 'iteration beta log-likliehood |beta-beta_old|' while iter < MAXIT: beta_old = beta p = NA.exp(beta[0]+beta[1]*x)/(1.+NA.exp(beta[0]+beta[1]*x)) l = NA.sum(y*NA.log(p) + (1.-y)*NA.log(1.-p)) # log-likliehood s = NA.array([NA.sum(y-p), NA.sum((y-p)*x)]) # scoring function # information matrix J_bar = NA.array([[NA.sum(p*(1-p)),NA.sum(p*(1-p)*x)], [NA.sum(p*(1-p)*x),NA.sum(p*(1-p)*x*x)]]) beta = beta_old + NA.dot(LA.inverse(J_bar),s) # new value of beta diff = NA.sum(NA.fabs(beta-beta_old)) # sum of absolute differences if verbose: print iter+1, beta, l, diff if diff <= CONV_THRESH: break iter = iter + 1 return beta, J_bar, l
def _simple_logistic_regression(x,y,beta_start=None,verbose=False, CONV_THRESH=1.e-3,MAXIT=500): """ Faster than logistic_regression when there is only one predictor. """ if len(x) != len(y): raise ValueError, "x and y should be the same length!" if beta_start is None: beta_start = NA.zeros(2,x.typecode()) iter = 0; diff = 1.; beta = beta_start # initial values if verbose: print 'iteration beta log-likliehood |beta-beta_old|' while iter < MAXIT: beta_old = beta p = NA.exp(beta[0]+beta[1]*x)/(1.+NA.exp(beta[0]+beta[1]*x)) l = NA.sum(y*NA.log(p) + (1.-y)*NA.log(1.-p)) # log-likliehood s = NA.array([NA.sum(y-p), NA.sum((y-p)*x)]) # scoring function # information matrix J_bar = NA.array([[NA.sum(p*(1-p)),NA.sum(p*(1-p)*x)], [NA.sum(p*(1-p)*x),NA.sum(p*(1-p)*x*x)]]) beta = beta_old + NA.dot(LA.inverse(J_bar),s) # new value of beta diff = NA.sum(NA.fabs(beta-beta_old)) # sum of absolute differences if verbose: print iter+1, beta, l, diff if diff <= CONV_THRESH: break iter = iter + 1 return beta, J_bar, l
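# A minimal, self-contained sketch of the same single-predictor Newton-Raphson
# (IRLS) update shown in _simple_logistic_regression above, written with numpy
# in place of numarray/LinearAlgebra. The function name and the synthetic data
# below are invented for illustration only.
import numpy as np

def simple_logit_sketch(x, y, max_iter=500, tol=1e-3):
    beta = np.zeros(2)                                    # [intercept, slope]
    for _ in range(max_iter):
        p = 1.0 / (1.0 + np.exp(-(beta[0] + beta[1] * x)))
        score = np.array([np.sum(y - p), np.sum((y - p) * x)])
        w = p * (1.0 - p)
        J = np.array([[np.sum(w),     np.sum(w * x)],
                      [np.sum(w * x), np.sum(w * x * x)]])   # information matrix
        step = np.linalg.solve(J, score)
        beta = beta + step
        if np.sum(np.abs(step)) <= tol:                   # same convergence test
            break
    return beta

if __name__ == '__main__':
    rng = np.random.default_rng(0)
    xs = rng.normal(size=200)
    ys = (rng.random(200) < 1.0 / (1.0 + np.exp(-(0.5 + 2.0 * xs)))).astype(float)
    print(simple_logit_sketch(xs, ys))   # roughly recovers (0.5, 2.0)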
def test_Mcmc(): print "*** Testing Mcmc ***" rosen = optimizers.Rosen() startingValues = vector([2., 2.]) rosen.setParamValues(startingValues) rosen.setParamBounds("x", -10., 10.) rosen.setParamBounds("y", -10., 10.) verbose = 0 minuitObj = optimizers.Minuit(rosen) minuitObj.find_min(verbose, 1e-4) # # use uncertainties from Minuit to set the transition widths # sigmas = minuitObj.getUncertainty() scale = 1 widths = vector(map(lambda x: x * scale, sigmas)) mcmcObj = optimizers.Mcmc(rosen) mcmcObj.setTransitionWidths(widths) samples = vecvec() nsamp = 100000 # # Do a burn in... # mcmcObj.generateSamples(samples, nsamp) # # then the real thing... # nsamp = 100000 mcmcObj.generateSamples(samples, nsamp) x = list(samples) samps = [] for j in range(len(x[0])): samps.append(numarray.array(map(lambda i: x[i][j], range(len(x))))) print "MCMC results for ", nsamp, " trials:" for j in range(len(samples[0])): x = samps[j] mc_avg = numarray.sum(x) / len(x) mc2_avg = numarray.sum(x * x) / len(x) print "Parameter %i: %e +/- %e" % ( j, mc_avg, math.sqrt(mc2_avg - mc_avg * mc_avg)) print "*** Mcmc tests completed ***\n" return samps
def sample(self, index={}):
    """ returns the index of the sampled value
    eg. a=Pr(A)=[0.5 0.3 0.0 0.2]
        a.sample() --> 5/10 times will return 0
                       3/10 times will return 1
                       2/10 times will return 3
                       2 will never be returned
    - returns an integer
    - only works for one variable tables
      eg. a=Pr(A,B); a.sample() --> ERROR
    """
    assert(len(self.names) == 1 or \
           len(self.names - set(index.keys())) == 1), \
           "Sample only works for one variable tables"
    if not index == {}:
        tcpt = self.__getitem__(index)
    else:
        tcpt = self.cpt
    # csum is the cumulative sum of the distribution
    # csum[i] = na.sum(self.cpt[0:i])
    # csum[-1] = na.sum(self.cpt)
    csum = [na.sum(tcpt.flat[0:end+1]) for end in range(tcpt.shape[0])]
    # sample in this distribution
    r = random.random()
    for i, cs in enumerate(csum):
        if r < cs:
            return i
    return i
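# Standalone sketch of the cumulative-sum sampling trick used in sample() above,
# assuming numpy instead of numarray; the distribution is the docstring's example.
import numpy as np
import random

def sample_index(p):
    csum = np.cumsum(p)            # e.g. [0.5, 0.8, 0.8, 1.0]
    r = random.random()
    for i, cs in enumerate(csum):
        if r < cs:
            return i
    return len(p) - 1

counts = [0, 0, 0, 0]
for _ in range(10000):
    counts[sample_index([0.5, 0.3, 0.0, 0.2])] += 1
print(counts)   # roughly [5000, 3000, 0, 2000]; index 2 is never drawn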
def Animate(g):
    for i in range(1,64,5):
        pylab.clf()
        x = g[0:i,:,:]
        y = numarray.sum(x, axis=0)
        pylab.matshow(y)
        pylab.savefig("temp/3dturb-%03i.png" % i)
def run_kut5(F, x, y, h): # Runge-Kutta-Fehlberg formulas C = array([37./378, 0., 250./621, 125./594, \ 0., 512./1771]) D = array([2825./27648, 0., 18575./48384, \ 13525./55296, 277./14336, 1./4]) n = len(y) K = zeros((6, n), type=Float64) K[0] = h * F(x, y) K[1] = h * F(x + 1. / 5 * h, y + 1. / 5 * K[0]) K[2] = h * F(x + 3. / 10 * h, y + 3. / 40 * K[0] + 9. / 40 * K[1]) K[3] = h*F(x + 3./5*h, y + 3./10*K[0]- 9./10*K[1] \ + 6./5*K[2]) K[4] = h*F(x + h, y - 11./54*K[0] + 5./2*K[1] \ - 70./27*K[2] + 35./27*K[3]) K[5] = h*F(x + 7./8*h, y + 1631./55296*K[0] \ + 175./512*K[1] + 575./13824*K[2] \ + 44275./110592*K[3] + 253./4096*K[4]) # Initialize arrays {dy} and {E} E = zeros((n), type=Float64) dy = zeros((n), type=Float64) # Compute solution increment {dy} and per-step error {E} for i in range(6): dy = dy + C[i] * K[i] E = E + (C[i] - D[i]) * K[i] # Compute RMS error e e = sqrt(sum(E**2) / n) return dy, e
def sample(self, index={}): """ in order to sample from this distributions, all parents must be known """ # mean = self.mean.copy() # sigma = self.sigma.copy() ## if index: ## # discrete parents ## for v,i in enumerate(reversed(self.discrete_parents)): ## # reverse: avoid missing axes when taking in random ## # we start from the end, that way all other dimensions keep the same index ## if index.has_key(v.name): ## # take the corresponding mean; +1 because first axis is the mean ## mean = na.take(mean, index[v], axis=(i+1) ) ## # take the corresponding covariance; +2 because first 2 axes are the cov ## sigma = na.take(sigma, index[v], axis=(i+2) ) ## ## # continuous parents ## for v in reversed(self.continuous_parents): ## if index.has_key(v): d_index, c_index = self._numIndexFromDict(index) mean = na.array(self.mean[tuple([slice(None, None, None)] + d_index)]) sigma = self.sigma[tuple([slice(None, None, None)] * 2 +d_index)] wi = na.sum(self.weights * na.array(c_index)[na.NewAxis,...], axis=1) # if self.continuous_parents: # wi = na.array(self.weights[tuple([slice(None,None,None)]+c_index)]) # else: wi = 0.0 # return a random number from a normal multivariate distribution return float(ra.multivariate_normal(mean + wi, sigma))
def run_kut5(F, x, y, h): # Runge-Kutta-Fehlberg formulas C = array([37.0 / 378, 0.0, 250.0 / 621, 125.0 / 594, 0.0, 512.0 / 1771]) D = array([2825.0 / 27648, 0.0, 18575.0 / 48384, 13525.0 / 55296, 277.0 / 14336, 1.0 / 4]) n = len(y) K = zeros((6, n), type=Float64) K[0] = h * F(x, y) K[1] = h * F(x + 1.0 / 5 * h, y + 1.0 / 5 * K[0]) K[2] = h * F(x + 3.0 / 10 * h, y + 3.0 / 40 * K[0] + 9.0 / 40 * K[1]) K[3] = h * F(x + 3.0 / 5 * h, y + 3.0 / 10 * K[0] - 9.0 / 10 * K[1] + 6.0 / 5 * K[2]) K[4] = h * F(x + h, y - 11.0 / 54 * K[0] + 5.0 / 2 * K[1] - 70.0 / 27 * K[2] + 35.0 / 27 * K[3]) K[5] = h * F( x + 7.0 / 8 * h, y + 1631.0 / 55296 * K[0] + 175.0 / 512 * K[1] + 575.0 / 13824 * K[2] + 44275.0 / 110592 * K[3] + 253.0 / 4096 * K[4], ) # Initialize arrays {dy} and {E} E = zeros((n), type=Float64) dy = zeros((n), type=Float64) # Compute solution increment {dy} and per-step error {E} for i in range(6): dy = dy + C[i] * K[i] E = E + (C[i] - D[i]) * K[i] # Compute RMS error e e = sqrt(sum(E ** 2) / n) return dy, e
def combine_count(self):
    """Combines 4-dimensional dictionary along items"""
    month_ix, hour_ix, wind_ix = range(3)
    tc = self.data[self.stats_dialog.element.getvalue()]
    items = self.stats_dialog.columns.getvalue()
    if 'month' not in items:
        tc = na.sum(tc, month_ix)
        hour_ix -= 1
        wind_ix -= 1
    if 'hour' not in items:
        tc = na.sum(tc, hour_ix)
        wind_ix -= 1
    if 'wdir' not in items:
        tc = na.sum(tc, wind_ix)
    return tc
def drawmeridians(self,ax,meridians,color='k',linewidth=1., \ linestyle='--',dashes=[1,1]): """ draw meridians (longitude lines). ax - current axis instance. meridians - list containing longitude values to draw (in degrees). color - color to draw meridians (default black). linewidth - line width for meridians (default 1.) linestyle - line style for meridians (default '--', i.e. dashed). dashes - dash pattern for meridians (default [1,1], i.e. 1 pixel on, 1 pixel off). """ if self.projection not in ['merc','cyl']: lats = N.arange(-80,81).astype('f') else: lats = N.arange(-90,91).astype('f') xdelta = 0.1*(self.xmax-self.xmin) ydelta = 0.1*(self.ymax-self.ymin) for merid in meridians: lons = merid*N.ones(len(lats),'f') x,y = self(lons,lats) # remove points outside domain. testx = N.logical_and(x>=self.xmin-xdelta,x<=self.xmax+xdelta) x = N.compress(testx, x) y = N.compress(testx, y) testy = N.logical_and(y>=self.ymin-ydelta,y<=self.ymax+ydelta) x = N.compress(testy, x) y = N.compress(testy, y) if len(x) > 1 and len(y) > 1: # split into separate line segments if necessary. # (not necessary for mercator or cylindrical). xd = (x[1:]-x[0:-1])**2 yd = (y[1:]-y[0:-1])**2 dist = N.sqrt(xd+yd) split = dist > 500000. if N.sum(split) and self.projection not in ['merc','cyl']: ind = (N.compress(split,MLab.squeeze(split*N.indices(xd.shape)))+1).tolist() xl = [] yl = [] iprev = 0 ind.append(len(xd)) for i in ind: xl.append(x[iprev:i]) yl.append(y[iprev:i]) iprev = i else: xl = [x] yl = [y] # draw each line segment. for x,y in zip(xl,yl): # skip if only a point. if len(x) > 1 and len(y) > 1: l = Line2D(x,y,linewidth=linewidth,linestyle=linestyle) l.set_color(color) l.set_dashes(dashes) ax.add_line(l)
def __init__(self, names, shape, g=None, h=None, K=None):
    Potential.__init__(self, names)
    self.shape = shape
    # set parameters to 0s
    self.n = na.sum(shape)
    if not g:
        self.g = 0.0
    else:
        self.g = float(g)
    if not h:
        self.h = na.zeros(shape=(self.n), type='Float32')
    else:
        self.h = na.array(h, shape=(self.n), type='Float32')
    if not K:
        self.K = na.zeros(shape=(self.n, self.n), type='Float32')
    else:
        self.K = na.array(K, shape=(self.n, self.n), type='Float32')
def med_func(x, y, sig, b):
    small = 1.0e-8
    aa = median(y - b*x)
    d = (y - aa - b*x)
    mad = median(N.absolute(d))
    s = mad / 0.6745
    d /= sig
    sign = N.compress(N.absolute(d) > small, d)
    sign = sign / N.absolute(sign)
    x = N.compress(N.absolute(d) > small, x)
    sum = N.sum(sign * x)
    return sum, s, aa
def set_yticks(self, elem, item): if self.tkAutoScale.get(): if item in ['month', 'hour']: # sum over wind directions and categories total = na.sum(na.sum(self.data[elem], 3), 2) # fudge columns with no events total[na.ieeespecial.index(total, na.ieeespecial.ZERO)] = 1.0 ymax = 100.0*(na.sum(na.sum(self.data[elem][:,:,:,:-1], 3), 2)/total).max() elif item in ['wdir']: # sum categories total = na.sum(self.data[elem], 3) # fudge columns with no events total[na.ieeespecial.index(total, na.ieeespecial.ZERO)] = 1.0 ymax = 100.0*(na.sum(self.data[elem][:,:,:,:-1], 3)/total).max() else: raise Avn.AvnError('Bug in set_yticks(): unexpected item: %s' % \ item) self.tkScale.set(ymax) else: ymax = int(self.tkScale.get()) if ymax > 30: delta = 10 elif ymax > 10: delta = 5 else: delta = 2 return ymax+delta, delta
def recalculate_position(self):
    """ takes all the positions of the associated observation list
    and recalculates a position """
    global blah
    if self.stype == "real":
        print "! not supposed to do this with a real source.."
        return
    if len(self.associated_obs) == 0:
        return
    pos_array = numarray.fromlist(
        [[x.pos[0], x.pos[1], x.assumed_err[0], x.assumed_err[1]]
         for x in self.associated_obs])
    raa = numarray.fromlist([x[0] for x in pos_array])
    raerra = numarray.fromlist([x[2] for x in pos_array])
    deca = numarray.fromlist([x[1] for x in pos_array])
    decerra = numarray.fromlist([x[3] for x in pos_array])
    ra = numarray.sum(raa / raerra**2) / numarray.sum(1.0 / raerra**2)
    dec = numarray.sum(deca / decerra**2) / numarray.sum(1.0 / decerra**2)
    raerr = math.sqrt(1.0 / numarray.sum(1.0 / raerra**2))
    decerr = math.sqrt(1.0 / numarray.sum(1.0 / decerra**2))
    self.current_pos = [ra, dec]
    self.current_err = [raerr, decerr]
def calcCorrelationHelper(s1p, s2p):
    # if the traits share less than six strains, then we don't
    # bother with the correlations
    if len(s1p) < 6:
        return 0.0
    # subtract by x-bar and y-bar elementwise
    #oldS1P = s1p.copy()
    #oldS2P = s2p.copy()
    s1p = (s1p - numarray.average(s1p)).astype(numarray.Float64)
    s2p = (s2p - numarray.average(s2p)).astype(numarray.Float64)
    # square for the variances
    s1p_2 = numarray.sum(s1p**2)
    s2p_2 = numarray.sum(s2p**2)
    try:
        corr = (numarray.sum(s1p*s2p) /
                numarray.sqrt(s1p_2 * s2p_2))
    except ZeroDivisionError:
        corr = 0.0
    return corr
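# A tiny self-check of the same centred-product correlation formula, sketched
# with numpy in place of numarray; the two series below are made-up data.
import numpy as np

s1 = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
s2 = np.array([2.0, 4.1, 5.9, 8.2, 9.8, 12.1])
s1c = s1 - s1.mean()
s2c = s2 - s2.mean()
corr = np.sum(s1c * s2c) / np.sqrt(np.sum(s1c**2) * np.sum(s2c**2))
print(round(corr, 4), round(float(np.corrcoef(s1, s2)[0, 1]), 4))  # both ~1.0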
def normalize(self, dim=-1):
    """ If dim=-1 all elements sum to 1. Otherwise sum to specific dimension,
    such that sum(Pr(x=i|Pa(x))) = 1 for all values of i and a specific set
    of values for Pa(x)
    """
    if dim == -1 or len(self.cpt.shape) == 1:
        self.cpt /= self.cpt.sum()
    else:
        ndim = self.assocdim[dim]
        order = range(len(self.names_list))
        order[0] = ndim
        order[ndim] = 0
        tcpt = na.transpose(self.cpt, order)
        t1cpt = na.sum(tcpt, axis=0)
        t1cpt = na.resize(t1cpt, tcpt.shape)
        tcpt = tcpt/t1cpt
        self.cpt = na.transpose(tcpt, order)
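# A small numpy illustration of the same idea: normalise a conditional
# probability table so each conditional distribution sums to 1. The 2x3 table
# (rows = parent values, columns = x values) is invented for this sketch.
import numpy as np

cpt = np.array([[1.0, 2.0, 1.0],
                [3.0, 1.0, 4.0]])
cpt /= cpt.sum(axis=1, keepdims=True)   # sum over x for each parent value
print(cpt.sum(axis=1))                  # [1. 1.]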
def binitsumequal(x, y, n):
    #bin arrays x, y into n bins, returning xbin,ybin
    nx = len(x)
    #x=N.array(x,'f')
    #y=N.array(y,'f')
    y = N.take(y, N.argsort(x))
    x = N.take(x, N.argsort(x))
    xbin = N.zeros(n, 'f')
    ybin = N.zeros(n, 'f')
    #ybinerr=N.zeros(n,'f')
    for i in range(n):
        nmin = i * int(float(nx) / float(n))
        nmax = (i + 1) * int(float(nx) / float(n))
        xbin[i] = N.average(x[nmin:nmax])
        ybin[i] = N.sum(y[nmin:nmax])
        #xbin[i]=N.average(x[nmin:nmax])
        #ybin[i]=N.average(y[nmin:nmax])
        #ybinerr[i]=scipy.stats.std(y[nmin:nmax])
    return xbin, ybin  #, ybinerr
def wmspec(data, windows=None, doxs=1): """Compute power and cross spectra, with a window or multitapers. data and doxs are as for multispec (except the default for doxs is different). If windows is one-dimensional, it is treated as a windowing function. If windows is two-dimensional, it is treated as a set of multitapers.""" data = num.asarray(data) if windows is None: windows = [1] else: windows = num.array(windows, copy=0) win_length = windows.shape[-1] windows.shape = (-1, win_length) nwin = len(windows) # subtract the average avg = num.sum(data, -1) / data.shape[-1] avg = num.asarray(avg) data = data - avg[..., num.NewAxis] total_power = 0 if doxs: total_cross = 0 for window in windows: spectra = multispec(data * window, doxs=doxs) if doxs: total_power += spectra[0] total_cross += spectra[1] else: total_power += spectra total_power /= nwin if doxs: total_cross /= nwin if doxs: return total_power, total_cross else: return total_power
def biwt_func(x, y, sig, b):
    # Problems?!?
    aa = median(y - b*x)
    d = (y - aa - b*x)
    mad = median(N.absolute(d))
    s = mad / 0.6745
    d /= sig
    # biweight
    c = 6.0
    f = d*(1-d**2/c**2)**2
    sum = N.sum(N.compress(N.absolute(d) <= c, x*f))
    # lorentzian
    #f = d/(1+0.5*d**2)
    #sum = N.sum(x*f)
    # MAD
    #small = 1.0e-8
    #sign = N.compress(N.absolute(d) > small, d)
    #sign = sign / N.absolute(sign)
    #sum = N.sum(N.compress(N.absolute(d) > small, x)*sign)
    return sum, s, aa
def azmr(self): x=N.compress((self.mpaflag > 0.1) & (self.ew > 4.) & (self.Mabs < -18.),self.Mabs) y=N.compress((self.mpaflag > 0.1) & (self.ew > 4.) & (self.Mabs < -18.),self.ar) x1=N.compress((self.mpaflag > 0.1) & (self.ew > 4.) & (self.Mabs < -20.38),self.Mabs) y1=N.compress((self.mpaflag > 0.1) & (self.ew > 4.) & (self.Mabs < -20.38),self.ar) y=2.5*N.log10(y) #pylab.plot(x,y,'k.',markersize=0.1,zorder=1) print "average Ar for Mr < -20.38 = %5.2f +/- %5.2f"%(N.average(y1),pylab.std(y1)) (xbin,ybin)=my.binit(x1,y1,20) #(xbin,ybin,ybinerr)=my.biniterr(x,y,20) for i in range(len(xbin)): print i,xbin[i],ybin[i] print "Average of binned values = ",N.average(ybin) print "average Ar for Mr < -20.38 = %5.2f +/- %5.2f"%(N.average(N.log10(y1)),pylab.std(N.log10(y1))) #pylab.axis([-26.,-12.,0.1,30.]) pylab.xlabel(r'$\rm{M_r}$',fontsize=28.) pylab.ylabel(r'$\rm{A_r}$',fontsize=28.) (xbin,ybin)=my.binit(x,y,20) #(xbin,ybin,ybinerr)=my.biniterr(x,y,20) for i in range(len(xbin)): print i,xbin[i],ybin[i] pylab.plot(xbin,ybin,'r-',lw=5) ax=pylab.gca() xmin=-24. xmax=-18. ymin=-1. ymax=3. my.contourf(x,y,xmin,xmax,ymin,ymax) pylab.axvline(x=-20.6,linewidth=3,ls='--',c='g') xl=N.arange(-23.,-20.5,.2) yl=0.76*N.ones(len(xl),'f') pylab.plot(xl,yl,'b-',lw=3) pylab.axis([-24.,-18,-1.,2.4]) #ax.set_yscale('log') #pylab.show() pylab.savefig('armr.eps') print "fraction w/MPA stellar mass and Az = ",N.sum(self.mpaflag)/(1.*len(self.mpaflag))
def get_year_count(stn=''):
    #returns a list of TAF site USAF ID #s given a TAF site ID
    if not stn:
        return []
    ids = cdutils.getHistory(stn)
    fh = file(ish_inv, 'r')
    lines = {}
    for line in fh.readlines():
        if line[:12] in ids:
            line_arr = [el.strip() for el in line.split(' ') if el != '']
            if not lines.has_key((line_arr[0], line_arr[1])):
                lines[(line_arr[0], line_arr[1])] = []
            lines[(line_arr[0], line_arr[1])].append([line_arr[2], \
                numarray.sum([int(x) for x in line_arr[3:14]])])
    for key in lines:
        lines[key] = continuity_check(lines[key])
    fh.close()
    return lines
def integrate(F,x,y,xStop,tol): def midpoint(F,x,y,xStop,nSteps): # Midpoint formulas h = (xStop - x)/nSteps y0 = y y1 = y0 + h*F(x,y0) for i in range(nSteps-1): x = x + h y2 = y0 + 2.0*h*F(x,y1) y0 = y1 y1 = y2 return 0.5*(y1 + y0 + h*F(x,y2)) def richardson(r,k): # Richardson's extrapolation for j in range(k-1,0,-1): const = 4.0**(k-j) r[j] = (const*r[j+1] - r[j])/(const - 1.0) return kMax = 51 n = len(y) r = zeros((kMax,n),type=Float64) # Start with two integration steps nSteps = 2 r[1] = midpoint(F,x,y,xStop,nSteps) r_old = r[1].copy() # Double the number of integration points # and refine result by Richardson extrapolation for k in range(2,kMax): nSteps = nSteps*2 r[k] = midpoint(F,x,y,xStop,nSteps) richardson(r,k) # Compute RMS change in solution e = sqrt(sum((r[1] - r_old)**2)/n) # Check for convergence if e < tol: return r[1] r_old = r[1].copy() print "Midpoint method did not converge"
def Marginalise(self, varnames):
    """ Marginalises the variables specified in varnames.
    eg. a = Pr(A,B,C,D)
        a.Marginalise(['A','C']) --> Pr(B,D) = Sum(A,C)(Pr(A,B,C,D))

    returns a new DiscretePotential instance
    the variables keep their relative order
    """
    temp = self.cpt.view()
    ax = [self.assocdim[v] for v in varnames]
    ax.sort(reverse=True)  # sort and reverse list to avoid inexistent dimensions
    newnames = copy(self.names_list)
    for a in ax:
        temp = na.sum(temp, axis=a)
        newnames.pop(a)

    #=================================================
    #---ERROR : In which order ?????
    #remainingNames = self.names - set(varnames)
    #remainingNames_list = [name for name in self.names_list if name in remainingNames]

    return self.__class__(newnames, temp.shape, temp)
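# Minimal numpy sketch of the marginalisation pattern used above: sum out the
# unwanted axes in descending order so the remaining axis numbers stay valid.
# numpy stands in for numarray; the shape and variable names are invented.
import numpy as np

cpt = np.arange(2 * 3 * 4, dtype=float).reshape(2, 3, 4)   # Pr(A,B,C)
axes_to_sum = [2, 0]                                       # marginalise C and A
for ax in sorted(axes_to_sum, reverse=True):
    cpt = cpt.sum(axis=ax)
print(cpt.shape)                                           # (3,) -> Pr(B)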
def integrate(F, x, y, xStop, tol): def midpoint(F, x, y, xStop, nSteps): # Midpoint formulas h = (xStop - x) / nSteps y0 = y y1 = y0 + h * F(x, y0) for i in range(nSteps - 1): x = x + h y2 = y0 + 2.0 * h * F(x, y1) y0 = y1 y1 = y2 return 0.5 * (y1 + y0 + h * F(x, y2)) def richardson(r, k): # Richardson's extrapolation for j in range(k - 1, 0, -1): const = 4.0**(k - j) r[j] = (const * r[j + 1] - r[j]) / (const - 1.0) return kMax = 51 n = len(y) r = zeros((kMax, n), type=Float64) # Start with two integration steps nSteps = 2 r[1] = midpoint(F, x, y, xStop, nSteps) r_old = r[1].copy() # Double the number of integration points # and refine result by Richardson extrapolation for k in range(2, kMax): nSteps = nSteps * 2 r[k] = midpoint(F, x, y, xStop, nSteps) richardson(r, k) # Compute RMS change in solution e = sqrt(sum((r[1] - r_old)**2) / n) # Check for convergence if e < tol: return r[1] r_old = r[1].copy() print "Midpoint method did not converge"
def solveforab(aveaperr, avearea):  #(a,b)=solveforab(aveap,avearea)
    amin = .0
    amax = 1
    bmin = .0
    bmax = 1
    step = .005
    a = N.arange(amin, amax, step, 'f')
    b = N.arange(bmin, bmax, step, 'f')
    y = N.zeros(len(aveaperr), 'f')
    diff = N.zeros(len(aveaperr), 'f')
    mini = 1000000000.
    for ai in a:
        for bi in b:
            y = N.zeros(len(avearea), 'f')
            y = N.sqrt(avearea) * ai * (1. + bi * N.sqrt(avearea))
            #diff = N.sqrt((y - aveap)**2)
            diff = (y - aveaperr)
            sumdiff = N.sum(abs(diff))
            #print "%5.3f %5.3f %8.2f %8.2f" %(ai,bi,sumdiff,mini)
            if sumdiff < mini:
                afinal = ai
                bfinal = bi
                mini = sumdiff
    return afinal, bfinal
def recalculate_position(self): """ takes all the positions of the associated observation list and recalculated a position """ global blah if self.stype=="real": print "! not supposed to do this with a real source.." return if len(self.associated_obs) == 0: return pos_array = numarray.fromlist([[x.pos[0],x.pos[1],x.assumed_err[0],x.assumed_err[1]] for x in self.associated_obs]) raa = numarray.fromlist([x[0] for x in pos_array]) raerra = numarray.fromlist([x[2] for x in pos_array]) deca = numarray.fromlist([x[1] for x in pos_array]) decerra = numarray.fromlist([x[3] for x in pos_array]) ra = numarray.sum(raa/raerra**2)/numarray.sum(1.0/raerra**2) dec = numarray.sum(deca/decerra**2)/numarray.sum(1.0/decerra**2) raerr = math.sqrt(1.0/numarray.sum(1.0/raerra**2)) decerr = math.sqrt(1.0/numarray.sum(1.0/decerra**2)) self.current_pos = [ra,dec] self.current_err = [raerr,decerr]
def stddev2(numbers):
    n, = numbers.shape
    sum = numarray.sum(numbers)
    sum_of_squares = numarray.sum(numbers * numbers)
    return sqrt(sum_of_squares / n - (sum / n)**2)
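# Side note: the one-pass sum-of-squares formula above can lose precision when
# the mean is large compared to the spread. A two-pass sketch (numpy assumed,
# made-up data) avoids that at the cost of a second pass over the array:
import numpy as np

def stddev_two_pass(numbers):
    n = len(numbers)
    m = np.sum(numbers) / n
    return np.sqrt(np.sum((numbers - m)**2) / n)

print(stddev_two_pass(np.array([1e8 + 1.0, 1e8 + 2.0, 1e8 + 3.0])))  # ~0.8165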
def mad_combine(infileglob, outfilebase): # get list of files to operate on files = glob.glob(infileglob) # check more than one image exists if files < 2: print 'Less than two input files found!' return print 'Operating on images',infileglob # get images images = _get_images(files) lenimages = len(images) print 'Found', lenimages, 'images' # get header of first image hdr = images[0][0].header # get ascard of first image hdrascard = hdr.ascard # get data from images into num (untidy tuple concatenation) data = num.zeros((lenimages,) + images[0][0].data.shape, num.Float32) for i in range(lenimages): data[i,:,:] = images[i][0].data # delete array del images # sort data print 'Sorting... (this may take a while, i.e an hour or so!)' #data_sorted = num.sort(data, axis=0) data_sorted = data # for debugging # find standard deviation of lowest n - n_discard pixels print 'Calculating mad_low...' mad_low = _mad3(data_sorted[:-n_discard,:,:]) # correct for bias to stddev num.multiply(mad_low, corr[`lenimages - n_discard` + ',' + `lenimages`], mad_low) # make median image print 'Calculating median...' if lenimages%2 != 0: # then odd number of images m = (lenimages - 1) / 2 median = data_sorted[m,:,:] else: # even number of images m = lenimages / 2 median = (data_sorted[m,:,:] + data_sorted[m-1,:,:]) / 2 # delete array del data_sorted # get ccd properties from header # these keywords are for FORS2 # - they may need altering for other instruments gain = hdr['OUT1GAIN'] # N_{ADU} = gain * N_{e-} invgain = 1.0 / gain # N_{e-} = invgain * N_{ADU} ron = hdr['OUT1RON'] # read out noise in e- # take only +ve values in median median_pos = num.choose(median < 0.0, (median, 0.0)) # calculate sigma due to ccd noise for each pixel print 'Calculating noise_med...' noise_med = num.sqrt(median_pos * invgain + ron*ron) * gain # delete array del median_pos # find maximum of noise and mad_low # -> sigma to test pixels against to identify cosmics print 'Calculating sigma_test...' sigma_test = num.choose(noise_med < mad_low, (noise_med, mad_low)) # delete arrays del mad_low, noise_med # calculate 'relative residual' for each pixel print 'Calculating rel_res...' rel_res = num.zeros(data.shape, num.Float32) res = num.zeros(data[0].shape, num.Float32) for i in range(lenimages): num.subtract(data[i,:,:], median, res) num.divide(res, sigma_test, rel_res[i,:,:]) # delete arrays del sigma_test, res # now average over all pixels for which rel_res < sigma_limit # first count number included for each pixel # by testing to produce a boolean array, then summing over. print 'Calculating included...' included = num.zeros(rel_res[0].shape, num.Int16) included[:,:] = num.sum(rel_res <= sigma_limit) # put all discarded pixels to zero print 'Calculating combined...' pre_combine = num.choose(rel_res <= sigma_limit, (0.0,data)) # delete array del rel_res # sum all pixels and divide by included to give mean combined = num.sum(pre_combine) # delete array del pre_combine num.divide(combined, included, combined) # Work out errors on this combined image # take only +ve values in combined mean_pos = num.choose(combined < 0.0, (combined, 0.0)) # calculate sigma due to ccd noise for each pixel print 'Calculating noise_mean...' noise_mean = num.sqrt(mean_pos * invgain + ron*ron) * gain # delete array del mean_pos # create standard error image print 'Calculating error...' error = noise_mean / num.sqrt(included) # delete array del noise_mean # write all images to disk print 'Writing images to disk...' 
_write_images(combined, error, included, hdrascard, outfilebase)
def estimate_mixture(models, seqs, max_iter, eps, alpha=None): """ Given a Python-list of models and a SequenceSet seqs perform an nested EM to estimate maximum-likelihood parameters for the models and the mixture coefficients. The iteration stops after max_iter steps or if the improvement in log-likelihood is less than eps. alpha is a numarray of dimension len(models) containing the mixture coefficients. If alpha is not given, uniform values will be chosen. Result: The models are changed in place. Return value is (l, alpha, P) where l is the final log likelihood of seqs under the mixture, alpha is a numarray of dimension len(models) containing the mixture coefficients and P is a (|sequences| x |models|)-matrix containing P[model j| sequence i] """ done = 0 iter = 1 last_mixture_likelihood = -99999999.99 # The (nr of seqs x nr of models)-matrix holding the likelihoods l = numarray.zeros((len(seqs), len(models)), numarray.Float) if alpha == None: # Uniform alpha logalpha = numarray.ones(len(models), numarray.Float) * \ math.log(1.0/len(models)) else: logalpha = numarray.log(alpha) print logalpha, numarray.exp(logalpha) log_nrseqs = math.log(len(seqs)) while 1: # Score all sequences with all models for i, m in enumerate(models): loglikelihood = m.loglikelihoods(seqs) # numarray slices: l[:,i] is the i-th column of l l[:,i] = numarray.array(loglikelihood) #print l for i in xrange(len(seqs)): l[i] += logalpha # l[i] = ( log( a_k * P[seq i| model k]) ) #print l mixture_likelihood = numarray.sum(numarray.sum(l)) print "# iter %s joint likelihood = %f" % (iter, mixture_likelihood) improvement = mixture_likelihood - last_mixture_likelihood if iter > max_iter or improvement < eps: break # Compute P[model j| seq i] for i in xrange(len(seqs)): seq_logprob = sumlogs(l[i]) # \sum_{k} a_k P[seq i| model k] l[i] -= seq_logprob # l[i] = ( log P[model j | seq i] ) #print l l_exp = numarray.exp(l) # XXX Use approx with table lookup #print "exp(l)", l_exp #print numarray.sum(numarray.transpose(l_exp)) # Print row sums # Compute priors alpha for i in xrange(len(models)): logalpha[i] = sumlogs(l[:,i]) - log_nrseqs #print "logalpha", logalpha, numarray.exp(logalpha) for j, m in enumerate(models): # Set the sequence weight for sequence i under model m to P[m| i] for i in xrange(len(seqs)): seqs.setWeight(i,l_exp[i,j]) m.baumWelch(seqs, 10, 0.0001) iter += 1 last_mixture_likelihood = mixture_likelihood return (mixture_likelihood, numarray.exp(logalpha), l_exp)
def logistic_regression(x, y, beta_start=None, verbose=False, CONV_THRESH=1.e-3, MAXIT=500): """ Uses the Newton-Raphson algorithm to calculate a maximum likelihood estimate logistic regression. The algorithm is known as 'iteratively re-weighted least squares', or IRLS. x - rank-1 or rank-2 array of predictors. If x is rank-2, the number of predictors = x.shape[0] = N. If x is rank-1, it is assumed N=1. y - binary outcomes (if N>1 len(y) = x.shape[1], if N=1 len(y) = len(x)) beta_start - initial beta vector (default zeros(N+1,x.dtype.char)) if verbose=True, diagnostics printed for each iteration (default False). MAXIT - max number of iterations (default 500) CONV_THRESH - convergence threshold (sum of absolute differences of beta-beta_old, default 0.001) returns beta (the logistic regression coefficients, an N+1 element vector), J_bar (the (N+1)x(N+1) information matrix), and l (the log-likeliehood). J_bar can be used to estimate the covariance matrix and the standard error for beta. l can be used for a chi-squared significance test. covmat = inverse(J_bar) --> covariance matrix of coefficents (beta) stderr = sqrt(diag(covmat)) --> standard errors for beta deviance = -2l --> scaled deviance statistic chi-squared value for -2l is the model chi-squared test. """ if x.shape[-1] != len(y): raise ValueError, "x.shape[-1] and y should be the same length!" try: N, npreds = x.shape[1], x.shape[0] except: # single predictor, use simple logistic regression routine. return _simple_logistic_regression(x, y, beta_start=beta_start, CONV_THRESH=CONV_THRESH, MAXIT=MAXIT, verbose=verbose) if beta_start is None: beta_start = NA.zeros(npreds + 1, x.dtype.char) X = NA.ones((npreds + 1, N), x.dtype.char) X[1:, :] = x Xt = NA.transpose(X) iter = 0 diff = 1. beta = beta_start # initial values if verbose: print 'iteration beta log-likliehood |beta-beta_old|' while iter < MAXIT: beta_old = beta ebx = NA.exp(NA.dot(beta, X)) p = ebx / (1. + ebx) l = NA.sum(y * NA.log(p) + (1. - y) * NA.log(1. - p)) # log-likeliehood s = NA.dot(X, y - p) # scoring function J_bar = NA.dot(X * p, Xt) # information matrix beta = beta_old + NA.dot(LA.inverse(J_bar), s) # new value of beta diff = NA.sum(NA.fabs(beta - beta_old)) # sum of absolute differences if verbose: print iter + 1, beta, l, diff if diff <= CONV_THRESH: break iter = iter + 1 if iter == MAXIT and diff > CONV_THRESH: print 'warning: convergence not achieved with threshold of %s in %s iterations' % ( CONV_THRESH, MAXIT) return beta, J_bar, l
# correlations r12 = 0.5 # average correlation between the first predictor and the obs. r13 = 0.25 # avg correlation between the second predictor and the obs. r23 = 0.125 # avg correlation between predictors. # random draws from trivariate normal distribution x = multivariate_normal( NA.array([0, 0, 0]), NA.array([[1, r12, r13], [r12, 1, r23], [r13, r23, 1]]), nsamps) x2 = multivariate_normal( NA.array([0, 0, 0]), NA.array([[1, r12, r13], [r12, 1, r23], [r13, r23, 1]]), nsamps) print 'correlations (r12,r13,r23) = ', r12, r13, r23 print 'number of realizations = ', nsamps # training data. obs = x[:, 0] climprob = NA.sum((obs > 0).astype('f')) / nsamps fcst = NA.transpose(x[:, 1:]) # 2 predictors. obs_binary = obs > 0. # independent data for verification. obs2 = x2[:, 0] fcst2 = NA.transpose(x2[:, 1:]) # compute logistic regression. beta, Jbar, llik = logistic_regression(fcst, obs_binary, verbose=True) covmat = LA.inverse(Jbar) stderr = NA.sqrt(mlab.diag(covmat)) print 'beta =', beta print 'standard error =', stderr # forecasts from independent data. prob = calcprob(beta, fcst2) # compute Brier Skill Score verif = (obs2 > 0.).astype('f')
def numeric_hamming4(num1, num2):
    assert len(num1) == len(num2)
    return numarray.sum(num1 != num2)
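# Usage sketch (numpy assumed in place of numarray): the elementwise boolean
# comparison sums to the number of differing positions; the arrays are made up.
import numpy as np

a = np.array([1, 0, 1, 1, 0])
b = np.array([1, 1, 1, 0, 0])
print(np.sum(a != b))   # 2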
def rosen(x):  # The Rosenbrock function
    return Num.sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0)
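# The Rosenbrock function has its global minimum of 0 at x = (1, ..., 1).
# A quick check, sketched with numpy (Num above is assumed to be a
# numarray-style module):
import numpy as np

def rosen_np(x):
    x = np.asarray(x, dtype=float)
    return np.sum(100.0 * (x[1:] - x[:-1]**2.0)**2.0 + (1 - x[:-1])**2.0)

print(rosen_np([1.0, 1.0, 1.0]))   # 0.0
print(rosen_np([1.2, 1.0, 0.8]))   # > 0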
CalLeTriggered = numarray.where ( daSvac ['GemCalLeVector'] == 1, 1, 0 ) # # The labels of the columns we will add to the data set # label_TkrTriggered = 'Number of Towers with Tkr Triggers' label_CalLeTriggered = 'Number of Towers with CalLe Triggers' label_TowerTkrTrigGemCond = 'Number of Towers with Tkr Triggers after Cut' label_TowerCalLeTrigGemCond = 'Number of Towers with CalLe Triggers after Cut' # # For each event, take the sum of the columns. # Second argument in sum() is 0 for sum of row and 1 for column # Then add resulting vector as a new column to the data set # nbrTkrTriggered = numarray.sum ( TkrTriggered, 1 ) print "Verify shape of result of sum" print nbrTkrTriggered.shape daSvac [ label_TkrTriggered ] = nbrTkrTriggered nbrCalLeTriggered = numarray.sum ( CalLeTriggered, 1 ) daSvac [ label_CalLeTriggered ] = nbrCalLeTriggered # # Get the branch as an array and create a numarray withere the value # is equal to 7 # t = daSvac [ 'GemConditionsWord' ] == 7.0 # # If GemConditionsWord == 7., then fill element with `nbrTkrTriggered', # otherwise with -1
slices=[] for index in range(len(psf_stars['data']['X'])): x=float(psf_stars['data']['X'][index]) y=float(psf_stars['data']['Y'][index]) if x+xbox > data.getshape()[1] or x-xbox < 0 or y+ybox > data.getshape()[0] or y-ybox < 0: continue l=int(x-xbox) r=int(x+xbox) t=int(y-ybox) b=int(y+ybox) sec = data[t:b,l:r].copy() sec = shift(sec,(x-int(x),y-int(y)),order=3) obj = N.where(sec > 2.0*average(average(sec)),1,0) sky2 = N.where(sec < 2.0*average(average(sec)),1,0) sky2 = N.sum(N.sum(sky2*sec))/N.sum(N.sum(sky2)) (lab, nobj) = label(obj,structure=s) f = N.nd_image.find_objects(lab) skip=0 msec = masked_outside(sec,sky2-5.0*sqrt(sky2),40000.) for i in range(1,nobj+1): (a,b)=shape(obj[f[i-1]]) a*=1. b*=1. if a*b < 10: continue if a/b < 0.5 or b/a < 0.5 or a>25 or b> 25: ### some part of chunk of the image has a wonky shape ### better skip this one.
def itmean(arr, darr, thresh):
    arr = numarray.array(arr)
    darr = numarray.array(darr)
    print len(arr), len(darr)
    arr = numarray.compress(darr > 0., arr)
    darr = numarray.compress(darr > 0., darr)
    arr2 = arr
    darr2 = darr
    m = numarray.sum(arr / darr) / numarray.sum(1. / darr)
    lold = 0
    l = len(arr)
    while (l != lold):
        arr2 = numarray.compress(abs(arr - m) < thresh, arr)
        darr2 = numarray.compress(abs(arr - m) < thresh, darr)
        m = numarray.sum(arr2 / darr2) / numarray.sum(1. / darr2)
        lold = l
        l = len(arr2)
    print numarray.sum(arr / darr) / numarray.sum(1. / darr), numarray.sum(
        arr2 / darr2) / numarray.sum(1. / darr2), len(arr2), len(arr)
    return numarray.sum(arr2 / darr2) / numarray.sum(1. / darr2)
def fig6c(xi, yi, zz):
    """ Plot cumulative metallicity distribution """
    feh = numarray.sum(zz, 1)
    pylab.plot(yi, feh)
    pylab.xlabel('[Fe/H]')
    pylab.ylabel('N')
def estimate_mixture(models, seqs, max_iter, eps, alpha=None): """ Given a Python-list of models and a SequenceSet seqs perform an nested EM to estimate maximum-likelihood parameters for the models and the mixture coefficients. The iteration stops after max_iter steps or if the improvement in log-likelihood is less than eps. alpha is a numarray of dimension len(models) containing the mixture coefficients. If alpha is not given, uniform values will be chosen. Result: The models are changed in place. Return value is (l, alpha, P) where l is the final log likelihood of seqs under the mixture, alpha is a numarray of dimension len(models) containing the mixture coefficients and P is a (|sequences| x |models|)-matrix containing P[model j| sequence i] """ done = 0 iter = 1 last_mixture_likelihood = -99999999.99 # The (nr of seqs x nr of models)-matrix holding the likelihoods l = numarray.zeros((len(seqs), len(models)), numarray.Float) if alpha == None: # Uniform alpha logalpha = numarray.ones(len(models), numarray.Float) * \ math.log(1.0/len(models)) else: logalpha = numarray.log(alpha) print logalpha, numarray.exp(logalpha) log_nrseqs = math.log(len(seqs)) while 1: # Score all sequences with all models for i, m in enumerate(models): loglikelihood = m.loglikelihoods(seqs) # numarray slices: l[:,i] is the i-th column of l l[:, i] = numarray.array(loglikelihood) #print l for i in xrange(len(seqs)): l[i] += logalpha # l[i] = ( log( a_k * P[seq i| model k]) ) #print l mixture_likelihood = numarray.sum(numarray.sum(l)) print "# iter %s joint likelihood = %f" % (iter, mixture_likelihood) improvement = mixture_likelihood - last_mixture_likelihood if iter > max_iter or improvement < eps: break # Compute P[model j| seq i] for i in xrange(len(seqs)): seq_logprob = sumlogs(l[i]) # \sum_{k} a_k P[seq i| model k] l[i] -= seq_logprob # l[i] = ( log P[model j | seq i] ) #print l l_exp = numarray.exp(l) # XXX Use approx with table lookup #print "exp(l)", l_exp #print numarray.sum(numarray.transpose(l_exp)) # Print row sums # Compute priors alpha for i in xrange(len(models)): logalpha[i] = sumlogs(l[:, i]) - log_nrseqs #print "logalpha", logalpha, numarray.exp(logalpha) for j, m in enumerate(models): # Set the sequence weight for sequence i under model m to P[m| i] for i in xrange(len(seqs)): seqs.setWeight(i, l_exp[i, j]) m.baumWelch(seqs, 10, 0.0001) iter += 1 last_mixture_likelihood = mixture_likelihood return (mixture_likelihood, numarray.exp(logalpha), l_exp)
# convert bins (Hz) to k: omega = 2 * N.pi * bins k = W.WaveNumber(g, omega, h) # convert to energy at the bottom: Eb = Es / (N.sinh(k*h)**2) ##print "Energy at Top:", ##print Es[0:1] ##print "Energy at Bottom:", ##print Eb[0:1] ##print "Ratio:" ##print Eb[0:1] / Es[0:1] # total energy Etotal = N.sum(Eb, 1)# sum across rows print "max energy", N.maximum.reduce(Etotal) # Plot energy over time: F = pylab.Figure() ax = pylab.subplot(1,1,1) #print "Position:", ax.get_position() left, bottom, width, height = ax.get_position() delta = 0.08 ax.set_position([left, bottom + delta, width, height-delta]) #pylab.plot(pylab.date2num(datetimes), Etotal ) #pylab.plot_date(pylab.date2num(datetimes), Etotal, "-" ) ax.plot_date(pylab.date2num(datetimes), Etotal, "-" )
def drawmeridians(self,ax,meridians,color='k',linewidth=1., \ linestyle='--',dashes=[1,1],labels=[0,0,0,0],\ font='rm',fontsize=12): """ draw meridians (longitude lines). ax - current axis instance. meridians - list containing longitude values to draw (in degrees). color - color to draw meridians (default black). linewidth - line width for meridians (default 1.) linestyle - line style for meridians (default '--', i.e. dashed). dashes - dash pattern for meridians (default [1,1], i.e. 1 pixel on, 1 pixel off). labels - list of 4 values (default [0,0,0,0]) that control whether meridians are labelled where they intersect the left, right, top or bottom of the plot. For example labels=[1,0,0,1] will cause meridians to be labelled where they intersect the left and bottom of the plot, but not the right and top. Labels are located with a precision of 0.1 degrees and are drawn using mathtext. font - mathtext font used for labels ('rm','tt','it' or 'cal', default 'rm'. fontsize - font size in points for labels (default 12). """ # don't draw meridians past latmax, always draw parallel at latmax. latmax = 80. # not used for cyl, merc projections. # offset for labels. yoffset = (self.urcrnry - self.llcrnry) / 100. / self.aspect xoffset = (self.urcrnrx - self.llcrnrx) / 100. if self.projection not in ['merc', 'cyl']: lats = N.arange(-latmax, latmax + 1).astype('f') else: lats = N.arange(-90, 91).astype('f') xdelta = 0.1 * (self.xmax - self.xmin) ydelta = 0.1 * (self.ymax - self.ymin) for merid in meridians: lons = merid * N.ones(len(lats), 'f') x, y = self(lons, lats) # remove points outside domain. testx = N.logical_and(x >= self.xmin - xdelta, x <= self.xmax + xdelta) x = N.compress(testx, x) y = N.compress(testx, y) testy = N.logical_and(y >= self.ymin - ydelta, y <= self.ymax + ydelta) x = N.compress(testy, x) y = N.compress(testy, y) if len(x) > 1 and len(y) > 1: # split into separate line segments if necessary. # (not necessary for mercator or cylindrical). xd = (x[1:] - x[0:-1])**2 yd = (y[1:] - y[0:-1])**2 dist = N.sqrt(xd + yd) split = dist > 500000. if N.sum(split) and self.projection not in ['merc', 'cyl']: ind = (N.compress( split, pylab.squeeze(split * N.indices(xd.shape))) + 1).tolist() xl = [] yl = [] iprev = 0 ind.append(len(xd)) for i in ind: xl.append(x[iprev:i]) yl.append(y[iprev:i]) iprev = i else: xl = [x] yl = [y] # draw each line segment. for x, y in zip(xl, yl): # skip if only a point. if len(x) > 1 and len(y) > 1: l = Line2D(x, y, linewidth=linewidth, linestyle=linestyle) l.set_color(color) l.set_dashes(dashes) ax.add_line(l) # draw labels for meridians. # search along edges of map to see if parallels intersect. # if so, find x,y location of intersection and draw a label there. if self.projection == 'cyl': dx = 0.01 dy = 0.01 elif self.projection == 'merc': dx = 0.01 dy = 1000 else: dx = 1000 dy = 1000 for dolab, side in zip(labels, ['l', 'r', 't', 'b']): if not dolab: continue # for cyl or merc, don't draw meridians on left or right. 
if self.projection in ['cyl', 'merc'] and side in ['l', 'r']: continue if side in ['l', 'r']: nmax = int((self.ymax - self.ymin) / dy + 1) if self.urcrnry < self.llcrnry: yy = self.llcrnry - dy * N.arange(nmax) else: yy = self.llcrnry + dy * N.arange(nmax) if side == 'l': lons, lats = self(self.llcrnrx * N.ones(yy.shape, 'f'), yy, inverse=True) else: lons, lats = self(self.urcrnrx * N.ones(yy.shape, 'f'), yy, inverse=True) lons = N.where(lons < 0, lons + 360, lons) lons = [int(lon * 10) for lon in lons.tolist()] lats = [int(lat * 10) for lat in lats.tolist()] else: nmax = int((self.xmax - self.xmin) / dx + 1) if self.urcrnrx < self.llcrnrx: xx = self.llcrnrx - dx * N.arange(nmax) else: xx = self.llcrnrx + dx * N.arange(nmax) if side == 'b': lons, lats = self(xx, self.llcrnry * N.ones(xx.shape, 'f'), inverse=True) else: lons, lats = self(xx, self.urcrnry * N.ones(xx.shape, 'f'), inverse=True) lons = N.where(lons < 0, lons + 360, lons) lons = [int(lon * 10) for lon in lons.tolist()] lats = [int(lat * 10) for lat in lats.tolist()] for lon in meridians: if lon < 0: lon = lon + 360. # find index of meridian (there may be two, so # search from left and right). try: nl = lons.index(int(lon * 10)) except: nl = -1 try: nr = len(lons) - lons[::-1].index(int(lon * 10)) - 1 except: nr = -1 if lon > 180: lonlab = r'$\%s{%g\/^{\circ}\/W}$' % (font, N.fabs(lon - 360)) elif lon < 180 and lon != 0: lonlab = r'$\%s{%g\/^{\circ}\/E}$' % (font, lon) else: lonlab = r'$\%s{%g\/^{\circ}}$' % (font, lon) # meridians can intersect each map edge twice. for i, n in enumerate([nl, nr]): lat = lats[n] / 10. # no meridians > latmax for projections other than merc,cyl. if self.projection not in ['merc', 'cyl'] and lat > latmax: continue # don't bother if close to the first label. if i and abs(nr - nl) < 100: continue if n > 0: if side == 'l': pylab.text(self.llcrnrx - xoffset, yy[n], lonlab, horizontalalignment='right', verticalalignment='center', fontsize=fontsize) elif side == 'r': pylab.text(self.urcrnrx + xoffset, yy[n], lonlab, horizontalalignment='left', verticalalignment='center', fontsize=fontsize) elif side == 'b': pylab.text(xx[n], self.llcrnry - yoffset, lonlab, horizontalalignment='center', verticalalignment='top', fontsize=fontsize) else: pylab.text(xx[n], self.urcrnry + yoffset, lonlab, horizontalalignment='center', verticalalignment='bottom', fontsize=fontsize) # make sure axis ticks are turned off ax.set_xticks([]) ax.set_yticks([])
def matchum(file1, file2, tol=10, perr=4, aerr=1.0, nmax=40, im_masks1=[], im_masks2=[], debug=0, domags=0, xrange=None, yrange=None, sigma=4, aoffset=0): '''Take the output of two sextractor runs and match up the objects with each other (find out which objects in the first file match up with objects in the second file. The routine considers a 'match' to be any two objects that are closer than tol pixels (after applying the shift). Returns a 6-tuple: (x1,y1,x2,y2,o1,o2). o1 and o2 are the ojbects numbers such that o1[i] in file 1 corresponds to o2[i] in file 2.''' NA = num.NewAxis sexdata1 = readsex(file1) sexdata2 = readsex(file2) # Use the readsex data to get arrays of the (x,y) positions x1 = num.asarray(sexdata1[0]['X_IMAGE']) y1 = num.asarray(sexdata1[0]['Y_IMAGE']) x2 = num.asarray(sexdata2[0]['X_IMAGE']) y2 = num.asarray(sexdata2[0]['Y_IMAGE']) m1 = num.asarray(sexdata1[0]['MAG_BEST']) m2 = num.asarray(sexdata2[0]['MAG_BEST']) o1 = num.asarray(sexdata1[0]['NUMBER']) o2 = num.asarray(sexdata2[0]['NUMBER']) f1 = num.asarray(sexdata1[0]['FLAGS']) f2 = num.asarray(sexdata2[0]['FLAGS']) # First, make a cut on the flags: gids = num.where(f1 < 4) x1 = x1[gids] y1 = y1[gids] m1 = m1[gids] o1 = o1[gids] gids = num.where(f2 < 4) x2 = x2[gids] y2 = y2[gids] m2 = m2[gids] o2 = o2[gids] # next, if there is a range to use: if xrange is not None and yrange is not None: cond = num.greater(x1, xrange[0])*num.less(x1,xrange[1])*\ num.greater(y1, yrange[0])*num.less(y1,yrange[1]) gids = num.where(cond) x1 = x1[gids] y1 = y1[gids] m1 = m1[gids] o1 = o1[gids] cond = num.greater(x2, xrange[0])*num.less(x2,xrange[1])*\ num.greater(y2, yrange[0])*num.less(y2,yrange[1]) gids = num.where(cond) x2 = x2[gids] y2 = y2[gids] m2 = m2[gids] o2 = o2[gids] # Use the user masks for m in im_masks1: print "applying mask (%d,%d,%d,%d)" % tuple(m) condx = num.less(x1, m[0]) + num.greater(x1, m[1]) condy = num.less(y1, m[2]) + num.greater(y1, m[3]) gids = num.where(condx + condy) x1 = x1[gids] y1 = y1[gids] m1 = m1[gids] o1 = o1[gids] for m in im_masks2: print "applying mask (%d,%d,%d,%d)" % tuple(m) condx = num.less(x2, m[0]) + num.greater(x2, m[1]) condy = num.less(y2, m[2]) + num.greater(y2, m[3]) gids = num.where(condx + condy) x2 = x2[gids] y2 = y2[gids] m2 = m2[gids] o2 = o2[gids] if nmax: if len(x1) > nmax: ids = num.argsort(m1)[0:nmax] x1 = x1[ids] y1 = y1[ids] m1 = m1[ids] o1 = o1[ids] if len(x2) > nmax: ids = num.argsort(m2)[0:nmax] x2 = x2[ids] y2 = y2[ids] m2 = m2[ids] o2 = o2[ids] if debug: print "objects in frame 1:" print o1 print "objects in frame 2:" print o2 mp = pygplot.MPlot(2, 1, device='/XWIN') p = pygplot.Plot() p.point(x1, y1) [p.label(x1[i], y1[i], "%d" % o1[i]) for i in range(len(x1))] mp.add(p) p = pygplot.Plot() p.point(x2, y2) [p.label(x2[i], y2[i], "%d" % o2[i]) for i in range(len(x2))] mp.add(p) mp.plot() mp.close() # Now, we make 2-D arrays of all the differences in x and y between each pair # of objects. e.g., dx1[n,m] is the delta-x between object n and m in file 1 and # dy2[n,m] is the y-distance between object n and m in file 2. dx1 = x1[NA, :] - x1[:, NA] dx2 = x2[NA, :] - x2[:, NA] dy1 = y1[NA, :] - y1[:, NA] dy2 = y2[NA, :] - y2[:, NA] # Same, but with angles da1 = num.arctan2(dy1, dx1) * 180 / num.pi da2 = num.arctan2(dy2, dx2) * 180 / num.pi # Same, but with absolute distances ds1 = num.sqrt(num.power(dx1, 2) + num.power(dy1, 2)) ds2 = num.sqrt(num.power(dx2, 2) + num.power(dy2, 2)) # Here's the real magic: this is a matrix of matrices (4-D). 
Consider 4 objects: # objects i and j in file 1 and objects m and n in file 2. dx[i,j,m,n] is the # difference between delta-xs for objects i,j in file 1 and m,n in file 2. If object # i corresponds to object m and object j corresponds to object n, this should be a small # number, irregardless of an overall shift in coordinate systems between file 1 and 2. dx = dx1[::, ::, NA, NA] - dx2[NA, NA, ::, ::] dy = dy1[::, ::, NA, NA] - dy2[NA, NA, ::, ::] da = da1[::, ::, NA, NA] - da2[NA, NA, ::, ::] + aoffset ds = ds1[::, ::, NA, NA] - ds2[NA, NA, ::, ::] # pick out close pairs. #use = num.less(dy,perr)*num.less(dx,perr)*num.less(num.abs(da),aerr) use = num.less(ds, perr) * num.less(num.abs(da), aerr) use = use.astype(num.Int32) #use = num.less(num.abs(da),perr) suse = num.add.reduce(num.add.reduce(use, 3), 1) print suse[0] guse = num.greater(suse, suse.flat.max() / 2) i = [j for j in range(x1.shape[0]) if num.sum(guse[j])] m = [num.argmax(guse[j]) for j in range(x1.shape[0]) if num.sum(guse[j])] xx0, yy0, oo0, mm0 = num.take([x1, y1, o1, m1], i, 1) xx1, yy1, oo1, mm1 = num.take([x2, y2, o2, m2], m, 1) if debug: mp = pygplot.MPlot(2, 1, device='/XWIN') p = pygplot.Plot() p.point(xx0, yy0) [p.label(xx0[i], yy0[i], "%d" % oo0[i]) for i in range(len(xx0))] mp.add(p) p = pygplot.Plot() p.point(xx1, yy1) [p.label(xx1[i], yy1[i], "%d" % oo1[i]) for i in range(len(xx1))] mp.add(p) mp.plot() mp.close() xshift, xscat = stats.bwt(xx0 - xx1) xscat = max([1.0, xscat]) yshift, yscat = stats.bwt(yy0 - yy1) yscat = max([1.0, yscat]) mshift, mscat = stats.bwt(mm0 - mm1) print "xscat = ", xscat print "yscat = ", yscat print "xshift = ", xshift print "yshift = ", yshift print "mshift = ", mshift print "mscat = ", mscat keep = num.less(num.abs(xx0-xx1-xshift),sigma*xscat)*\ num.less(num.abs(yy0-yy1-yshift),sigma*yscat) # This is a list of x,y,object# in each file. xx0, yy0, oo0, xx1, yy1, oo1 = num.compress(keep, [xx0, yy0, oo0, xx1, yy1, oo1], 1) if debug: print file1, oo0 print file2, oo1 mp = pygplot.MPlot(2, 1, device='temp.ps/CPS') p1 = pygplot.Plot() p1.point(xx0, yy0, symbol=25, color='red') for i in range(len(xx0)): p1.label(xx0[i], yy0[i], " %d" % oo0[i], color='red') mp.add(p1) p2 = pygplot.Plot() p2.point(xx1, yy1, symbol=25, color='green') for i in range(len(xx1)): p2.label(xx1[i], yy1[i], " %d" % oo1[i], color='green') mp.add(p2) mp.plot() mp.close() if domags: return (xx0, yy0, mm0, xx1, yy1, mm1, mshift, mscat, oo0, oo1) else: return (xx0, yy0, xx1, yy1, oo0, oo1)
def _deltas( self, train_toks, #fd_list, labeled_tokens, labels, classifier, unattested, ffreq_emperical, nfmap, nfarray, nftranspose): """ Calculate the update values for the classifier weights for this iteration of IIS. These update weights are the value of C{delta} that solves the equation:: ffreq_emperical[i] = SUM[t,l] (classifier.prob(LabeledText(t,l)) * fd_list.detect(LabeledText(t,l))[i] * exp(delta[i] * nf(LabeledText(t,l)))) Where: - M{t} is a text C{labeled_tokens} - M{l} is an element of C{labels} - M{nf(ltext)} = SUM[M{j}] C{fd_list.detect}(M{ltext})[M{j}] This method uses Newton's method to solve this equation for M{delta[i]}. In particular, it starts with a guess of C{delta[i]}=1; and iteratively updates C{delta} with:: delta[i] -= (ffreq_emperical[i] - sum1[i])/(-sum2[i]) until convergence, where M{sum1} and M{sum2} are defined as:: sum1 = SUM[t,l] (classifier.prob(LabeledText(t,l)) * fd_list.detect(LabeledText(t,l))[i] * exp(delta[i] * nf(LabeledText(t,l)))) sum2 = SUM[t,l] (classifier.prob(LabeledText(t,l)) * fd_list.detect(LabeledText(t,l))[i] * nf(LabeledText(t,l)) * exp(delta[i] * nf(LabeledText(t,l)))) Note that M{sum1} and M{sum2} depend on C{delta}; so they need to be re-computed each iteration. The variables C{nfmap}, C{nfarray}, and C{nftranspose} are used to generate a dense encoding for M{nf(ltext)}. This allows C{_deltas} to calculate M{sum1} and M{sum2} using matrices, which yields a signifigant performance improvement. @param fd_list: The feature detector list for the classifier that this C{IISMaxentClassifierTrainer} is training. @type fd_list: C{FeatureDetectorListI} @param labeled_tokens: The set of training tokens. @type labeled_tokens: C{list} of C{Token} with C{LabeledText} type @param labels: The set of labels that should be considered by the classifier constructed by this C{IISMaxentClassifierTrainer}. @type labels: C{list} of (immutable) @param classifier: The current classifier. @type classifier: C{ClassifierI} @param ffreq_emperical: An array containing the emperical frequency for each feature. The M{i}th element of this array is the emperical frequency for feature M{i}. @type ffreq_emperical: C{sequence} of C{float} @param unattested: An array that is 1 for features that are not attested in the training data; and 0 for features that are attested. In other words, C{unattested[i]==0} iff C{ffreq_emperical[i]==0}. @type unattested: C{sequence} of C{int} @param nfmap: A map that can be used to compress C{nf} to a dense vector. @type nfmap: C{dictionary} from C{int} to C{int} @param nfarray: An array that can be used to uncompress C{nf} from a dense vector. @type nfarray: C{array} of C{float} @param nftranspose: C{array} of C{float} @type nftranspose: The transpose of C{nfarray} """ # These parameters control when we decide that we've # converged. It probably should be possible to set these # manually, via keyword arguments to train. NEWTON_CONVERGE = 1e-12 MAX_NEWTON = 30 deltas = numarray.ones(self._weight_vector_len, 'd') # Precompute the A matrix: # A[nf][id] = sum ( p(text) * p(label|text) * f(text,label) ) # over all label,text s.t. num_features[label,text]=nf A = numarray.zeros((len(nfmap), self._weight_vector_len), 'd') for i, tok in enumerate(train_toks): dist = classifier.get_class_probs(tok) # Find the number of active features. 
feature_vector = tok['FEATURE_VECTOR'] assignments = feature_vector.assignments() nf = sum([val for (id, val) in assignments]) # Update the A matrix for cls, offset in self._offsets.items(): for (id, val) in assignments: A[nfmap[nf], id + offset] += dist.prob(cls) * val A /= len(train_toks) # Iteratively solve for delta. Use the following variables: # - nf_delta[x][y] = nf[x] * delta[y] # - exp_nf_delta[x][y] = exp(nf[x] * delta[y]) # - nf_exp_nf_delta[x][y] = nf[x] * exp(nf[x] * delta[y]) # - sum1[i][nf] = sum p(text)p(label|text)f[i](label,text) # exp(delta[i]nf) # - sum2[i][nf] = sum p(text)p(label|text)f[i](label,text) # nf exp(delta[i]nf) for rangenum in range(MAX_NEWTON): nf_delta = numarray.outerproduct(nfarray, deltas) exp_nf_delta = numarray.exp(nf_delta) nf_exp_nf_delta = nftranspose * exp_nf_delta sum1 = numarray.sum(exp_nf_delta * A) sum2 = numarray.sum(nf_exp_nf_delta * A) # Avoid division by zero. sum2 += unattested # Update the deltas. deltas -= (ffreq_emperical - sum1) / -sum2 # We can stop once we converge. n_error = (numarray.sum(abs( (ffreq_emperical - sum1))) / numarray.sum(abs(deltas))) if n_error < NEWTON_CONVERGE: return deltas return deltas