Esempio n. 1
    # Unit test for Marginalise(): marginalises the fixture potential self.a
    # over one and over two variables, marginalises a six-variable potential
    # over three variables, and checks names, shapes and summed values in a
    # single assert.
    def testMarginalise(self):
        # NOTE(review): factorial is defined but never called in the visible body.
        def factorial(n):
            if n==1:return 1
            return factorial(n - 1) * n
        # marginalise 'c' alone, then 'c' and 'a' together, out of self.a
        var = set('c')
        b = self.a.Marginalise(var)
        var2 = set(['c','a'])
        c = self.a.Marginalise(var2)
        # NOTE(review): d is built but not used by the assertions below.
        d = DiscretePotential(['b','c'], [3,4], na.arange(12))

        # extended test: six variables with sizes 2..7, marginalised over c, f, a
        # NOTE(review): the next statement ends in a line continuation whose
        # continuation line (the table values) is missing from this copy.
        a = DiscretePotential('a b c d e f'.split(), [2,3,4,5,6,7], \
        aa = a.Marginalise('c f a'.split())

        # every condition must hold, otherwise the message on the last line is raised
        assert(b.names == self.a.names - var and \
               b[0,1] == na.sum(self.a[0,1]) and \
               c.names == self.a.names - var2 and \
               na.alltrue(c.cpt.flat == na.sum(na.sum(self.a.cpt,axis=2), axis=0)) and
               aa.shape == (3,5,6) and \
               aa.names_list == 'b d e'.split() and \
               aa[2,4,3] == na.sum(a[:,2,:,4,3,:].flat)), \
               " Marginalisation doesn't work"
Esempio n. 2
def PlotTurbulenceIllustr(a):
    """Save figures for slabs of the 3D array ``a`` summed along axis 2.

    Can generate the grid with
    g=kolmogorovutils.GenerateKolmogorov3D( 1025, 129, 129)

    First pass: for x in 1, 10, 100 the planes a[:,:,0:x] are summed and the
    current pylab figure is written to temp/turb3d-sum<x>.eps.

    Second pass: for the same x and j in 0, 1, 2 the planes
    a[:,:200,j*x:(j+1)*x] are summed and the current pylab figure is written
    to temp/turb3d-sum<x>-s<j>.eps.

    a -- 3D array, indexed with three subscripts and summed over axis 2
    """
    # NOTE(review): suba is computed but never drawn in the visible code; a
    # plotting call appears to be missing before each savefig -- confirm.
    for x in [1,10,100]:
        suba= numarray.sum(a[:,:,0:x], axis=2)
        pylab.savefig("temp/turb3d-sum%03i.eps" % x)

    for x in [1,10,100]:
        for j in [0,1,2]:
            suba= numarray.sum(a[:,:200,j*x:(j+1)*x], axis=2)
            pylab.savefig("temp/turb3d-sum%03i-s%i.eps" % (x,j))
Esempio n. 3
def stderr(x, s, m):
    """Return the weighted error estimate of ``x`` about the mean ``m``.

    Each point is weighted by w = 1/s**2 and the result is
    sqrt(sum(w * (x - m)**2) / sum(w**2)).

    x -- data values
    s -- errors on the data values (must be non-zero)
    m -- mean about which the deviations are measured

    NOTE(review): the denominator is sum(w**2), not sum(w); the original also
    computed sum(w) without using it.  Confirm that this is the intended
    estimator before relying on its units.
    """
    w = 1.0/s**2
    sumw2 = N.sum(w*w)
    sumdx2 = N.sum(w * (x - m)**2)
    return sqrt(sumdx2/sumw2)
Esempio n. 4
def calc_scatter(x, y, sig, int_scat, a, b):
    """Return the weighted rms scatter of ``y`` about the line a + b*x.

    Weights are 1 / (sig**2 + int_scat**2); ``int_scat`` of None is
    treated as 0.0.  The result is the square root of the weighted mean of
    the squared residuals.
    """
    if int_scat is None:
        int_scat = 0.0
    weights = 1.0 / (sig**2 + int_scat**2)
    weight_total = N.sum(weights)
    weighted_var = N.sum((y - a - b*x)**2 * weights) / weight_total
    return sqrt(weighted_var)
Esempio n. 5
def mean(x, s=None):
    """Return the mean of ``x``, weighted by 1/s**2 when errors are given.

    x -- data values
    s -- optional errors on the data values; when None the plain mean
         sum(x) / len(x) is returned

    With errors the result is sum(w*x) / sum(w) where w = 1/s**2.
    """
    if s is None:
        return N.sum(x) / len(x)
    # weighted branch: in the original this code sat after the return above
    # and could never run, so a call with errors returned None
    w = 1.0/s**2
    sumw = N.sum(w)
    sumx = N.sum(w*x)
    return sumx/sumw
Esempio n. 6
 # Draws a stacked bar chart with one bar per hour (24 bars), one stacked
 # layer per entry of ClimLib.FlightCats, plus a table, legend and y axis.
 # NOTE(review): several statements in this copy are truncated (the title
 # expressions, the bar() call, the table() call); the comments below
 # describe only what the visible lines show.
 def draw_hour(self):
     months = self.get_month_range()
     elem = self.element_w.getvalue()
     # NOTE(review): g is a list here but is indexed with 'ax' further down;
     # this line looks mangled -- confirm against the original source.
     g =['hour']
     # clear display
     # title
     # NOTE(review): both title assignments are cut off and the branch for
     # more than one month has lost its else line.
     if len(months) == 1:
         title = '%s %s %s (%d-%d)' % ((self.id_, \
             self.Element.get(elem, ''), self.Month[months[0]]) + \
         title = '%s %s %s-%s (%d-%d)' % ((self.id_, \
             self.Element.get(elem, ''), self.Month[months[0]], \
             self.Month[months[-1]]) + tuple(['years']))
     cell_text = []
     # column labels '00' .. '23', one per hour
     col_labels = ['%02d' % x for x in range(24)]
     xvals = na.arange(24) + 0.2
     # the bottom values for stacked bar chart
     yoff = na.zeros(len(col_labels), na.Float32) 
     num_cat = len(ClimLib.FlightCats)
     bar = [None]*num_cat
     widths = [0.6]*24
     # stacked bars
     # sum over selected range of months and wind directions
     tmp = na.sum(na.sum(na.take([elem], months, 0), 0), 1)
     # sum over all categories
     total = na.sum(tmp, -1)
     for row in range(num_cat):
         # percentage of the per-hour total that falls in this category
         yvals = 100.0*tmp[:,row]/total
         bar[row] = g['ax'].bar(xvals, yvals, widths, bottom=yoff, 
         # the next category is stacked on top of this one
         yoff += yvals
         cell_row = ['%.0f' % yvals[n] for n in range(24)]
     g['ax'].table(cellText=cell_text, rowLabels=self.RowLabels,
         rowColours=self.colors, colLabels=col_labels, 
     # legend
     legend_bars = [x[0] for x in bar]
     g['ax'].legend(legend_bars, self.RowLabels)
     # axes
     g['ax'].set_ylabel('Percent occurrence')
     # set_yticks returns the upper limit and the spacing of the tick marks
     ymax, delta = self.set_yticks(elem, 'hour')
     g['ax'].set_yticks(na.arange(0, ymax, delta))
Esempio n. 7
def variance(x, s=None, m=None):
    """Return the variance of ``x`` about ``m``, weighted when errors are given.

    x -- data values
    s -- optional errors on the data values; weights are w = 1/s**2
    m -- optional mean; when None it is computed with mean(x, s)

    Without errors the result is sum((x - m)**2) / len(x); with errors it
    is sum(w * (x - m)**2) / sum(w).
    """
    if m is None:  m = mean(x, s)
    if s is None:
        sumdx2 = N.sum((x - m)**2)
        return sumdx2/len(x)
    # weighted branch: in the original this code sat after the return above
    # and could never run, so a call with errors returned None
    w = 1.0/s**2
    sumw = N.sum(w)
    sumdx2 = N.sum(w * (x - m)**2)
    return sumdx2/sumw
Esempio n. 8
 def Print(self):
     """Print every value stored in self.v, then every value in self.e.

     For each value the object itself, its cpt, the cpt shape and the sum
     of all cpt entries are printed, one per line.
     """
     for table_name in ('v', 'e'):
         for c in getattr(self, table_name).values():
             print(c)
             print(c.cpt)
             print(c.cpt.shape)
             print(na.sum(c.cpt.flat))
    def setCounts(self):
        """Fit self.mean and self.sigma to the values in self.samples.

        self.mean becomes the arithmetic mean of the samples and self.sigma
        the square root of the summed squared deviations divided by
        (number of samples - 1).  An empty self.samples fails the assertion.
        """
        assert(self.samples), "No samples given..."
        data = na.array(self.samples, type='Float32')
        count = len(data)

        self.mean = na.sum(data) / count
        centred = data - self.mean
        self.sigma = (na.sum(centred*centred) / (count-1.0)) ** 0.5
# Iteratively fits a two-parameter logistic model p = 1/(1+exp(-(b0+b1*x)))
# to y and returns (beta, J_bar, l): the coefficients, the information
# matrix and the log-likelihood from the last iteration.
# NOTE(review): this copy is truncated -- the end of the signature, the
# docstring quotes, the second row of J_bar and the beta update expression
# are missing.  MAXIT and CONV_THRESH are not defined in the visible lines.
def _simple_logistic_regression(x,y,beta_start=None,verbose=False,
 Faster than logistic_regression when there is only one predictor.
    if len(x) != len(y):
        raise ValueError, "x and y should be the same length!"
    # default starting point: both coefficients zero
    if beta_start is None:
        beta_start = NA.zeros(2,x.dtype.char)
    iter = 0; diff = 1.; beta = beta_start  # initial values
    if verbose:
        print 'iteration  beta log-likliehood |beta-beta_old|' 
    while iter < MAXIT:
        beta_old = beta 
        # fitted probabilities under the current coefficients
        p = NA.exp(beta[0]+beta[1]*x)/(1.+NA.exp(beta[0]+beta[1]*x))
        l = NA.sum(y*NA.log(p) + (1.-y)*NA.log(1.-p)) # log-likelihood
        s = NA.array([NA.sum(y-p), NA.sum((y-p)*x)])  # scoring function
        # information matrix
        J_bar = NA.array([[NA.sum(p*(1-p)),NA.sum(p*(1-p)*x)],
        beta = beta_old +,s) # new value of beta
        diff = NA.sum(NA.fabs(beta-beta_old)) # sum of absolute differences
        if verbose:
            print iter+1, beta, l, diff
        # stop once the coefficients have stopped moving
        if diff <= CONV_THRESH: break
        iter = iter + 1
    return beta, J_bar, l
Esempio n. 12
def test_Mcmc():
    print "*** Testing Mcmc ***"

    rosen = optimizers.Rosen()
    startingValues = vector([2., 2.])

    rosen.setParamBounds("x", -10., 10.)
    rosen.setParamBounds("y", -10., 10.)

    verbose = 0
    minuitObj = optimizers.Minuit(rosen)
    minuitObj.find_min(verbose, 1e-4)
    # use uncertainties from Minuit to set the transition widths
    sigmas = minuitObj.getUncertainty()
    scale = 1
    widths = vector(map(lambda x: x * scale, sigmas))

    mcmcObj = optimizers.Mcmc(rosen)

    samples = vecvec()
    nsamp = 100000
    # Do a burn in...
    mcmcObj.generateSamples(samples, nsamp)
    # then the real thing...
    nsamp = 100000
    mcmcObj.generateSamples(samples, nsamp)
    x = list(samples)
    samps = []
    for j in range(len(x[0])):
        samps.append(numarray.array(map(lambda i: x[i][j], range(len(x)))))

    print "MCMC results for ", nsamp, " trials:"
    for j in range(len(samples[0])):
        x = samps[j]
        mc_avg = numarray.sum(x) / len(x)
        mc2_avg = numarray.sum(x * x) / len(x)
        print "Parameter %i: %e +/- %e" % (
            j, mc_avg, math.sqrt(mc2_avg - mc_avg * mc_avg))

    print "*** Mcmc tests completed ***\n"
    return samps
    def sample(self, index={}):
        """ returns the index of the sampled value
        eg. a=Pr(A)=[0.5 0.3 0.0 0.2]
            a.sample() -->  5/10 times will return 0
                            3/10 times will return 1
                            2/10 times will return 3
                            2 will never be returned

            - returns an integer
            - only works for one variable tables
              eg. a=Pr(A,B); a.sample() --> ERROR

        index -- optional dictionary; when non-empty the table is first
                 reduced with self.__getitem__(index) and must leave exactly
                 one free variable.  The default dictionary is never mutated.
        """
        assert(len(self.names) == 1 or \
               len(self.names - set(index.keys())) == 1), \
               "Sample only works for one variable tables"
        if not index == {}:
            tcpt = self.__getitem__(index)
        else:
            # no evidence given: sample from the full table
            # (the original had lost this else and always used self.cpt)
            tcpt = self.cpt
        # csum is the cumulative sum of the distribution
        # csum[i] = na.sum(tcpt.flat[0:i+1])
        # csum[-1] = na.sum(tcpt)
        csum = [na.sum(tcpt.flat[0:end+1]) for end in range(tcpt.shape[0])]

        # sample in this distribution: first bin whose cumulative
        # probability exceeds a uniform draw from [0, 1)
        r = random.random()
        for i,cs in enumerate(csum):
            if r < cs: return i
        # rounding left the total just below r: fall back to the last index
        return i
Esempio n. 14
def Animate(g):
    """Save a sequence of images of ``g`` summed over a growing slab.

    For depth = 1, 6, 11, ... (below 64) the planes g[0:depth] are summed
    along axis 0, shown with pylab.matshow and written to
    temp/3dturb-<depth>.png.
    """
    for depth in range(1, 64, 5):
        projection = numarray.sum(g[0:depth, :, :], axis=0)
        pylab.matshow(projection)
        pylab.savefig("temp/3dturb-%03i.png" % depth)
Esempio n. 15
    def run_kut5(F, x, y, h):
        """Take one embedded Runge-Kutta step of size ``h`` from (x, y).

        F -- callable F(x, y) giving the derivatives
        Returns (dy, e): the increment built from the C weights and the rms
        of the difference between the C- and D-weighted increments.
        """
        # weights of the two embedded formulas
        C = array([37./378, 0., 250./621, 125./594, 0., 512./1771])
        D = array([2825./27648, 0., 18575./48384,
                   13525./55296, 277./14336, 1./4])
        n = len(y)

        # the six stage increments, one per row
        K = zeros((6, n), type=Float64)
        K[0] = h * F(x, y)

        y1 = y + 1. / 5 * K[0]
        K[1] = h * F(x + 1. / 5 * h, y1)

        y2 = y + 3. / 40 * K[0] + 9. / 40 * K[1]
        K[2] = h * F(x + 3. / 10 * h, y2)

        y3 = y + 3./10*K[0]- 9./10*K[1] + 6./5*K[2]
        K[3] = h*F(x + 3./5*h, y3)

        y4 = y - 11./54*K[0] + 5./2*K[1] - 70./27*K[2] + 35./27*K[3]
        K[4] = h*F(x + h, y4)

        y5 = (y + 1631./55296*K[0]
              + 175./512*K[1] + 575./13824*K[2]
              + 44275./110592*K[3] + 253./4096*K[4])
        K[5] = h*F(x + 7./8*h, y5)

        # accumulate the solution increment and the per-step error
        E = zeros(n, type=Float64)
        dy = zeros(n, type=Float64)
        for c_hi, c_lo, stage in zip(C, D, K):
            dy = dy + c_hi * stage
            E = E + (c_hi - c_lo) * stage

        # rms error over the n components
        e = sqrt(sum(E**2) / n)
        return dy, e
    def sample(self, index={}):
        """ Draw one value from this distribution; all parents must be known.

        index is turned into discrete and continuous parent indices by
        self._numIndexFromDict.  The discrete indices select the entries of
        self.mean (after its first axis) and of self.sigma (after its first
        two axes); the continuous values are multiplied by self.weights and
        summed over axis 1 to shift the mean.  Returns a float.
        """
        d_index, c_index = self._numIndexFromDict(index)

        whole_axis = slice(None, None, None)
        mean_key = tuple([whole_axis] + d_index)
        sigma_key = tuple([whole_axis, whole_axis] + d_index)

        mean = na.array(self.mean[mean_key])
        sigma = self.sigma[sigma_key]

        # contribution of the continuous parents
        parent_values = na.array(c_index)[na.NewAxis,...]
        wi = na.sum(self.weights * parent_values, axis=1)

        # a random number from a multivariate normal distribution
        return float(ra.multivariate_normal(mean + wi, sigma))
Esempio n. 17
 def run_kut5(F, x, y, h):
     """Take one embedded Runge-Kutta step of size ``h`` from (x, y).

     F -- callable F(x, y) returning the derivatives as an array of len(y)
     x -- current value of the independent variable
     y -- current solution vector
     h -- step size

     Returns (dy, e): the solution increment built from the C weights and
     the rms, over the components, of the difference between the C- and
     D-weighted increments.
     """
     # Runge-Kutta-Fehlberg formulas (the weights are the Cash-Karp set)
     C = array([37.0 / 378, 0.0, 250.0 / 621, 125.0 / 594, 0.0, 512.0 / 1771])
     D = array([2825.0 / 27648, 0.0, 18575.0 / 48384, 13525.0 / 55296, 277.0 / 14336, 1.0 / 4])
     n = len(y)
     # the element type is passed positionally so the call does not depend
     # on the keyword being spelled `type`
     K = zeros((6, n), Float64)
     K[0] = h * F(x, y)
     K[1] = h * F(x + 1.0 / 5 * h, y + 1.0 / 5 * K[0])
     K[2] = h * F(x + 3.0 / 10 * h, y + 3.0 / 40 * K[0] + 9.0 / 40 * K[1])
     K[3] = h * F(x + 3.0 / 5 * h, y + 3.0 / 10 * K[0] - 9.0 / 10 * K[1] + 6.0 / 5 * K[2])
     K[4] = h * F(x + h, y - 11.0 / 54 * K[0] + 5.0 / 2 * K[1] - 70.0 / 27 * K[2] + 35.0 / 27 * K[3])
     # the sixth stage starts from y; that term and the closing parenthesis
     # had been lost from this copy of the routine
     K[5] = h * F(
         x + 7.0 / 8 * h,
         y
         + 1631.0 / 55296 * K[0]
         + 175.0 / 512 * K[1]
         + 575.0 / 13824 * K[2]
         + 44275.0 / 110592 * K[3]
         + 253.0 / 4096 * K[4],
     )
     # Initialize arrays {dy} and {E}
     E = zeros((n), Float64)
     dy = zeros((n), Float64)
     # Compute solution increment {dy} and per-step error {E}
     for i in range(6):
         dy = dy + C[i] * K[i]
         E = E + (C[i] - D[i]) * K[i]
     # Compute RMS error e
     e = sqrt(sum(E ** 2) / n)
     return dy, e
Esempio n. 18
 def combine_count(self):
     """Combines 4-dimensional dictionary along items"""
     # axis numbers of the month, hour and wind-direction dimensions
     month_ix, hour_ix, wind_ix = range(3)
     # NOTE(review): this wraps a single value in a list, which does not match
     # the multi-axis sums below; the line looks mangled -- confirm.
     tc =[self.stats_dialog.element.getvalue()]
     # NOTE(review): `items` is not defined anywhere in the visible body.
     # Each dimension that is not listed in items is summed away; the
     # indices of the later axes shift down by one after every sum.
     if 'month' not in items:
         tc = na.sum(tc, month_ix)
         hour_ix -= 1
         wind_ix -= 1
     if 'hour' not in items:
         tc = na.sum(tc, hour_ix)
         wind_ix -= 1
     if 'wdir' not in items:
         tc = na.sum(tc, wind_ix)
     return tc
Esempio n. 19
    # Builds a line of constant longitude for every entry of `meridians`,
    # projects it, clips it to the map domain (plus a 10% margin) and creates
    # Line2D objects for the pieces.
    # NOTE(review): this copy is truncated -- the end of the signature, the
    # docstring quotes, the else lines and the body of the splitting loop are
    # missing, and nothing visible adds the lines to `ax`.
    def drawmeridians(self,ax,meridians,color='k',linewidth=1., \
 draw meridians (longitude lines).

 ax - current axis instance.
 meridians - list containing longitude values to draw (in degrees).
 color - color to draw meridians (default black).
 linewidth - line width for meridians (default 1.)
 linestyle - line style for meridians (default '--', i.e. dashed).
 dashes - dash pattern for meridians (default [1,1], i.e. 1 pixel on,
  1 pixel off).
        # latitudes sampled along each meridian, one point per degree
        if self.projection not in ['merc','cyl']:
            lats = N.arange(-80,81).astype('f')
            lats = N.arange(-90,91).astype('f')
        # margin of 10% of the domain size on every side
        xdelta = 0.1*(self.xmax-self.xmin)
        ydelta = 0.1*(self.ymax-self.ymin)
        for merid in meridians:
            lons = merid*N.ones(len(lats),'f')
            # calling the instance converts lon/lat to projection x/y
            x,y = self(lons,lats)
            # remove points outside domain.
            testx = N.logical_and(x>=self.xmin-xdelta,x<=self.xmax+xdelta)
            x = N.compress(testx, x)
            y = N.compress(testx, y)
            testy = N.logical_and(y>=self.ymin-ydelta,y<=self.ymax+ydelta)
            x = N.compress(testy, x)
            y = N.compress(testy, y)
            if len(x) > 1 and len(y) > 1:
                # split into separate line segments if necessary.
                # (not necessary for mercator or cylindrical).
                xd = (x[1:]-x[0:-1])**2
                yd = (y[1:]-y[0:-1])**2
                dist = N.sqrt(xd+yd)
                # a jump of more than 500000 between neighbours marks a break
                split = dist > 500000.
                if N.sum(split) and self.projection not in ['merc','cyl']:
                   ind = (N.compress(split,MLab.squeeze(split*N.indices(xd.shape)))+1).tolist()
                   xl = []
                   yl = []
                   iprev = 0
                   for i in ind:
                       iprev = i
                    xl = [x]
                    yl = [y]
                # draw each line segment.
                for x,y in zip(xl,yl):
                    # skip if only a point.
                    if len(x) > 1 and len(y) > 1:
                        l = Line2D(x,y,linewidth=linewidth,linestyle=linestyle)
Esempio n. 20
 def __init__(self, names, shape, g=None, h=None, K=None):
     """Create the potential; parameters that are not given are set to zero.

     names -- variable names, passed on to Potential.__init__
     shape -- sizes of the variables; their sum is stored as self.n
     g     -- optional scalar, stored as a float (default 0.0)
     h     -- optional values for a Float32 vector of length self.n
     K     -- optional values for a Float32 matrix of shape (self.n, self.n)

     h and K are compared with None rather than truth-tested, so array
     arguments are accepted.
     """
     Potential.__init__(self, names)
     self.shape = shape
     # set parameters to 0s
     self.n = na.sum(shape)
     if not g: self.g = 0.0
     else: self.g = float(g)
     if h is None: self.h = na.zeros(shape=(self.n), type='Float32')
     else: self.h = na.array(h,shape=(self.n), type='Float32')
     if K is None: self.K = na.zeros(shape=(self.n, self.n), type='Float32')
     else: self.K = na.array(K, shape=(self.n, self.n), type='Float32')
Esempio n. 21
def med_func(x, y, sig, b):
    """Evaluate the sign-weighted sum of ``x`` for a trial slope ``b``.

    The intercept aa is the median of y - b*x and s is the median absolute
    residual divided by 0.6745.  Residuals are then divided by ``sig`` and
    those with magnitude above 1e-8 contribute sign(residual) * x.

    Returns (sum, s, aa).
    """
    tiny = 1.0e-8
    aa = median(y - b*x)
    resid = (y - aa - b*x)
    s = median(N.absolute(resid)) / 0.6745
    resid /= sig
    # keep only points whose scaled residual is clearly non-zero
    keep = N.absolute(resid) > tiny
    kept = N.compress(keep, resid)
    direction = kept / N.absolute(kept)
    total = N.sum(direction * N.compress(keep, x))
    return total, s, aa
Esempio n. 22
 # Chooses the upper limit and the tick spacing of the y axis and returns
 # them as (ymax+delta, delta).
 # NOTE(review): this copy is truncated -- the ymax expression for
 # 'month'/'hour' is cut off, the raise statement is incomplete, and the
 # else lines (before the raise, before the tkScale fallback and before
 # delta = 2) are missing.
 def set_yticks(self, elem, item):
     if self.tkAutoScale.get():
         if item in ['month', 'hour']:
             # sum over wind directions and categories
             total = na.sum(na.sum([elem], 3), 2)
             # fudge columns with no events
             total[na.ieeespecial.index(total, na.ieeespecial.ZERO)] = 1.0
             ymax = 100.0*(na.sum(na.sum([elem][:,:,:,:-1], 3), 
         elif item in ['wdir']:
             # sum categories
             total = na.sum([elem], 3)
             # fudge columns with no events
             total[na.ieeespecial.index(total, na.ieeespecial.ZERO)] = 1.0
             # largest percentage over everything but the last category
             ymax = 100.0*(na.sum([elem][:,:,:,:-1], 3)/total).max()
             raise Avn.AvnError('Bug in set_yticks(): unexpected item: %s' % \
         ymax = int(self.tkScale.get())
     # tick spacing grows with the size of the axis
     if ymax > 30:
         delta = 10
     elif ymax > 10:
         delta = 5
         delta = 2
     return ymax+delta, delta
Esempio n. 23
    # Recomputes self.current_pos and self.current_err from the observations
    # in self.associated_obs, weighting each coordinate by 1/error**2.
    # NOTE(review): this copy is damaged -- the docstring has lost its quotes
    # and the body of the empty-list check is missing.
    def recalculate_position(self):
        takes all the positions of the associated observation list and recalculated a position
        global blah
        if self.stype == "real":
            print "! not supposed to do this with a real source.."
        if len(self.associated_obs) == 0:

        # one row per observation: ra, dec, ra error, dec error
        pos_array = numarray.fromlist(
            [[x.pos[0], x.pos[1], x.assumed_err[0], x.assumed_err[1]]
             for x in self.associated_obs])
        raa = numarray.fromlist([x[0] for x in pos_array])
        raerra = numarray.fromlist([x[2] for x in pos_array])
        deca = numarray.fromlist([x[1] for x in pos_array])
        decerra = numarray.fromlist([x[3] for x in pos_array])

        # inverse-variance weighted means of the two coordinates
        ra = numarray.sum(raa / raerra**2) / numarray.sum(1.0 / raerra**2)
        dec = numarray.sum(deca / decerra**2) / numarray.sum(1.0 / decerra**2)
        # errors of the weighted means: 1 / sqrt(sum of the weights)
        raerr = math.sqrt(1.0 / numarray.sum(1.0 / raerra**2))
        decerr = math.sqrt(1.0 / numarray.sum(1.0 / decerra**2))
        self.current_pos = [ra, dec]
        self.current_err = [raerr, decerr]
Esempio n. 24
def calcCorrelationHelper(s1p, s2p):
    """Return the correlation coefficient of the paired values s1p and s2p.

    s1p, s2p -- numarray arrays of equal length, one entry per shared strain

    Returns 0.0 when fewer than six values are supplied, and 0.0 when the
    division by the product of the spreads raises ZeroDivisionError.
    """
    # if the traits share less than six strains, then we don't
    # bother with the correlations
    if len(s1p) < 6:
        return 0.0
    # subtract by x-bar and y-bar elementwise
    s1p = (s1p - numarray.average(s1p)).astype(numarray.Float64)
    s2p = (s2p - numarray.average(s2p)).astype(numarray.Float64)

    # square for the variances
    s1p_2 = numarray.sum(s1p**2)
    s2p_2 = numarray.sum(s2p**2)

    # the try line had been lost, leaving an orphan except clause
    try:
        corr = (numarray.sum(s1p*s2p)/
                numarray.sqrt(s1p_2 * s2p_2))
    except ZeroDivisionError:
        corr = 0.0

    return corr
 def normalize(self, dim=-1):
     """ If dim=-1 all elements sum to 1.  Otherwise sum to specific dimension, such that
     sum(Pr(x=i|Pa(x))) = 1 for all values of i and a specific set of values for Pa(x)

     dim -- -1, or a key of self.assocdim naming the variable to normalise
            over.  One-dimensional tables are always normalised as a whole.

     self.cpt is updated; nothing is returned.
     """
     if dim == -1 or len(self.cpt.shape) == 1:
         self.cpt /= self.cpt.sum()
     else:
         ndim = self.assocdim[dim]
         # swap axis 0 with the axis to normalise; a real list is needed
         # because the entries are assigned below
         order = list(range(len(self.names_list)))
         order[0] = ndim
         order[ndim] = 0
         tcpt = na.transpose(self.cpt, order)
         # totals along the (now leading) axis, repeated to the full shape
         t1cpt = na.sum(tcpt, axis=0)
         t1cpt = na.resize(t1cpt,tcpt.shape)
         tcpt = tcpt/t1cpt
         # the same swap puts the axes back in their original order
         self.cpt = na.transpose(tcpt, order)
Esempio n. 26
def binitsumequal(x, y, n):
    """Sort by ``x`` and split into ``n`` bins holding equal numbers of points.

    Every bin holds int(len(x) / n) consecutive points of the sorted data;
    points left over at the end are not used.

    Returns (xbin, ybin): the average x and the summed y of each bin.
    """
    order = N.argsort(x)
    y = N.take(y, order)
    x = N.take(x, order)
    per_bin = int(float(len(x)) / float(n))
    xbin = N.zeros(n, 'f')
    ybin = N.zeros(n, 'f')
    for i in range(n):
        lo = i * per_bin
        hi = lo + per_bin
        xbin[i] = N.average(x[lo:hi])
        ybin[i] = N.sum(y[lo:hi])
    return xbin, ybin
Esempio n. 27
def wmspec(data, windows=None, doxs=1):

    """Compute power and cross spectra, with a window or multitapers.

    data and doxs are as for multispec
    (except the default for doxs is different).

    If windows is None, no windowing is applied.

    If windows is one-dimensional, it is treated as a windowing function.

    If windows is two-dimensional, it is treated as a set of multitapers.

    The average over the last axis is subtracted from data first, and the
    spectra from all windows are averaged.  Returns (power, cross) when
    doxs is true, otherwise the power alone."""

    data = num.asarray(data)

    if windows is None:
        windows = [1]
    else:
        # view the windows as a 2-D stack, one window per row
        windows = num.array(windows, copy=0)
        win_length = windows.shape[-1]
        windows.shape = (-1, win_length)
    nwin = len(windows)

    # subtract the average
    avg = num.sum(data, -1) / data.shape[-1]
    avg = num.asarray(avg)
    data = data - avg[..., num.NewAxis]

    total_power = 0
    if doxs:
        total_cross = 0
    for window in windows:
        spectra = multispec(data * window, doxs=doxs)
        if doxs:
            # multispec returned a (power, cross) pair
            total_power += spectra[0]
            total_cross += spectra[1]
        else:
            total_power += spectra
    total_power /= nwin
    if doxs:
        total_cross /= nwin
        return total_power, total_cross
    else:
        return total_power
Esempio n. 28
Esempio n. 29
def biwt_func(x, y, sig, b): # Problems?!?
    """Evaluate the biweight-weighted sum of ``x`` for a trial slope ``b``.

    The intercept aa is the median of y - b*x and s is the median absolute
    residual divided by 0.6745.  Residuals d are then divided by ``sig``
    and points with |d| <= 6 contribute x * d * (1 - d**2/36)**2.

    Returns (sum, s, aa).
    """
    aa = median(y - b*x)
    resid = (y - aa - b*x)
    s = median(N.absolute(resid)) / 0.6745
    resid /= sig
    # biweight with cut-off c; a lorentzian weight d/(1+0.5*d**2) and a
    # sign-only (MAD) weight were tried as alternatives
    c = 6.0
    f = resid*(1-resid**2/c**2)**2
    inside = N.absolute(resid) <= c
    total = N.sum(N.compress(inside, x*f))
    return total, s, aa
Esempio n. 30
    # Prints statistics for the objects selected by mpaflag > 0.1, ew > 4 and
    # an absolute-magnitude cut (Mabs < -18 and Mabs < -20.38).
    # NOTE(review): this copy is truncated -- the y and y1 statements are cut
    # off, and xbin/ybin are not defined anywhere in the visible lines.
    def azmr(self):
	x=N.compress((self.mpaflag > 0.1) & (self.ew > 4.) & (self.Mabs < -18.),self.Mabs)
	y=N.compress((self.mpaflag > 0.1) & (self.ew > 4.) & (self.Mabs < -18.),
	x1=N.compress((self.mpaflag > 0.1) & (self.ew > 4.) & (self.Mabs < -20.38),self.Mabs)
	y1=N.compress((self.mpaflag > 0.1) & (self.ew > 4.) & (self.Mabs < -20.38),

	# mean and standard deviation of y1, then the binned values
	print "average Ar for Mr < -20.38 = %5.2f +/- %5.2f"%(N.average(y1),pylab.std(y1))
	for i in range(len(xbin)):
	    print i,xbin[i],ybin[i]
	print "Average of binned values = ",N.average(ybin)
	# the same statistics for log10(y1)
	print "average Ar for Mr < -20.38 = %5.2f +/- %5.2f"%(N.average(N.log10(y1)),pylab.std(N.log10(y1)))
	for i in range(len(xbin)):
	    print i,xbin[i],ybin[i]


	# sum of mpaflag divided by the number of objects
	print "fraction w/MPA stellar mass and Az = ",N.sum(self.mpaflag)/(1.*len(self.mpaflag))
Esempio n. 31
	def get_year_count(stn=''):
	    """Collect per-station inventory counts for a TAF site ID.

	    stn -- TAF site ID; an empty value returns an empty list

	    The IDs returned by cdutils.getHistory(stn) are matched against the
	    first 12 characters of every line of the inventory file ish_inv.
	    For every matching line, fields 0 and 1 form the key, and the entry
	    [field 2, sum of fields 3..13 as integers] is appended to that key's
	    list.  Every list is then passed through continuity_check.

	    Returns a dictionary keyed by (field 0, field 1), or [] when stn is
	    empty.
	    """
	    if not stn: return []

	    ids = cdutils.getHistory(stn)
	    lines = {}

	    fh = open(ish_inv, 'r')
	    try:
	        for line in fh:
	            if line[:12] in ids:
	                line_arr = [el.strip() for el in line.split(' ') if el != '']
	                key = (line_arr[0], line_arr[1])
	                lines.setdefault(key, []).append([line_arr[2], \
	                    numarray.sum([int(x) for x in line_arr[3:14]])])
	    finally:
	        # the original never closed the inventory file
	        fh.close()
	    for key in lines:
	        lines[key] = continuity_check(lines[key])
	    return lines
Esempio n. 32
def integrate(F,x,y,xStop,tol):
    """Integrate y' = F(x, y) from x to xStop with the modified midpoint
    method and Richardson extrapolation.

    F     -- callable F(x, y) returning the derivatives as an array
    x     -- start of the interval
    y     -- solution vector at x
    xStop -- end of the interval
    tol   -- the iteration stops once the rms change of the extrapolated
             solution between two refinements drops below this value

    Returns the solution at xStop.  If 49 refinements are not enough, a
    message is printed and None is returned.
    """

    def midpoint(F,x,y,xStop,nSteps):
  # Midpoint formulas
        h = (xStop - x)/nSteps
        y0 = y
        y1 = y0 + h*F(x,y0)
        for i in range(nSteps-1):
            x = x + h
            y2 = y0 + 2.0*h*F(x,y1)
            y0 = y1
            y1 = y2
        return 0.5*(y1 + y0 + h*F(x,y2))

    def richardson(r,k):
  # Richardson's extrapolation
        for j in range(k-1,0,-1):
            const = 4.0**(k-j)
            r[j] = (const*r[j+1] - r[j])/(const - 1.0)

    kMax = 51
    n = len(y)
  # the element type is passed positionally so the call does not depend
  # on the keyword being spelled `type`
    r = zeros((kMax,n),Float64)
  # Start with two integration steps
    nSteps = 2
    r[1] = midpoint(F,x,y,xStop,nSteps)
    r_old = r[1].copy()
  # Double the number of integration points
  # and refine result by Richardson extrapolation
    for k in range(2,kMax):
        nSteps = nSteps*2
        r[k] = midpoint(F,x,y,xStop,nSteps)
      # Without this call r[1] never changes, so the first convergence
      # check sees a zero change and returns the coarse two-step result
        richardson(r,k)
      # Compute RMS change in solution
        e = sqrt(sum((r[1] - r_old)**2)/n)
      # Check for convergence
        if e < tol: return r[1]
        r_old = r[1].copy()
    print("Midpoint method did not converge")
Esempio n. 33
    def Marginalise(self, varnames):
        """ Marginalises the variables specified in varnames.
        eg. a = Pr(A,B,C,D)
            a.Marginalise(['A','C']) --> Pr(B,D) = Sum(A,C)(Pr(A,B,C,D))

        varnames -- iterable of variable names (keys of self.assocdim);
                    a name listed twice is summed out once

        returns a new DiscretePotential instance
        the variables keep their relative order
        """
        temp = self.cpt.view()
        # highest axis first, so that the lower axis numbers stay valid while
        # axes (and the matching names) are removed one by one
        ax = sorted(set([self.assocdim[v] for v in varnames]), reverse=True)
        newnames = copy(self.names_list)
        for a in ax:
            temp = na.sum(temp, axis=a)
            # drop the summed-out variable; the original returned the full
            # list of names together with the reduced table
            newnames.pop(a)

        return self.__class__(newnames, temp.shape, temp)
Esempio n. 34
def integrate(F, x, y, xStop, tol):
    def midpoint(F, x, y, xStop, nSteps):
        # Midpoint formulas
        h = (xStop - x) / nSteps
        y0 = y
        y1 = y0 + h * F(x, y0)
        for i in range(nSteps - 1):
            x = x + h
            y2 = y0 + 2.0 * h * F(x, y1)
            y0 = y1
            y1 = y2
        return 0.5 * (y1 + y0 + h * F(x, y2))

    def richardson(r, k):
        # Richardson's extrapolation
        for j in range(k - 1, 0, -1):
            const = 4.0**(k - j)
            r[j] = (const * r[j + 1] - r[j]) / (const - 1.0)

    kMax = 51
    n = len(y)
    r = zeros((kMax, n), type=Float64)
    # Start with two integration steps
    nSteps = 2
    r[1] = midpoint(F, x, y, xStop, nSteps)
    r_old = r[1].copy()
    # Double the number of integration points
    # and refine result by Richardson extrapolation
    for k in range(2, kMax):
        nSteps = nSteps * 2
        r[k] = midpoint(F, x, y, xStop, nSteps)
        richardson(r, k)
        # Compute RMS change in solution
        e = sqrt(sum((r[1] - r_old)**2) / n)
        # Check for convergence
        if e < tol: return r[1]
        r_old = r[1].copy()
    print "Midpoint method did not converge"
Esempio n. 35
def solveforab(aveaperr, avearea, amin=.0, amax=1, bmin=.0, bmax=1, step=.005):
    """Grid-search a and b in  aveaperr ~ sqrt(area) * a * (1 + b*sqrt(area)).

    (a,b)=solveforab(aveaperr,avearea)

    aveaperr   -- measured values to be matched
    avearea    -- areas at which the values were measured
    amin, amax -- range searched for a (amax excluded)
    bmin, bmax -- range searched for b (bmax excluded)
    step       -- grid spacing used for both a and b

    The defaults reproduce the original fixed grid, 0 <= a, b < 1 in steps
    of 0.005.  Returns the pair (a, b) with the smallest summed absolute
    difference; on ties the first grid point visited wins.
    """
    a = N.arange(amin, amax, step, 'f')
    b = N.arange(bmin, bmax, step, 'f')
    # the square root does not depend on the grid point
    root_area = N.sqrt(avearea)
    mini = 1000000000.
    for ai in a:
        for bi in b:
            y = root_area * ai * (1. + bi * root_area)
            sumdiff = N.sum(abs(y - aveaperr))
            if sumdiff < mini:
                afinal = ai
                bfinal = bi
                mini = sumdiff
    return afinal, bfinal
Esempio n. 36
Esempio n. 37
def stddev2(numbers):
    """Return the population standard deviation of a one-dimensional array.

    Computed as sqrt(mean of squares - square of mean); an array that is
    not one-dimensional fails when its shape is unpacked.
    """
    (count,) = numbers.shape
    total = numarray.sum(numbers)
    total_sq = numarray.sum(numbers * numbers)
    mean_of_squares = total_sq / count
    return sqrt(mean_of_squares - (total / count)**2)
Esempio n. 38
def mad_combine(infileglob, outfilebase):
    # get list of files to operate on
    files = glob.glob(infileglob)
    # check more than one image exists
    if files < 2:
        print 'Less than two input files found!'
    print 'Operating on images',infileglob
    # get images
    images = _get_images(files)
    lenimages = len(images)
    print 'Found', lenimages, 'images'
    # get header of first image
    hdr = images[0][0].header
    # get ascard of first image
    hdrascard = hdr.ascard
    # get data from images into num (untidy tuple concatenation)
    data = num.zeros((lenimages,) + images[0][0].data.shape, num.Float32)
    for i in range(lenimages):
        data[i,:,:] = images[i][0].data
    # delete array
    del images
    # sort data
    print 'Sorting... (this may take a while, i.e an hour or so!)'
    #data_sorted = num.sort(data, axis=0)
    data_sorted = data  # for debugging
    # find standard deviation of lowest n - n_discard pixels
    print 'Calculating mad_low...'
    mad_low = _mad3(data_sorted[:-n_discard,:,:])
    # correct for bias to stddev
    num.multiply(mad_low, corr[`lenimages - n_discard` + ',' + `lenimages`], mad_low)
    # make median image
    print 'Calculating median...'
    if lenimages%2 != 0:  # then odd number of images
        m = (lenimages - 1) / 2
        median = data_sorted[m,:,:]
    else:                   # even number of images
        m = lenimages / 2
        median = (data_sorted[m,:,:] + data_sorted[m-1,:,:]) / 2
    # delete array
    del data_sorted
    # get ccd properties from header
    # these keywords are for FORS2
    # - they may need altering for other instruments

    gain = hdr['OUT1GAIN'] # N_{ADU} = gain * N_{e-}
    invgain = 1.0 / gain   # N_{e-} = invgain * N_{ADU}
    ron  = hdr['OUT1RON']  # read out noise in e- 
    # take only +ve values in median
    median_pos = num.choose(median < 0.0, (median, 0.0))
    # calculate sigma due to ccd noise for each pixel
    print 'Calculating noise_med...'
    noise_med = num.sqrt(median_pos * invgain + ron*ron) * gain
    # delete array
    del median_pos
    # find maximum of noise and mad_low
    # -> sigma to test pixels against to identify cosmics
    print 'Calculating sigma_test...'
    sigma_test = num.choose(noise_med < mad_low, (noise_med, mad_low))
    # delete arrays
    del mad_low, noise_med
    # calculate 'relative residual' for each pixel
    print 'Calculating rel_res...'
    rel_res = num.zeros(data.shape, num.Float32)
    res = num.zeros(data[0].shape, num.Float32)
    for i in range(lenimages):
        num.subtract(data[i,:,:], median, res)
        num.divide(res, sigma_test, rel_res[i,:,:])
    # delete arrays
    del sigma_test, res
    # now average over all pixels for which rel_res < sigma_limit
    # first count number included for each pixel
    # by testing to produce a boolean array, then summing over.
    print 'Calculating included...'
    included = num.zeros(rel_res[0].shape, num.Int16)
    included[:,:] = num.sum(rel_res <= sigma_limit)
    # put all discarded pixels to zero
    print 'Calculating combined...'
    pre_combine = num.choose(rel_res <= sigma_limit, (0.0,data))
    # delete array
    del rel_res
    # sum all pixels and divide by included to give mean
    combined = num.sum(pre_combine)
    # delete array
    del pre_combine
    num.divide(combined, included, combined)
    # Work out errors on this combined image
    # take only +ve values in combined
    mean_pos = num.choose(combined < 0.0, (combined, 0.0))
    # calculate sigma due to ccd noise for each pixel
    print 'Calculating noise_mean...'
    noise_mean = num.sqrt(mean_pos * invgain + ron*ron) * gain
    # delete array
    del mean_pos
    # create standard error image
    print 'Calculating error...'
    error = noise_mean / num.sqrt(included)
    # delete array
    del noise_mean
    # write all images to disk
    print 'Writing images to disk...'
    _write_images(combined, error, included,
                  hdrascard, outfilebase)
Esempio n. 39
def estimate_mixture(models, seqs, max_iter, eps, alpha=None):
    """ Given a Python-list of models and a SequenceSet seqs
    perform an nested EM to estimate maximum-likelihood
    parameters for the models and the mixture coefficients.
    The iteration stops after max_iter steps or if the
    improvement in log-likelihood is less than eps.

    alpha is a numarray of dimension len(models) containing
    the mixture coefficients. If alpha is not given, uniform
    values will be chosen.
    Result: The models are changed in place. Return value
    is (l, alpha, P) where l is the final log likelihood of
    seqs under the mixture, alpha is a numarray of
    dimension len(models) containing the mixture coefficients
    and P is a (|sequences| x |models|)-matrix containing
    P[model j| sequence i]
    done = 0
    iter = 1
    last_mixture_likelihood = -99999999.99
    # The (nr of seqs x nr of models)-matrix holding the likelihoods
    l = numarray.zeros((len(seqs), len(models)), numarray.Float)
    if alpha == None: # Uniform alpha
        logalpha = numarray.ones(len(models), numarray.Float) * \
        logalpha = numarray.log(alpha)
    print logalpha, numarray.exp(logalpha)
    log_nrseqs = math.log(len(seqs))

    while 1:
        # Score all sequences with all models
        for i, m in enumerate(models):
            loglikelihood = m.loglikelihoods(seqs)
            # numarray slices: l[:,i] is the i-th column of l
            l[:,i] = numarray.array(loglikelihood)

        #print l
        for i in xrange(len(seqs)):
            l[i] += logalpha # l[i] = ( log( a_k * P[seq i| model k]) )
        #print l
        mixture_likelihood = numarray.sum(numarray.sum(l))
        print "# iter %s joint likelihood = %f" % (iter, mixture_likelihood) 

        improvement = mixture_likelihood - last_mixture_likelihood
        if iter > max_iter or improvement < eps:

        # Compute P[model j| seq i]
        for i in xrange(len(seqs)):
            seq_logprob = sumlogs(l[i]) # \sum_{k} a_k P[seq i| model k]
            l[i] -= seq_logprob # l[i] = ( log P[model j | seq i] )

        #print l
        l_exp = numarray.exp(l) # XXX Use approx with table lookup
        #print "exp(l)", l_exp
        #print numarray.sum(numarray.transpose(l_exp)) # Print row sums

        # Compute priors alpha
        for i in xrange(len(models)):
            logalpha[i] = sumlogs(l[:,i]) - log_nrseqs

        #print "logalpha", logalpha, numarray.exp(logalpha)

        for j, m in enumerate(models):
            # Set the sequence weight for sequence i under model m to P[m| i]
            for i in xrange(len(seqs)):
            m.baumWelch(seqs, 10, 0.0001)

        iter += 1
        last_mixture_likelihood = mixture_likelihood

    return (mixture_likelihood, numarray.exp(logalpha), l_exp)
def logistic_regression(x,
 Uses the Newton-Raphson algorithm to calculate a maximum
 likelihood estimate logistic regression.
 The algorithm is known as 'iteratively re-weighted least squares', or IRLS.

 x - rank-1 or rank-2 array of predictors. If x is rank-2,
     the number of predictors = x.shape[0] = N.  If x is rank-1,
     it is assumed N=1.
 y - binary outcomes (if N>1 len(y) = x.shape[1], if N=1 len(y) = len(x))
 beta_start - initial beta vector (default zeros(N+1,x.dtype.char))
 if verbose=True, diagnostics printed for each iteration (default False).
 MAXIT - max number of iterations (default 500)
 CONV_THRESH - convergence threshold (sum of absolute differences
  of beta-beta_old, default 0.001)

 returns beta (the logistic regression coefficients, an N+1 element vector),
 J_bar (the (N+1)x(N+1) information matrix), and l (the log-likeliehood).
 J_bar can be used to estimate the covariance matrix and the standard
 error for beta.
 l can be used for a chi-squared significance test.

 covmat = inverse(J_bar)     --> covariance matrix of coefficents (beta)
 stderr = sqrt(diag(covmat)) --> standard errors for beta
 deviance = -2l              --> scaled deviance statistic
 chi-squared value for -2l is the model chi-squared test.
    if x.shape[-1] != len(y):
        raise ValueError, "x.shape[-1] and y should be the same length!"
        N, npreds = x.shape[1], x.shape[0]
    except:  # single predictor, use simple logistic regression routine.
        return _simple_logistic_regression(x,
    if beta_start is None:
        beta_start = NA.zeros(npreds + 1, x.dtype.char)
    X = NA.ones((npreds + 1, N), x.dtype.char)
    X[1:, :] = x
    Xt = NA.transpose(X)
    iter = 0
    diff = 1.
    beta = beta_start  # initial values
    if verbose:
        print 'iteration  beta log-likliehood |beta-beta_old|'
    while iter < MAXIT:
        beta_old = beta
        ebx = NA.exp(, X))
        p = ebx / (1. + ebx)
        l = NA.sum(y * NA.log(p) +
                   (1. - y) * NA.log(1. - p))  # log-likeliehood
        s =, y - p)  # scoring function
        J_bar = * p, Xt)  # information matrix
        beta = beta_old +, s)  # new value of beta
        diff = NA.sum(NA.fabs(beta - beta_old))  # sum of absolute differences
        if verbose:
            print iter + 1, beta, l, diff
        if diff <= CONV_THRESH: break
        iter = iter + 1
    if iter == MAXIT and diff > CONV_THRESH:
        print 'warning: convergence not achieved with threshold of %s in %s iterations' % (
            CONV_THRESH, MAXIT)
    return beta, J_bar, l
 # correlations
 r12 = 0.5  # average correlation between the first predictor and the obs.
 r13 = 0.25  # avg correlation between the second predictor and the obs.
 r23 = 0.125  # avg correlation between predictors.
 # random draws from trivariate normal distribution
 x = multivariate_normal(
     NA.array([0, 0, 0]),
     NA.array([[1, r12, r13], [r12, 1, r23], [r13, r23, 1]]), nsamps)
 x2 = multivariate_normal(
     NA.array([0, 0, 0]),
     NA.array([[1, r12, r13], [r12, 1, r23], [r13, r23, 1]]), nsamps)
 print 'correlations (r12,r13,r23) = ', r12, r13, r23
 print 'number of realizations = ', nsamps
 # training data.
 obs = x[:, 0]
 climprob = NA.sum((obs > 0).astype('f')) / nsamps
 fcst = NA.transpose(x[:, 1:])  # 2 predictors.
 obs_binary = obs > 0.
 # independent data for verification.
 obs2 = x2[:, 0]
 fcst2 = NA.transpose(x2[:, 1:])
 # compute logistic regression.
 beta, Jbar, llik = logistic_regression(fcst, obs_binary, verbose=True)
 covmat = LA.inverse(Jbar)
 stderr = NA.sqrt(mlab.diag(covmat))
 print 'beta =', beta
 print 'standard error =', stderr
 # forecasts from independent data.
 prob = calcprob(beta, fcst2)
 # compute Brier Skill Score
 verif = (obs2 > 0.).astype('f')
Esempio n. 42
def numeric_hamming4(num1, num2):
    """Count the positions at which ``num1`` and ``num2`` differ.

    The two arguments are compared with ``!=`` and the outcome is added up
    with ``numarray.sum``; for array arguments, whose comparison is
    element-wise, that total is the number of mismatching positions.

    Raises ``AssertionError`` when the two arguments differ in length.
    """
    assert len(num1) == len(num2)
    differing = num1 != num2
    return numarray.sum(differing)
Esempio n. 43
def rosen(x):  # The Rosenbrock function
    """Evaluate the Rosenbrock function at the point ``x``.

    Every consecutive pair ``(x[i], x[i+1])`` contributes
    ``100 * (x[i+1] - x[i]**2)**2 + (1 - x[i])**2``; the contributions are
    added together with ``Num.sum``.  ``x`` must support slicing and
    element-wise arithmetic (i.e. be an array).
    """
    leading = x[:-1]
    trailing = x[1:]
    valley_term = 100.0 * (trailing - leading**2.0)**2.0
    offset_term = (1 - leading)**2.0
    return Num.sum(valley_term + offset_term)
Esempio n. 44
# Build a 0/1 array: 1 where the 'GemCalLeVector' entry equals 1, else 0.
CalLeTriggered = numarray.where ( daSvac ['GemCalLeVector'] == 1, 1, 0 )

# The labels of the columns we will add to the data set
label_TkrTriggered = 'Number of Towers with Tkr Triggers'
label_CalLeTriggered = 'Number of Towers with CalLe Triggers'
label_TowerTkrTrigGemCond = 'Number of Towers with Tkr Triggers after Cut'
label_TowerCalLeTrigGemCond = 'Number of Towers with CalLe Triggers after Cut'

# For each event, add up the per-tower flags.
# The second argument of sum() is the axis to sum along: 1 adds up the
# entries within each row, giving one total per row
# (presumably one row per event -- TODO confirm against the data layout).
# Then add the resulting vector as a new column to the data set.
nbrTkrTriggered = numarray.sum ( TkrTriggered, 1 )
print "Verify shape of result of sum"
print nbrTkrTriggered.shape

daSvac [ label_TkrTriggered ]  = nbrTkrTriggered
nbrCalLeTriggered = numarray.sum ( CalLeTriggered, 1 )
daSvac [ label_CalLeTriggered ] = nbrCalLeTriggered
# Get the branch as an array and create a numarray mask that is true
# where the value is equal to 7
t = daSvac [ 'GemConditionsWord' ] == 7.0

# If GemConditionsWord == 7., then fill element with `nbrTkrTriggered',
# otherwise with -1
Esempio n. 45
    for index in range(len(psf_stars['data']['X'])):

        if x+xbox > data.getshape()[1] or x-xbox < 0 or y+ybox > data.getshape()[0] or y-ybox < 0:
        sec = data[t:b,l:r].copy()
        sec = shift(sec,(x-int(x),y-int(y)),order=3)
        obj =  N.where(sec > 2.0*average(average(sec)),1,0)
        sky2 = N.where(sec < 2.0*average(average(sec)),1,0)
        sky2 = N.sum(N.sum(sky2*sec))/N.sum(N.sum(sky2))

        (lab, nobj) = label(obj,structure=s)
        f = N.nd_image.find_objects(lab)
        msec = masked_outside(sec,sky2-5.0*sqrt(sky2),40000.)
        for i in range(1,nobj+1):
            if a*b < 10:
            if a/b < 0.5 or b/a < 0.5 or a>25 or b> 25:
                ### some part of chunk of the image has a wonky shape
                ### better skip this one.
Esempio n. 46
def itmean(arr, darr, thresh):
    """Return an iteratively clipped, weighted mean of ``arr``.

    Samples whose ``darr`` entry is not strictly positive are discarded
    first.  The mean is ``sum(arr / darr) / sum(1 / darr)``.  Starting from
    the mean of all remaining samples, the function repeatedly keeps only
    the samples with ``abs(arr - mean) < thresh`` and recomputes the mean.
    The selection is always made from the full positive-weight set, so a
    sample rejected on one pass may be re-admitted on a later one.
    Iteration stops as soon as two consecutive passes keep the same
    *number* of samples.

    Parameters:
        arr    -- sequence of sample values.
        darr   -- sequence of per-sample weights' denominators, one per
                  entry of ``arr``.
                  NOTE(review): the weight is 1/darr, not 1/darr**2 --
                  confirm whether darr is meant to be a variance.
        thresh -- largest absolute deviation from the current mean that a
                  sample may have and still be kept.

    Returns:
        The weighted mean of the samples kept on the final pass.

    Side effects:
        Prints the input lengths, and afterwards the unclipped mean, the
        clipped mean, the number of kept samples and the number of
        positive-weight samples.  The single-argument ``print(...)`` form
        emits the same text as the former multi-item print statements on
        Python 2 and is also valid on Python 3.
    """
    def weighted_mean(values, errors):
        # Weighted mean with weights 1/errors.
        return numarray.sum(values / errors) / numarray.sum(1. / errors)

    arr = numarray.array(arr)
    darr = numarray.array(darr)
    print("%s %s" % (len(arr), len(darr)))

    # Drop samples without a strictly positive darr; build the mask once
    # and apply it to both arrays.
    positive = darr > 0.
    arr = numarray.compress(positive, arr)
    darr = numarray.compress(positive, darr)

    unclipped_mean = weighted_mean(arr, darr)
    m = unclipped_mean
    arr2 = arr
    n_prev = 0
    n_kept = len(arr)
    # NOTE(review): convergence is judged on the sample count only, so a
    # pass that swaps one sample for another also ends the loop.
    while n_kept != n_prev:
        keep = abs(arr - m) < thresh
        arr2 = numarray.compress(keep, arr)
        darr2 = numarray.compress(keep, darr)
        m = weighted_mean(arr2, darr2)
        n_prev = n_kept
        n_kept = len(arr2)

    # m already holds the mean of the final selection; no need to
    # recompute it for the report or the return value.
    print("%s %s %s %s" % (unclipped_mean, m, len(arr2), len(arr)))
    return m
Esempio n. 47
def fig6c(xi, yi, zz):
    """ Plot cumulative metallicity distribution

    ``zz`` is summed along axis 1 with ``numarray.sum`` and the resulting
    curve is drawn against ``yi`` with ``pylab.plot``.  ``xi`` is accepted
    but not used here.
    """
    pylab.plot(yi, numarray.sum(zz, 1))
Esempio n. 49
# convert bins (Hz) to k:
# omega is the angular frequency, 2*pi times the frequency bins.
omega = 2 * N.pi * bins
# NOTE(review): presumably g is gravity and h the water depth -- confirm
# against W.WaveNumber's signature.
k = W.WaveNumber(g, omega, h)

# convert to energy at the bottom:
# the spectrum Es is divided by sinh(k*h) squared.
Eb = Es / (N.sinh(k*h)**2)

##print "Energy at Top:",
##print Es[0:1]
##print "Energy at Bottom:",
##print Eb[0:1]
##print "Ratio:"
##print Eb[0:1] / Es[0:1]

# total energy
Etotal = N.sum(Eb, 1)# sum across rows

print "max energy", N.maximum.reduce(Etotal)

# Plot energy over time:
# NOTE(review): F is not used again in this excerpt, and pylab.Figure is
# the Figure class rather than the pylab.figure() factory -- confirm this
# is what was intended.
F = pylab.Figure()
ax = pylab.subplot(1,1,1)

#print "Position:", ax.get_position()
# Raise the bottom edge of the axes by delta and shrink the height by the
# same amount (presumably to leave room for the date tick labels).
left, bottom, width, height = ax.get_position()
delta = 0.08
ax.set_position([left, bottom + delta, width, height-delta])

#pylab.plot(pylab.date2num(datetimes), Etotal )
#pylab.plot_date(pylab.date2num(datetimes), Etotal, "-" )
# Draw total energy against time as a solid line on the adjusted axes.
ax.plot_date(pylab.date2num(datetimes), Etotal, "-" )
Esempio n. 50
    def drawmeridians(self,ax,meridians,color='k',linewidth=1., \
 draw meridians (longitude lines).

 ax - current axis instance.
 meridians - list containing longitude values to draw (in degrees).
 color - color to draw meridians (default black).
 linewidth - line width for meridians (default 1.)
 linestyle - line style for meridians (default '--', i.e. dashed).
 dashes - dash pattern for meridians (default [1,1], i.e. 1 pixel on,
  1 pixel off).
 labels - list of 4 values (default [0,0,0,0]) that control whether
  meridians are labelled where they intersect the left, right, top or 
  bottom of the plot. For example labels=[1,0,0,1] will cause meridians
  to be labelled where they intersect the left and bottom of the plot,
  but not the right and top. Labels are located with a precision of 0.1
  degrees and are drawn using mathtext.
 font - mathtext font used for labels ('rm','tt','it' or 'cal', default 'rm'.
 fontsize - font size in points for labels (default 12).
        # don't draw meridians past latmax, always draw parallel at latmax.
        latmax = 80.  # not used for cyl, merc projections.
        # offset for labels.
        yoffset = (self.urcrnry - self.llcrnry) / 100. / self.aspect
        xoffset = (self.urcrnrx - self.llcrnrx) / 100.

        if self.projection not in ['merc', 'cyl']:
            lats = N.arange(-latmax, latmax + 1).astype('f')
            lats = N.arange(-90, 91).astype('f')
        xdelta = 0.1 * (self.xmax - self.xmin)
        ydelta = 0.1 * (self.ymax - self.ymin)
        for merid in meridians:
            lons = merid * N.ones(len(lats), 'f')
            x, y = self(lons, lats)
            # remove points outside domain.
            testx = N.logical_and(x >= self.xmin - xdelta,
                                  x <= self.xmax + xdelta)
            x = N.compress(testx, x)
            y = N.compress(testx, y)
            testy = N.logical_and(y >= self.ymin - ydelta,
                                  y <= self.ymax + ydelta)
            x = N.compress(testy, x)
            y = N.compress(testy, y)
            if len(x) > 1 and len(y) > 1:
                # split into separate line segments if necessary.
                # (not necessary for mercator or cylindrical).
                xd = (x[1:] - x[0:-1])**2
                yd = (y[1:] - y[0:-1])**2
                dist = N.sqrt(xd + yd)
                split = dist > 500000.
                if N.sum(split) and self.projection not in ['merc', 'cyl']:
                    ind = (N.compress(
                        split, pylab.squeeze(split * N.indices(xd.shape))) +
                    xl = []
                    yl = []
                    iprev = 0
                    for i in ind:
                        iprev = i
                    xl = [x]
                    yl = [y]
                # draw each line segment.
                for x, y in zip(xl, yl):
                    # skip if only a point.
                    if len(x) > 1 and len(y) > 1:
                        l = Line2D(x,
        # draw labels for meridians.
        # search along edges of map to see if meridians intersect.
        # if so, find x,y location of intersection and draw a label there.
        if self.projection == 'cyl':
            dx = 0.01
            dy = 0.01
        elif self.projection == 'merc':
            dx = 0.01
            dy = 1000
            dx = 1000
            dy = 1000
        for dolab, side in zip(labels, ['l', 'r', 't', 'b']):
            if not dolab: continue
            # for cyl or merc, don't draw meridians on left or right.
            if self.projection in ['cyl', 'merc'] and side in ['l', 'r']:
            if side in ['l', 'r']:
                nmax = int((self.ymax - self.ymin) / dy + 1)
                if self.urcrnry < self.llcrnry:
                    yy = self.llcrnry - dy * N.arange(nmax)
                    yy = self.llcrnry + dy * N.arange(nmax)
                if side == 'l':
                    lons, lats = self(self.llcrnrx * N.ones(yy.shape, 'f'),
                    lons, lats = self(self.urcrnrx * N.ones(yy.shape, 'f'),
                lons = N.where(lons < 0, lons + 360, lons)
                lons = [int(lon * 10) for lon in lons.tolist()]
                lats = [int(lat * 10) for lat in lats.tolist()]
                nmax = int((self.xmax - self.xmin) / dx + 1)
                if self.urcrnrx < self.llcrnrx:
                    xx = self.llcrnrx - dx * N.arange(nmax)
                    xx = self.llcrnrx + dx * N.arange(nmax)
                if side == 'b':
                    lons, lats = self(xx,
                                      self.llcrnry * N.ones(xx.shape, 'f'),
                    lons, lats = self(xx,
                                      self.urcrnry * N.ones(xx.shape, 'f'),
                lons = N.where(lons < 0, lons + 360, lons)
                lons = [int(lon * 10) for lon in lons.tolist()]
                lats = [int(lat * 10) for lat in lats.tolist()]
            for lon in meridians:
                if lon < 0: lon = lon + 360.
                # find index of meridian (there may be two, so
                # search from left and right).
                    nl = lons.index(int(lon * 10))
                    nl = -1
                    nr = len(lons) - lons[::-1].index(int(lon * 10)) - 1
                    nr = -1
                if lon > 180:
                    lonlab = r'$\%s{%g\/^{\circ}\/W}$' % (font,
                                                          N.fabs(lon - 360))
                elif lon < 180 and lon != 0:
                    lonlab = r'$\%s{%g\/^{\circ}\/E}$' % (font, lon)
                    lonlab = r'$\%s{%g\/^{\circ}}$' % (font, lon)
                # meridians can intersect each map edge twice.
                for i, n in enumerate([nl, nr]):
                    lat = lats[n] / 10.
                    # no meridians > latmax for projections other than merc,cyl.
                    if self.projection not in ['merc', 'cyl'] and lat > latmax:
                    # don't bother if close to the first label.
                    if i and abs(nr - nl) < 100: continue
                    if n > 0:
                        if side == 'l':
                            pylab.text(self.llcrnrx - xoffset,
                        elif side == 'r':
                            pylab.text(self.urcrnrx + xoffset,
                        elif side == 'b':
                                       self.llcrnry - yoffset,
                                       self.urcrnry + yoffset,

        # make sure axis ticks are turned off
Esempio n. 51
def matchum(file1,
    '''Take the output of two sextractor runs and match up the objects with
   each other (find out which objects in the first file match up with
   objects in the second file.  The routine considers a 'match' to be any 
   two objects that are closer than tol pixels (after applying the shift).  
   Returns a 6-tuple:  (x1,y1,x2,y2,o1,o2).  o1 and o2 are the ojbects
   numbers such that o1[i] in file 1 corresponds to o2[i] in file 2.'''
    NA = num.NewAxis

    sexdata1 = readsex(file1)
    sexdata2 = readsex(file2)

    # Use the readsex data to get arrays of the (x,y) positions
    x1 = num.asarray(sexdata1[0]['X_IMAGE'])
    y1 = num.asarray(sexdata1[0]['Y_IMAGE'])
    x2 = num.asarray(sexdata2[0]['X_IMAGE'])
    y2 = num.asarray(sexdata2[0]['Y_IMAGE'])
    m1 = num.asarray(sexdata1[0]['MAG_BEST'])
    m2 = num.asarray(sexdata2[0]['MAG_BEST'])
    o1 = num.asarray(sexdata1[0]['NUMBER'])
    o2 = num.asarray(sexdata2[0]['NUMBER'])
    f1 = num.asarray(sexdata1[0]['FLAGS'])
    f2 = num.asarray(sexdata2[0]['FLAGS'])

    # First, make a cut on the flags:
    gids = num.where(f1 < 4)
    x1 = x1[gids]
    y1 = y1[gids]
    m1 = m1[gids]
    o1 = o1[gids]
    gids = num.where(f2 < 4)
    x2 = x2[gids]
    y2 = y2[gids]
    m2 = m2[gids]
    o2 = o2[gids]

    # next, if there is a range to use:
    if xrange is not None and yrange is not None:
        cond = num.greater(x1, xrange[0])*num.less(x1,xrange[1])*\
              num.greater(y1, yrange[0])*num.less(y1,yrange[1])
        gids = num.where(cond)
        x1 = x1[gids]
        y1 = y1[gids]
        m1 = m1[gids]
        o1 = o1[gids]
        cond = num.greater(x2, xrange[0])*num.less(x2,xrange[1])*\
              num.greater(y2, yrange[0])*num.less(y2,yrange[1])
        gids = num.where(cond)
        x2 = x2[gids]
        y2 = y2[gids]
        m2 = m2[gids]
        o2 = o2[gids]

    # Use the user masks
    for m in im_masks1:
        print "applying mask (%d,%d,%d,%d)" % tuple(m)
        condx = num.less(x1, m[0]) + num.greater(x1, m[1])
        condy = num.less(y1, m[2]) + num.greater(y1, m[3])
        gids = num.where(condx + condy)
        x1 = x1[gids]
        y1 = y1[gids]
        m1 = m1[gids]
        o1 = o1[gids]

    for m in im_masks2:
        print "applying mask (%d,%d,%d,%d)" % tuple(m)
        condx = num.less(x2, m[0]) + num.greater(x2, m[1])
        condy = num.less(y2, m[2]) + num.greater(y2, m[3])
        gids = num.where(condx + condy)
        x2 = x2[gids]
        y2 = y2[gids]
        m2 = m2[gids]
        o2 = o2[gids]

    if nmax:
        if len(x1) > nmax:
            ids = num.argsort(m1)[0:nmax]
            x1 = x1[ids]
            y1 = y1[ids]
            m1 = m1[ids]
            o1 = o1[ids]
        if len(x2) > nmax:
            ids = num.argsort(m2)[0:nmax]
            x2 = x2[ids]
            y2 = y2[ids]
            m2 = m2[ids]
            o2 = o2[ids]
    if debug:
        print "objects in frame 1:"
        print o1
        print "objects in frame 2:"
        print o2
        mp = pygplot.MPlot(2, 1, device='/XWIN')
        p = pygplot.Plot()
        p.point(x1, y1)
        [p.label(x1[i], y1[i], "%d" % o1[i]) for i in range(len(x1))]
        p = pygplot.Plot()
        p.point(x2, y2)
        [p.label(x2[i], y2[i], "%d" % o2[i]) for i in range(len(x2))]

    # Now, we make 2-D arrays of all the differences in x and y between each pair
    #  of objects.  e.g., dx1[n,m] is the delta-x between object n and m in file 1 and
    #  dy2[n,m] is the y-distance between object n and m in file 2.
    dx1 = x1[NA, :] - x1[:, NA]
    dx2 = x2[NA, :] - x2[:, NA]
    dy1 = y1[NA, :] - y1[:, NA]
    dy2 = y2[NA, :] - y2[:, NA]
    # Same, but with angles
    da1 = num.arctan2(dy1, dx1) * 180 / num.pi
    da2 = num.arctan2(dy2, dx2) * 180 / num.pi
    # Same, but with absolute distances
    ds1 = num.sqrt(num.power(dx1, 2) + num.power(dy1, 2))
    ds2 = num.sqrt(num.power(dx2, 2) + num.power(dy2, 2))

    # Here's the real magic:  this is a matrix of matrices (4-D).  Consider 4 objects:
    #  objects i and j in file 1 and objects m and n in file 2.  dx[i,j,m,n] is the
    #  difference between delta-xs for objects i,j in file 1 and m,n in file 2.  If object
    #  i corresponds to object m and object j corresponds to object n, this should be a small
    #  number, regardless of an overall shift in coordinate systems between file 1 and 2.
    dx = dx1[::, ::, NA, NA] - dx2[NA, NA, ::, ::]
    dy = dy1[::, ::, NA, NA] - dy2[NA, NA, ::, ::]
    da = da1[::, ::, NA, NA] - da2[NA, NA, ::, ::] + aoffset
    ds = ds1[::, ::, NA, NA] - ds2[NA, NA, ::, ::]
    # pick out close pairs.
    #use = num.less(dy,perr)*num.less(dx,perr)*num.less(num.abs(da),aerr)
    use = num.less(ds, perr) * num.less(num.abs(da), aerr)
    use = use.astype(num.Int32)

    #use = num.less(num.abs(da),perr)
    suse = num.add.reduce(num.add.reduce(use, 3), 1)
    print suse[0]

    guse = num.greater(suse, suse.flat.max() / 2)
    i = [j for j in range(x1.shape[0]) if num.sum(guse[j])]
    m = [num.argmax(guse[j]) for j in range(x1.shape[0]) if num.sum(guse[j])]
    xx0, yy0, oo0, mm0 = num.take([x1, y1, o1, m1], i, 1)
    xx1, yy1, oo1, mm1 = num.take([x2, y2, o2, m2], m, 1)
    if debug:
        mp = pygplot.MPlot(2, 1, device='/XWIN')
        p = pygplot.Plot()
        p.point(xx0, yy0)
        [p.label(xx0[i], yy0[i], "%d" % oo0[i]) for i in range(len(xx0))]
        p = pygplot.Plot()
        p.point(xx1, yy1)
        [p.label(xx1[i], yy1[i], "%d" % oo1[i]) for i in range(len(xx1))]
    xshift, xscat = stats.bwt(xx0 - xx1)
    xscat = max([1.0, xscat])
    yshift, yscat = stats.bwt(yy0 - yy1)
    yscat = max([1.0, yscat])
    mshift, mscat = stats.bwt(mm0 - mm1)
    print "xscat = ", xscat
    print "yscat = ", yscat
    print "xshift = ", xshift
    print "yshift = ", yshift
    print "mshift = ", mshift
    print "mscat = ", mscat
    keep = num.less(num.abs(xx0-xx1-xshift),sigma*xscat)*\
    # This is a list of x,y,object# in each file.
    xx0, yy0, oo0, xx1, yy1, oo1 = num.compress(keep,
                                                [xx0, yy0, oo0, xx1, yy1, oo1],

    if debug:
        print file1, oo0
        print file2, oo1
        mp = pygplot.MPlot(2, 1, device='')
        p1 = pygplot.Plot()
        p1.point(xx0, yy0, symbol=25, color='red')
        for i in range(len(xx0)):
            p1.label(xx0[i], yy0[i], " %d" % oo0[i], color='red')
        p2 = pygplot.Plot()
        p2.point(xx1, yy1, symbol=25, color='green')
        for i in range(len(xx1)):
            p2.label(xx1[i], yy1[i], " %d" % oo1[i], color='green')

    if domags:
        return (xx0, yy0, mm0, xx1, yy1, mm1, mshift, mscat, oo0, oo1)
        return (xx0, yy0, xx1, yy1, oo0, oo1)
Esempio n. 52
    def _deltas(
            train_toks,  #fd_list, labeled_tokens, labels,
        Calculate the update values for the classifier weights for
        this iteration of IIS.  These update weights are the value of
        C{delta} that solves the equation::
          SUM[t,l] (classifier.prob(LabeledText(t,l)) *
                    fd_list.detect(LabeledText(t,l))[i] *
                    exp(delta[i] * nf(LabeledText(t,l))))

            - M{t} is a text C{labeled_tokens}
            - M{l} is an element of C{labels}
            - M{nf(ltext)} = SUM[M{j}] C{fd_list.detect}(M{ltext})[M{j}] 

        This method uses Newton's method to solve this equation for
        M{delta[i]}.  In particular, it starts with a guess of
        C{delta[i]}=1; and iteratively updates C{delta} with::

            delta[i] -= (ffreq_emperical[i] - sum1[i])/(-sum2[i])

        until convergence, where M{sum1} and M{sum2} are defined as::
          sum1 = SUM[t,l] (classifier.prob(LabeledText(t,l)) *
                           fd_list.detect(LabeledText(t,l))[i] *
                           exp(delta[i] * nf(LabeledText(t,l))))
          sum2 = SUM[t,l] (classifier.prob(LabeledText(t,l)) *
                           fd_list.detect(LabeledText(t,l))[i] *
                           nf(LabeledText(t,l)) *
                           exp(delta[i] * nf(LabeledText(t,l))))

        Note that M{sum1} and M{sum2} depend on C{delta}; so they need
        to be re-computed each iteration.
        The variables C{nfmap}, C{nfarray}, and C{nftranspose} are
        used to generate a dense encoding for M{nf(ltext)}.  This
        allows C{_deltas} to calculate M{sum1} and M{sum2} using
        matrices, which yields a signifigant performance improvement. 

        @param fd_list: The feature detector list for the classifier
            that this C{IISMaxentClassifierTrainer} is training.
        @type fd_list: C{FeatureDetectorListI}
        @param labeled_tokens: The set of training tokens.
        @type labeled_tokens: C{list} of C{Token} with C{LabeledText}
        @param labels: The set of labels that should be considered by
            the classifier constructed by this
        @type labels: C{list} of (immutable)
        @param classifier: The current classifier.
        @type classifier: C{ClassifierI}
        @param ffreq_emperical: An array containing the emperical
            frequency for each feature.  The M{i}th element of this
            array is the emperical frequency for feature M{i}.
        @type ffreq_emperical: C{sequence} of C{float}
        @param unattested: An array that is 1 for features that are
            not attested in the training data; and 0 for features that
            are attested.  In other words, C{unattested[i]==0} iff
        @type unattested: C{sequence} of C{int}
        @param nfmap: A map that can be used to compress C{nf} to a dense
        @type nfmap: C{dictionary} from C{int} to C{int}
        @param nfarray: An array that can be used to uncompress C{nf}
            from a dense vector.
        @type nfarray: C{array} of C{float}
        @param nftranspose: C{array} of C{float}
        @type nftranspose: The transpose of C{nfarray}
        # These parameters control when we decide that we've
        # converged.  It probably should be possible to set these
        # manually, via keyword arguments to train.
        NEWTON_CONVERGE = 1e-12
        MAX_NEWTON = 30

        deltas = numarray.ones(self._weight_vector_len, 'd')

        # Precompute the A matrix:
        # A[nf][id] = sum ( p(text) * p(label|text) * f(text,label) )
        # over all label,text s.t. num_features[label,text]=nf
        A = numarray.zeros((len(nfmap), self._weight_vector_len), 'd')

        for i, tok in enumerate(train_toks):
            dist = classifier.get_class_probs(tok)

            # Find the number of active features.
            feature_vector = tok['FEATURE_VECTOR']
            assignments = feature_vector.assignments()
            nf = sum([val for (id, val) in assignments])

            # Update the A matrix
            for cls, offset in self._offsets.items():
                for (id, val) in assignments:
                    A[nfmap[nf], id + offset] += dist.prob(cls) * val
        A /= len(train_toks)

        # Iteratively solve for delta.  Use the following variables:
        #   - nf_delta[x][y] = nf[x] * delta[y]
        #   - exp_nf_delta[x][y] = exp(nf[x] * delta[y])
        #   - nf_exp_nf_delta[x][y] = nf[x] * exp(nf[x] * delta[y])
        #   - sum1[i][nf] = sum p(text)p(label|text)f[i](label,text)
        #                       exp(delta[i]nf)
        #   - sum2[i][nf] = sum p(text)p(label|text)f[i](label,text)
        #                       nf exp(delta[i]nf)
        for rangenum in range(MAX_NEWTON):
            nf_delta = numarray.outerproduct(nfarray, deltas)
            exp_nf_delta = numarray.exp(nf_delta)
            nf_exp_nf_delta = nftranspose * exp_nf_delta
            sum1 = numarray.sum(exp_nf_delta * A)
            sum2 = numarray.sum(nf_exp_nf_delta * A)

            # Avoid division by zero.
            sum2 += unattested

            # Update the deltas.
            deltas -= (ffreq_emperical - sum1) / -sum2

            # We can stop once we converge.
            n_error = (numarray.sum(abs(
                (ffreq_emperical - sum1))) / numarray.sum(abs(deltas)))
            if n_error < NEWTON_CONVERGE:
                return deltas

        return deltas