Python searchsorted Examples, numarray.searchsorted Python Examples

Example #1

0

Show file

def interpolate(points, t):
    """Piecewise-linear interpolation of a sampled curve onto the grid ``t``.

    ``points`` is a pair ``(xx, yy)`` of arrays holding the sample abscissae
    and ordinates; ``t`` is the array of abscissae at which the curve is
    wanted.  Both ``xx`` and ``t`` are assumed to be sorted ascending.

    Grid values at or below the first sample are extrapolated along the
    first segment, values at or above the last sample along the last
    segment.  The result is the concatenation (left part, interior part,
    right part), which follows the order of ``t`` when ``t`` is sorted.
    """
    xx, yy = points
    first_x = min(xx)
    last_x = xx[-1]

    # Split the grid into the part left of the samples, the part strictly
    # between them, and the part right of them.
    below = numarray.compress(t <= first_x, t)
    inside = numarray.compress((t > first_x) & (t < last_x), t)
    above = numarray.compress(t >= last_x, t)

    # Slope of the first segment; a vertical first segment gets a huge
    # finite slope carrying the sign of the jump instead of a division by 0.
    rise, run = yy[1] - yy[0], xx[1] - xx[0]
    if run == 0:
        head_slope = sign(rise) * 1e20
    else:
        head_slope = rise / run
    tail_slope = (yy[-1] - yy[-2]) / (xx[-1] - xx[-2])

    # For every interior grid value look up the two samples bracketing it.
    upper = numarray.searchsorted(xx, inside)
    lower = upper - 1
    x_left, y_left = xx[lower], yy[lower]
    seg_slope = (yy[upper] - y_left) / (xx[upper] - x_left)

    return numarray.concatenate((
        head_slope * (below - xx[0]) + yy[0],  # extrapolate to the left
        seg_slope * (inside - x_left) + y_left,
        tail_slope * (above - xx[-1]) + yy[-1],  # extrapolate to the right
    ))

Example #2

0

Show file

File: bezier.py Project: certik/chemev

def interpolate(points,t):
    """Piecewise-linear interpolation of a sampled curve onto the grid ``t``.

    ``points`` is a pair ``(xx, yy)`` of arrays holding the sample abscissae
    and ordinates; ``t`` is the array of abscissae at which the curve is
    wanted.  Both ``xx`` and ``t`` are assumed to be sorted ascending.

    Grid values at or below the first sample are extrapolated along the
    first segment, values at or above the last sample along the last
    segment.  The result is the concatenation (left part, interior part,
    right part), which follows the order of ``t`` when ``t`` is sorted.
    """
    xx, yy = points
    first_x = min(xx)
    last_x = xx[-1]

    # Split the grid into the part left of the samples, the part strictly
    # between them, and the part right of them.
    below = numarray.compress(t <= first_x, t)
    inside = numarray.compress((t > first_x) & (t < last_x), t)
    above = numarray.compress(t >= last_x, t)

    # Slope of the first segment; a vertical first segment gets a huge
    # finite slope carrying the sign of the jump instead of a division by 0.
    rise, run = yy[1] - yy[0], xx[1] - xx[0]
    if run == 0:
        head_slope = sign(rise) * 1e20
    else:
        head_slope = rise / run
    tail_slope = (yy[-1] - yy[-2]) / (xx[-1] - xx[-2])

    # For every interior grid value look up the two samples bracketing it.
    upper = numarray.searchsorted(xx, inside)
    lower = upper - 1
    x_left, y_left = xx[lower], yy[lower]
    seg_slope = (yy[upper] - y_left) / (xx[upper] - x_left)

    return numarray.concatenate((
        head_slope * (below - xx[0]) + yy[0],  # extrapolate to the left
        seg_slope * (inside - x_left) + y_left,
        tail_slope * (above - xx[-1]) + yy[-1],  # extrapolate to the right
    ))

Example #3

0

Show file

File: plotsmooth.py Project: certik/chemev

def fig3e(t,m,w,p):
    """ Plot image of log(age),metallicity weights -- nearest neighbor interpolation"""
    xi = pylab.linspace(8,10.25,512)
    yi = pylab.linspace(-2.5,0.9,512)
    z = numarray.zeros((len(xi),len(yi)),numarray.Float) # create t, metallicity array
    x = t
    y = m
    # Find the indices in the array
    xindex = numarray.searchsorted(xi,x)
    if p.sigma > 0:
        for i in range(len(y)):
            nstars = w[i]*normgauss(yi,y[i],p.sigma)
            print "shape(z),len(nstars)", numarray.shape(z), len(nstars)
            z[xindex[i],:] += nstars
    else:
        yindex = numarray.searchsorted(yi,y)
        z[xindex,yindex] = z[xindex,yindex] + weight # increment the 2-d array
    zz = numarray.transpose(z)
    pylab.imshow(zz,extent=[8,10.25,-2.5,0.9],aspect='auto')

Example #4

0

Show file

def fig3e(t, m, w, p):
    """ Plot image of log(age),metallicity weights -- nearest neighbor interpolation"""
    xi = pylab.linspace(8, 10.25, 512)
    yi = pylab.linspace(-2.5, 0.9, 512)
    z = numarray.zeros((len(xi), len(yi)),
                       numarray.Float)  # create t, metallicity array
    x = t
    y = m
    # Find the indices in the array
    xindex = numarray.searchsorted(xi, x)
    if p.sigma > 0:
        for i in range(len(y)):
            nstars = w[i] * normgauss(yi, y[i], p.sigma)
            print "shape(z),len(nstars)", numarray.shape(z), len(nstars)
            z[xindex[i], :] += nstars
    else:
        yindex = numarray.searchsorted(yi, y)
        z[xindex,
          yindex] = z[xindex, yindex] + weight  # increment the 2-d array
    zz = numarray.transpose(z)
    pylab.imshow(zz, extent=[8, 10.25, -2.5, 0.9], aspect='auto')

Example #5

0

Show file

def fig6(m, w):  # Metallicity distribution
    """ fig6(m,w) -- differential metallicity distribution

    Sums the weights ``w`` of the metallicities ``m`` into bins of width 0.2
    whose edges run from -2.7 up to (but excluding) 1.0, draws the result as
    a bar chart and returns ``(edges[1:], sums[:-1])`` exactly as plotted.
    """
    mstep = 0.2
    mmax = 1.0
    mmin = -2.5 - mstep  # one extra bin below the nominal lower limit
    mbins = numarray.arange(mmin, mmax, mstep)

    # Multiplying by 0. turns the zeros into a floating point accumulator.
    weight = numarray.zeros(len(mbins)) * 0.
    indices = numarray.searchsorted(mbins, m)
    for i in range(len(m)):
        slot = indices[i]
        weight[slot] += w[i]

    edges = mbins[1:]
    totals = weight[:-1]
    pylab.bar(edges, totals, width=mstep, edgecolor=None)
    pylab.xlabel("[Fe/H]")
    pylab.ylabel("N")
    return edges, totals

Example #6

0

Show file

File: plotsmooth.py Project: certik/chemev

def fig6(m,w): # Metallicity distribution
    """ fig6(m,w) -- differential metallicity distribution

    Sums the weights ``w`` of the metallicities ``m`` into bins of width 0.2
    whose edges run from -2.7 up to (but excluding) 1.0, draws the result as
    a bar chart and returns ``(edges[1:], sums[:-1])`` exactly as plotted.
    """
    mstep = 0.2
    mmax = 1.0
    mmin = -2.5 - mstep  # one extra bin below the nominal lower limit
    mbins = numarray.arange(mmin, mmax, mstep)

    # Multiplying by 0. turns the zeros into a floating point accumulator.
    weight = numarray.zeros(len(mbins)) * 0.
    indices = numarray.searchsorted(mbins, m)
    for i in range(len(m)):
        slot = indices[i]
        weight[slot] += w[i]

    edges = mbins[1:]
    totals = weight[:-1]
    pylab.bar(edges, totals, width=mstep, edgecolor=None)
    pylab.xlabel("[Fe/H]")
    pylab.ylabel("N")
    return edges, totals

Example #7

0

Show file

def fig7b(t, w):  # Age distribution
    """ fig7b(t,w) -- differential age distribution

    ``t`` is converted to Gyr as ``10**t / 1e9`` and the weights ``w`` are
    summed into bins of width 0.5 whose edges run from -0.5 up to (but
    excluding) 14.5.  The sums are drawn as a bar chart and returned as
    ``(edges[1:], sums[:-1])`` exactly as plotted.
    """
    tstep = 0.5
    tmax = 14.5
    tmin = 0.0 - tstep  # one extra bin below the nominal lower limit
    tbins = numarray.arange(tmin, tmax, tstep)

    t_gyr = 10.**t / 1.e9
    indices = numarray.searchsorted(tbins, t_gyr)
    # Multiplying by 0. turns the zeros into a floating point accumulator.
    weight = numarray.zeros(len(tbins)) * 0.
    for i in range(len(w)):
        slot = indices[i]
        weight[slot] += w[i]

    edges = tbins[1:]
    totals = weight[:-1]
    pylab.bar(edges, totals, width=tstep, edgecolor=None)
    pylab.xlabel("Age (Gyr)")
    pylab.ylabel("N")
    return edges, totals

Example #8

0

Show file

File: plotsmooth.py Project: certik/chemev

def fig7b(t,w): # Age distribution
    """ fig7b(t,w) -- differential age distribution

    ``t`` is converted to Gyr as ``10**t / 1e9`` and the weights ``w`` are
    summed into bins of width 0.5 whose edges run from -0.5 up to (but
    excluding) 14.5.  The sums are drawn as a bar chart and returned as
    ``(edges[1:], sums[:-1])`` exactly as plotted.
    """
    tstep = 0.5
    tmax = 14.5
    tmin = 0.0 - tstep  # one extra bin below the nominal lower limit
    tbins = numarray.arange(tmin, tmax, tstep)

    t_gyr = 10.**t / 1.e9
    indices = numarray.searchsorted(tbins, t_gyr)
    # Multiplying by 0. turns the zeros into a floating point accumulator.
    weight = numarray.zeros(len(tbins)) * 0.
    for i in range(len(w)):
        slot = indices[i]
        weight[slot] += w[i]

    edges = tbins[1:]
    totals = weight[:-1]
    pylab.bar(edges, totals, width=tstep, edgecolor=None)
    pylab.xlabel("Age (Gyr)")
    pylab.ylabel("N")
    return edges, totals

Example #9

0

Show file

def fig3b(t, m, w, p):
    """ Plot image of age,metallicity weights -- nearest neighbor interpolation"""
    NREP = 50
    xi = pylab.linspace(0.1, 15, 512)
    yi = pylab.linspace(-2.5, 0.9, 512)
    z = numarray.zeros((len(xi), len(yi)),
                       numarray.Float)  # create t, metallicity array
    y = numarray.repeat(m, NREP)
    dt = t[1] - t[0]
    x = numarray.arange(t[0], t[-1] + 2 * dt, dt / NREP)
    x = x[0:len(y)]
    x = 10.**x / 1.e9
    weight = numarray.repeat(w, NREP)
    # Find the indices in the array
    xindex = numarray.searchsorted(xi, x)
    print "shape(x), shape(y), shape(weight)", numarray.shape(
        x), numarray.shape(y), numarray.shape(weight)
    if p.sigma > 0:
        if p.dsigmadlogt == 0.:
            for i in range(len(y)):
                nstars = weight[i] * normgauss(yi, y[i], p.sigma)
                j = xindex[i]
                z[j, :] += nstars
        if p.dsigmadlogt != 0.:
            for i in range(len(y)):
                logt0 = numarray.log10(x[0])
                logt = numarray.log10(x[i])
                sigma = p.sigma + p.dsigmadlogt * (logt - logt0)
                nstars = weight[i] * normgauss(yi, y[i], sigma)
                j = xindex[i]
                z[j, :] += nstars
    else:
        sigma = 0.01
        for i in range(len(y)):
            nstars = weight[i] * normgauss(yi, y[i], sigma)
            j = xindex[i]
            z[j, :] += nstars


#       yindex = numarray.searchsorted(yi,y)
#       z[xindex,yindex] = z[xindex,yindex] + weight # increment the 2-d array
    zz = numarray.transpose(z)
    pylab.imshow(zz, extent=[0.1, 15, -2.5, 0.9], aspect='auto')
    pylab.xlabel("Age (Gyr)")
    pylab.ylabel("[Fe/H]")
    return xi, yi, zz

Example #10

0

Show file

File: plotsmooth.py Project: certik/chemev

def fig3b(t,m,w,p):
    """ Plot image of age,metallicity weights -- nearest neighbor interpolation"""
    NREP = 50
    xi = pylab.linspace(0.1,15,512)
    yi = pylab.linspace(-2.5,0.9,512)
    z = numarray.zeros((len(xi),len(yi)),numarray.Float) # create t, metallicity array
    y = numarray.repeat(m,NREP)
    dt = t[1]-t[0]
    x = numarray.arange(t[0],t[-1]+2*dt,dt/NREP)
    x = x[0:len(y)]
    x = 10.**x/1.e9
    weight = numarray.repeat(w,NREP)
    # Find the indices in the array
    xindex = numarray.searchsorted(xi,x)
    print "shape(x), shape(y), shape(weight)", numarray.shape(x),numarray.shape(y), numarray.shape(weight)
    if p.sigma > 0:
        if p.dsigmadlogt == 0.:
            for i in range(len(y)):
                nstars = weight[i]*normgauss(yi,y[i],p.sigma)
                j = xindex[i]
                z[j,:] += nstars
        if p.dsigmadlogt != 0.:
            for i in range(len(y)):
                logt0 = numarray.log10(x[0])
                logt = numarray.log10(x[i])
                sigma = p.sigma + p.dsigmadlogt*(logt-logt0)
                nstars = weight[i]*normgauss(yi,y[i],sigma)
                j = xindex[i]
                z[j,:] += nstars
    else:
        sigma = 0.01
        for i in range(len(y)):
            nstars = weight[i]*normgauss(yi,y[i],sigma)
            j = xindex[i]
            z[j,:] += nstars
#       yindex = numarray.searchsorted(yi,y)
#       z[xindex,yindex] = z[xindex,yindex] + weight # increment the 2-d array
    zz = numarray.transpose(z)
    pylab.imshow(zz,extent=[0.1,15,-2.5,0.9],aspect='auto')
    pylab.xlabel("Age (Gyr)")
    pylab.ylabel("[Fe/H]")
    return xi,yi,zz

Example #11

0

Show file

File: recipe-335390.py Project: bhramoss/code

def find_closest(input_array, target_array, tol):
    """
    Match every element of target_array to a nearby element of input_array.

    Typical use: given irregular observation times (input_array) and a list
    of times of interest (target_array), find the observation nearest to
    each time of interest and tell which ones are close enough.

    NOTE: input_array must be sorted!  target_array may be in any order.

    Inputs:
      input_array:  a sorted Float64 numarray
      target_array: a Float64 numarray
      tol:          a tolerance

    Returns a tuple (closest_indices, accept_indices, reject_indices):
      closest_indices:  for each target, an index into input_array.  It
                        starts as the searchsorted insertion point (clamped
                        to the last element) and is moved to the lower
                        neighbour only when that neighbour is at least as
                        close as the upper one AND its distance is <= tol.
      accept_indices:   indices of targets that equal an input element or
                        lie strictly closer than tol to their best match
      reject_indices:   indices of all other targets
    """
    n_input = len(input_array)
    n_target = len(target_array)
    # Insertion points of the targets in input_array; refined in place below.
    closest_indices = numarray.searchsorted(input_array, target_array)
    # matched[i] becomes i once target i is accepted and stays -1 otherwise.
    matched = [-1] * n_target

    for i in range(n_target):
        wanted = target_array[i]
        slot = closest_indices[i]
        step = 0        # -1 when the lower neighbour is the better match
        exact = False

        if slot >= n_input:
            # Beyond the last input element: compare against that element.
            closest_indices[i] = n_input - 1
            gap = wanted - input_array[n_input - 1]
        elif wanted == input_array[slot]:
            # The target is itself an element of input_array.
            gap = 0.0
            exact = True
        elif slot == 0:
            # Before the first input element.
            gap = input_array[0] - wanted
        else:
            # Strictly between input_array[slot - 1] and input_array[slot].
            gap_above = input_array[slot] - wanted
            gap_below = wanted - input_array[slot - 1]
            if gap_below <= gap_above:
                gap, step = gap_below, -1
            else:
                gap = gap_above

        # Exact hits are always accepted; otherwise the distance has to be
        # strictly below the tolerance.
        if exact or gap < tol:
            matched[i] = i
        # The index itself is corrected for distances up to and including tol.
        if gap <= tol:
            closest_indices[i] += step

    accept_indices = numarray.compress(numarray.greater(matched, -1), matched)
    reject_indices = numarray.compress(numarray.equal(matched, -1),
                                       numarray.arange(n_target))
    return (closest_indices, accept_indices, reject_indices)

Example #12

0

Show file

def _fractional_index(grid, values):
    """Map each entry of the rank-1 array ``values`` to a fractional index
    along the increasing rank-1 array ``grid``.

    Entries below ``grid[0]`` map to -1 and entries above ``grid[-1]`` map to
    ``len(grid)``, i.e. outside the data array on either side.
    """
    idx = N.searchsorted(grid, values) - 1
    coords = N.zeros(idx.shape, 'f')
    for n, i in enumerate(idx):
        if i < 0:
            # searchsorted gives 0 both for values below the grid and for
            # values exactly on grid[0]; only the former are out of range.
            if values[n] == grid[0]:
                coords[n] = 0
            else:
                coords[n] = -1  # outside of range (lower end)
        elif i >= len(grid) - 1:
            coords[n] = len(grid)  # outside of range (upper end)
        else:
            coords[n] = float(i) + (values[n] - grid[i]) / (grid[i + 1] - grid[i])
    return coords


def interp(datain,lonsin,latsin,lonsout,latsout,checkbounds=False,mode='nearest',cval=0.0,order=3):
    """
 dataout = interp(datain,lonsin,latsin,lonsout,latsout,checkbounds=False,mode='nearest',cval=0.0,order=3)

 interpolate data (datain) on a rectilinear lat/lon grid (with lons=lonsin
 lats=latsin) to a grid with lons=lonsout, lats=latsout.

 datain is a rank-2 array indexed as datain[lat,lon]: the 1st dimension
 corresponds to latitude, the 2nd dimension to longitude.

 lonsin, latsin are rank-1 arrays containing longitudes and latitudes
 of datain grid in increasing order (i.e. from Greenwich meridian eastward, and
 South Pole northward)

 lonsout, latsout are rank-2 arrays containing lons and lats of the desired
 output grid (typically a native map projection grid).

 If checkbounds=True, values of lonsout and latsout are checked to see that
 they lie within the range specified by lonsin and latsin.  Default is
 False, and values outside the borders are handled in the manner described
 by the 'mode' parameter (default mode='nearest', which means the nearest
 boundary value is used). See section 20.2 of the numarray docs for
 information on the 'mode' keyword.

 See numarray.nd_image.map_coordinates documentation for information on
 the other optional keyword parameters.  The order keyword can be 0
 for nearest neighbor interpolation (nd_image only allows 1-6) - if
 order=0 bounds checking is done even if checkbounds=False.

 Raises ValueError if lonsin/latsin are not increasing, or if bounds
 checking is active and an output point lies outside the input grid.
    """
    # lonsin and latsin must be monotonically increasing.
    if lonsin[-1]-lonsin[0] < 0 or latsin[-1]-latsin[0] < 0:
        raise ValueError('lonsin and latsin must be increasing!')
    # optionally, check that lonsout,latsout are
    # within region defined by lonsin,latsin.
    # (this check is always done if nearest neighbor
    # interpolation (order=0) requested).
    if checkbounds or order == 0:
        if min(N.ravel(lonsout)) < min(lonsin) or \
           max(N.ravel(lonsout)) > max(lonsin) or \
           min(N.ravel(latsout)) < min(latsin) or \
           max(N.ravel(latsout)) > max(latsin):
            raise ValueError('latsout or lonsout outside range of latsin or lonsin')
    # compute grid coordinates of output grid.
    delon = lonsin[1:]-lonsin[0:-1]
    delat = latsin[1:]-latsin[0:-1]
    if max(delat)-min(delat) < 1.e-4 and max(delon)-min(delon) < 1.e-4:
        # regular input grid.
        xcoords = (len(lonsin)-1)*(lonsout-lonsin[0])/(lonsin[-1]-lonsin[0])
        ycoords = (len(latsin)-1)*(latsout-latsin[0])/(latsin[-1]-latsin[0])
    else:
        # irregular (but still rectilinear) input grid: locate every output
        # point between its two neighbouring input grid lines.
        xcoords = N.reshape(_fractional_index(lonsin, N.ravel(lonsout)), lonsout.shape)
        ycoords = N.reshape(_fractional_index(latsin, N.ravel(latsout)), latsout.shape)
    # datain is indexed [lat,lon], hence y comes first.
    coords = [ycoords,xcoords]
    # interpolate to output grid using numarray.nd_image spline filter.
    if order:
        return nd_image.map_coordinates(datain,coords,mode=mode,cval=cval,order=order)
    else:
        # nearest neighbor interpolation if order=0.
        # uses index arrays, so first convert to numarray.
        datatmp = N.array(datain,datain.typecode())
        xi = N.around(xcoords).astype('i')
        yi = N.around(ycoords).astype('i')
        return datatmp[yi,xi]

Example #13

0

Show file

File: basemap.py Project: jtomase/matplotlib

def _fractional_index(grid, values):
    """Map each entry of the rank-1 array ``values`` to a fractional index
    along the increasing rank-1 array ``grid``.

    Entries below ``grid[0]`` map to -1 and entries above ``grid[-1]`` map to
    ``len(grid)``, i.e. outside the data array on either side.
    """
    idx = N.searchsorted(grid, values) - 1
    coords = N.zeros(idx.shape, 'f')
    for n, i in enumerate(idx):
        if i < 0:
            # searchsorted gives 0 both for values below the grid and for
            # values exactly on grid[0]; only the former are out of range.
            if values[n] == grid[0]:
                coords[n] = 0
            else:
                coords[n] = -1  # outside of range (lower end)
        elif i >= len(grid) - 1:
            coords[n] = len(grid)  # outside of range (upper end)
        else:
            coords[n] = float(i) + (values[n] - grid[i]) / (
                grid[i + 1] - grid[i])
    return coords


def interp(datain,
           lonsin,
           latsin,
           lonsout,
           latsout,
           checkbounds=False,
           mode='nearest',
           cval=0.0,
           order=3):
    """
 dataout = interp(datain,lonsin,latsin,lonsout,latsout,checkbounds=False,mode='nearest',cval=0.0,order=3)

 interpolate data (datain) on a rectilinear lat/lon grid (with lons=lonsin
 lats=latsin) to a grid with lons=lonsout, lats=latsout.

 datain is a rank-2 array indexed as datain[lat,lon]: the 1st dimension
 corresponds to latitude, the 2nd dimension to longitude.

 lonsin, latsin are rank-1 arrays containing longitudes and latitudes
 of datain grid in increasing order (i.e. from Greenwich meridian eastward, and
 South Pole northward)

 lonsout, latsout are rank-2 arrays containing lons and lats of the desired
 output grid (typically a native map projection grid).

 If checkbounds=True, values of lonsout and latsout are checked to see that
 they lie within the range specified by lonsin and latsin.  Default is
 False, and values outside the borders are handled in the manner described
 by the 'mode' parameter (default mode='nearest', which means the nearest
 boundary value is used). See section 20.2 of the numarray docs for
 information on the 'mode' keyword.

 See numarray.nd_image.map_coordinates documentation for information on
 the other optional keyword parameters.  The order keyword can be 0
 for nearest neighbor interpolation (nd_image only allows 1-6) - if
 order=0 bounds checking is done even if checkbounds=False.

 Raises ValueError if lonsin/latsin are not increasing, or if bounds
 checking is active and an output point lies outside the input grid.
    """
    # lonsin and latsin must be monotonically increasing.
    if lonsin[-1] - lonsin[0] < 0 or latsin[-1] - latsin[0] < 0:
        raise ValueError('lonsin and latsin must be increasing!')
    # optionally, check that lonsout,latsout are
    # within region defined by lonsin,latsin.
    # (this check is always done if nearest neighbor
    # interpolation (order=0) requested).
    if checkbounds or order == 0:
        if min(N.ravel(lonsout)) < min(lonsin) or \
           max(N.ravel(lonsout)) > max(lonsin) or \
           min(N.ravel(latsout)) < min(latsin) or \
           max(N.ravel(latsout)) > max(latsin):
            raise ValueError(
                'latsout or lonsout outside range of latsin or lonsin')
    # compute grid coordinates of output grid.
    delon = lonsin[1:] - lonsin[0:-1]
    delat = latsin[1:] - latsin[0:-1]
    if max(delat) - min(delat) < 1.e-4 and max(delon) - min(delon) < 1.e-4:
        # regular input grid.
        xcoords = (len(lonsin) - 1) * (lonsout - lonsin[0]) / (lonsin[-1] -
                                                               lonsin[0])
        ycoords = (len(latsin) - 1) * (latsout - latsin[0]) / (latsin[-1] -
                                                               latsin[0])
    else:
        # irregular (but still rectilinear) input grid: locate every output
        # point between its two neighbouring input grid lines.
        xcoords = N.reshape(_fractional_index(lonsin, N.ravel(lonsout)),
                            lonsout.shape)
        ycoords = N.reshape(_fractional_index(latsin, N.ravel(latsout)),
                            latsout.shape)
    # datain is indexed [lat,lon], hence y comes first.
    coords = [ycoords, xcoords]
    # interpolate to output grid using numarray.nd_image spline filter.
    if order:
        return nd_image.map_coordinates(datain,
                                        coords,
                                        mode=mode,
                                        cval=cval,
                                        order=order)
    else:
        # nearest neighbor interpolation if order=0.
        # uses index arrays, so first convert to numarray.
        datatmp = N.array(datain, datain.typecode())
        xi = N.around(xcoords).astype('i')
        yi = N.around(ycoords).astype('i')
        return datatmp[yi, xi]

Example #14

0

Show file

File: p_gene_factor.py Project: polyactis/annot

	def group_data(self, data_list_2d, key_column=0, no_of_groups=6, group_size=None, cluster_column=-1):
		"""
		03-30-05
			output: a dictionary
			group the data based on the key_column, but each key has similar amount of clusters
			from cluster_column. idea is similar to equal.count().
		"""
		sys.stderr.write("Grouping data...")
		data_array = array(data_list_2d)
		cluster_list = list(data_array[:,cluster_column])
		cluster_set = Set(cluster_list)
		if group_size:
			unit_length = group_size
		else:
			unit_length = len(cluster_set)/no_of_groups
		
		#06-28-05 construct a key 2 set of cluster_id(mcl_id)'s
		key2cluster_set = {}
		for i in range(len(data_array)):
			key = data_array[i,key_column]
			cluster_id = data_array[i, cluster_column]
			if key not in key2cluster_set:
				key2cluster_set[key] = Set()
			key2cluster_set[key].add(cluster_id)
		if self.debug:
			print key2cluster_set
			raw_input("pause:")
		
		#06-28-05 convert key2cluster_set to a 2d list. and sort it based on key
		key_cluster_2d_list = []
		for key,cluster_set in key2cluster_set.iteritems():
			key_cluster_2d_list.append([key,cluster_set])
		key_cluster_2d_list.sort()
		if self.debug:
			print key_cluster_2d_list
			raw_input("pause:")
		
		#06-28-05	construct the boundaries for bin's
		bin_boundaries = [key_cluster_2d_list[0][0]]	#first key is already pushed in
		bin_set = Set()
		for key,cluster_set in key_cluster_2d_list:
			if len(bin_set) < unit_length:
				#the limit hasn't been reached.
				bin_set |= cluster_set
			else:
				#restart
				bin_boundaries.append(key)
				bin_set = cluster_set
		if self.debug:
			print "The bin_boundaries is ", bin_boundaries
			raw_input("pause:")
		
		#06-28-05	construct the final data structure to return
		key2data_array = {}
		for entry in data_list_2d:
			key = entry[key_column]
			bin_key_index = searchsorted(bin_boundaries, key)	#the trick is that the range is (...]. my bin_boundaries is [...)
													#transform below
			if bin_key_index==len(bin_boundaries):
				bin_key = bin_boundaries[bin_key_index-1]
			elif bin_boundaries[bin_key_index] == key:
				bin_key = key
			else:
				bin_key = bin_boundaries[bin_key_index-1]
			if bin_key not in key2data_array:
				key2data_array[bin_key] = []
			key2data_array[bin_key].append(entry)
		if self.debug:
			print "key2data_array is ",key2data_array
			raw_input("pause:")
		sys.stderr.write("Done.\n")
		return key2data_array

Example #15

0

Show file

File: recipe-335390.py Project: zlrs/code-1

def find_closest(input_array, target_array, tol):
    """
    Match every element of target_array to a nearby element of input_array.

    Typical use: given irregular observation times (input_array) and a list
    of times of interest (target_array), find the observation nearest to
    each time of interest and tell which ones are close enough.

    NOTE: input_array must be sorted!  target_array may be in any order.

    Inputs:
      input_array:  a sorted Float64 numarray
      target_array: a Float64 numarray
      tol:          a tolerance

    Returns a tuple (closest_indices, accept_indices, reject_indices):
      closest_indices:  for each target, an index into input_array.  It
                        starts as the searchsorted insertion point (clamped
                        to the last element) and is moved to the lower
                        neighbour only when that neighbour is at least as
                        close as the upper one AND its distance is <= tol.
      accept_indices:   indices of targets that equal an input element or
                        lie strictly closer than tol to their best match
      reject_indices:   indices of all other targets
    """
    n_input = len(input_array)
    n_target = len(target_array)
    # Insertion points of the targets in input_array; refined in place below.
    closest_indices = numarray.searchsorted(input_array, target_array)
    # matched[i] becomes i once target i is accepted and stays -1 otherwise.
    matched = [-1] * n_target

    for i in range(n_target):
        wanted = target_array[i]
        slot = closest_indices[i]
        step = 0        # -1 when the lower neighbour is the better match
        exact = False

        if slot >= n_input:
            # Beyond the last input element: compare against that element.
            closest_indices[i] = n_input - 1
            gap = wanted - input_array[n_input - 1]
        elif wanted == input_array[slot]:
            # The target is itself an element of input_array.
            gap = 0.0
            exact = True
        elif slot == 0:
            # Before the first input element.
            gap = input_array[0] - wanted
        else:
            # Strictly between input_array[slot - 1] and input_array[slot].
            gap_above = input_array[slot] - wanted
            gap_below = wanted - input_array[slot - 1]
            if gap_below <= gap_above:
                gap, step = gap_below, -1
            else:
                gap = gap_above

        # Exact hits are always accepted; otherwise the distance has to be
        # strictly below the tolerance.
        if exact or gap < tol:
            matched[i] = i
        # The index itself is corrected for distances up to and including tol.
        if gap <= tol:
            closest_indices[i] += step

    accept_indices = numarray.compress(numarray.greater(matched, -1), matched)
    reject_indices = numarray.compress(numarray.equal(matched, -1),
                                       numarray.arange(n_target))
    return (closest_indices, accept_indices, reject_indices)