def tri(N, M=None, k=0, typecode=None):
    """Return an N-by-M matrix that is set on and below the k-th diagonal.

    Entry (i, j) is true when ``i - j >= -k``, i.e. all the diagonals
    starting from the lower left corner up to the k-th are ones.

    Parameters:
        N -- number of rows.
        M -- number of columns (defaults to N).  For backward
             compatibility a string is accepted here and treated as the
             typecode, so tri(N, 'd') is equivalent to
             tri(N, typecode='d').
        k -- index of the last diagonal that is filled (0 is the main
             diagonal, positive values lie above it).
        typecode -- if given, the result is converted with
             ``astype(typecode)``; otherwise the raw comparison result
             is returned.
    """
    # isinstance (rather than an exact type comparison) so that string
    # subclasses are also recognised as a typecode.
    if isinstance(M, str):
        # pearu: any objections to remove this feature?
        typecode = M
        M = None
    if M is None:
        M = N
    m = greater_equal(subtract.outer(arange(N), arange(M)), -k)
    if typecode is None:
        return m
    return m.astype(typecode)
def ifftshift(x, axes=None):
    """ifftshift(x, axes=None) -> y

    Inverse of fftshift.

    Parameters:
        x -- input array (anything accepted by asarray).
        axes -- axes over which to shift: a sequence of axis indices,
                a single integer axis, or None for all axes.

    Returns the shifted array; along each listed axis of length n the
    elements are rotated so that index n - (n+1)//2 comes first.
    """
    tmp = asarray(x)
    ndim = len(tmp.shape)
    if axes is None:
        axes = range(ndim)
    elif isinstance(axes, int):
        # Allow a single axis to be given as a bare integer.
        axes = (axes,)
    y = tmp
    for k in axes:
        n = tmp.shape[k]
        # Explicit floor division: the split point must stay an integer
        # even when true division is in effect.
        p2 = n - (n + 1) // 2
        mylist = concatenate((arange(p2, n), arange(p2)))
        y = take(y, mylist, k)
    return y
def probplot(x, sparams=(), dist='norm', fit=1, plot=None):
    """Return (osm, osr){,(scale,loc,r)} where (osm, osr) are order statistic
    medians and ordered response data respectively so that plot(osm, osr)
    is a probability plot.

    If fit==1, then do a regression fit and compute the slope (scale),
    intercept (loc), and correlation coefficient (r), of the best straight
    line through the points.  If fit==0, only (osm, osr) is returned.

    Parameters:
        x -- sample data.
        sparams -- shape parameter argument(s) for the distribution: a
                   tuple, any other sequence, a scalar, or None.
        dist -- name of an attribute of ``distributions`` that provides
                a ``ppf`` method.
        fit -- if true, also return (slope, intercept, r).
        plot -- optional plotting object; when given, the points and
                the fitted line are drawn using its plot/title/xlabel/
                ylabel methods.

    Raises ValueError if dist does not name a distribution with a ppf,
    if the ppf does not end with loc=0.0 and scale=1.0 default
    arguments, or if the number of shape parameters is wrong.
    """
    N = len(x)
    # Uniform order statistic medians.
    Ui = zeros(N)*1.0
    Ui[-1] = 0.5**(1.0/N)
    Ui[0] = 1-Ui[-1]
    i = arange(2, N)
    Ui[1:-1] = (i-0.3175)/(N+0.365)

    # Plain attribute lookup instead of eval() on a formatted string.
    try:
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)

    if sparams is None:
        sparams = ()
    if isscalar(sparams):
        sparams = (sparams,)
    if not isinstance(sparams, tuple):
        sparams = tuple(sparams)

    # The ppf signature must end with loc=0.0, scale=1.0.
    res = inspect.getargspec(ppf_func)
    if not ('loc' == res[0][-2] and 'scale' == res[0][-1] and
            0.0 == res[-1][-2] and 1.0 == res[-1][-1]):
        raise ValueError("Function does not have default location "
                         "and scale parameters\n that are 0.0 and 1.0 "
                         "respectively.")
    if (len(sparams) < len(res[0])-len(res[-1])-1) or \
       (len(sparams) > len(res[0])-3):
        raise ValueError("Incorrect number of shape parameters.")

    osm = ppf_func(Ui, *sparams)
    osr = sort(x)
    if fit or (plot is not None):
        # perform a linear fit.
        slope, intercept, r, prob, sterrest = stats.linregress(osm, osr)

    if plot is not None:
        # The calls guarded below are best-effort: not every plotting
        # object provides them, so failures are deliberately ignored.
        try:
            import scipy.xplt as xplt
            xplt.limits()
        except Exception:
            pass
        plot.plot(osm, osr, 'o', osm, slope*osm + intercept)
        plot.title('Probability Plot')
        plot.xlabel('Order Statistic Medians')
        plot.ylabel('Ordered Values')
        try:
            plot.expand_limits(5)
        except Exception:
            pass
        xmin, xmax = amin(osm), amax(osm)
        ymin, ymax = amin(x), amax(x)
        pos = xmin+0.70*(xmax-xmin), ymin+0.01*(ymax-ymin)
        try:
            # The label reads r squared, so print the squared coefficient.
            plot.addtext("r^2^=%1.4f" % (r**2), xy=pos, tosys=1)
        except Exception:
            pass

    if fit:
        return (osm, osr), (slope, intercept, r)
    else:
        return osm, osr
def central_diff_weights(Np, ndiv=1):
    """Return weights for an Np-point central derivative of order ndiv,
    assuming equally-spaced function points.

    With ho = Np >> 1 and the weights in the vector w, the derivative
    estimate is

        w[0] * f(x-ho*dx) + ... + w[-1] * f(x+ho*dx)

    Np must be odd and at least ndiv+1 (checked with assert).
    Can be inaccurate for large number of points.
    """
    assert (Np >= ndiv+1), "Number of points must be at least the derivative order + 1."
    assert (Np % 2 == 1), "Odd-number of points only."
    half = Np >> 1
    # Column vector of sample offsets -half, ..., half.
    offsets = arange(-half, half + 1.0)[:, NewAxis]
    # One column per power of the offsets: 0, 1, ..., Np-1.
    columns = [offsets**0.0]
    for power in range(1, Np):
        columns.append(offsets**power)
    powers = hstack(columns)
    # Row ndiv of the inverse, scaled by ndiv factorial.
    factorial = product(arange(1, ndiv + 1))
    return factorial * linalg.inv(powers)[ndiv]
def fftshift(x, axes=None):
    """fftshift(x, axes=None) -> y

    Shift zero-frequency component to center of spectrum.

    This function swaps half-spaces for all axes listed (defaults to all).

    Parameters:
        x -- input array (anything accepted by asarray).
        axes -- axes over which to shift: a sequence of axis indices,
                a single integer axis, or None for all axes.

    Notes:
      If len(x) is even then the Nyquist component is y[0].
    """
    tmp = asarray(x)
    ndim = len(tmp.shape)
    if axes is None:
        axes = range(ndim)
    elif isinstance(axes, int):
        # Allow a single axis to be given as a bare integer.
        axes = (axes,)
    y = tmp
    for k in axes:
        n = tmp.shape[k]
        # Explicit floor division: the split point must stay an integer
        # even when true division is in effect.
        p2 = (n + 1) // 2
        mylist = concatenate((arange(p2, n), arange(p2)))
        y = take(y, mylist, k)
    return y
def binom_test(x, n=None, p=0.5):
    """An exact (two-sided) test of the null hypothesis that the
    probability of success in a Bernoulli experiment is p.

    Inputs:

      x -- Number of successes (or a vector of length 2 giving the
             number of successes and number of failures respectively)
      n -- Number of trials (ignored if x has length 2)
      p -- Hypothesized probability of success

    Returns the two-sided p-value, capped at 1.0.

    Raises ValueError if x has more than two entries, if n is missing
    or smaller than x, or if p lies outside [0, 1].
    """
    x = atleast_1d(x)
    entries = len(x)
    if entries == 2:
        # (successes, failures): the trial count is their sum.
        n = x[1] + x[0]
        x = x[0]
    elif entries == 1:
        x = x[0]
        if n is None or n < x:
            raise ValueError("n must be >= x")
    else:
        raise ValueError("Incorrect length for x.")

    if (p > 1.0) or (p < 0.0):
        raise ValueError("p must be in range [0,1]")

    binom = distributions.binom
    # Probability of the observed count; outcomes in the opposite tail
    # count as "as extreme" when their pmf does not exceed this value
    # (with a small relative tolerance).
    observed = binom.pmf(x, n, p)
    threshold = observed * (1+1e-7)

    if x*1.0/n < p:
        # Observed proportion is below p: scan the counts above x.
        upper = arange(x+1, n+1)
        y = sum(binom.pmf(upper, n, p) <= threshold)
        pval = binom.cdf(x, n, p) + binom.sf(n-y, n, p)
    else:
        # Otherwise scan the counts below x.
        lower = arange(0, x)
        y = sum(binom.pmf(lower, n, p) <= threshold)
        pval = binom.cdf(y-1, n, p) + binom.sf(x-1, n, p)

    return min(1.0, pval)
def boxcox_normmax(x, brack=(-1.0, 1.0)):
    """Return the lmbda for which boxcox(x, lmbda), once sorted,
    correlates best with the normal order statistic medians.

    The search is done by optimize.brent on 1-r, where r is the
    Pearson correlation, starting from the bracket brack.
    """
    N = len(x)

    # compute uniform median statistics
    medians = zeros(N)*1.0
    medians[-1] = 0.5**(1.0/N)
    medians[0] = 1-medians[-1]
    inner = arange(2, N)
    medians[1:-1] = (inner-0.3175)/(N+0.365)

    # x-axis values of the probability plot
    normal_quantiles = distributions.norm.ppf(medians)

    def one_minus_r(lmbda, xvals, samps):
        # Transform the samples, sort them and correlate against the
        # x-axis values; 1-r is returned so that minimizing this
        # function maximizes the correlation.
        transformed = sort(boxcox(samps, lmbda))
        r, prob = stats.pearsonr(xvals, transformed)
        return 1-r

    return optimize.brent(one_minus_r, brack=brack,
                          args=(normal_quantiles, x))
def ppcc_max(x, brack=(0.0,1.0), dist='tukeylambda'):
    """Returns the shape parameter that maximizes the probability plot
    correlation coefficient for the given data to a one-parameter
    family of distributions.

    Parameters:
        x -- sample data.
        brack -- starting bracket handed to optimize.brent.
        dist -- name of an attribute of ``distributions`` that provides
                a ``ppf`` method taking exactly one shape parameter.

    Raises ValueError if dist does not name a distribution with a ppf,
    if the ppf does not end with loc=0.0 and scale=1.0 default
    arguments, or if it is not a one-parameter family.

    See also ppcc_plot
    """
    # Plain attribute lookup instead of eval() on a formatted string.
    try:
        ppf_func = getattr(distributions, dist).ppf
    except AttributeError:
        raise ValueError("%s is not a valid distribution with a ppf." % dist)

    # The ppf signature must end with loc=0.0, scale=1.0.
    res = inspect.getargspec(ppf_func)
    if not ('loc' == res[0][-2] and 'scale' == res[0][-1] and
            0.0 == res[-1][-2] and 1.0 == res[-1][-1]):
        raise ValueError("Function does not have default location "
                         "and scale parameters\n that are 0.0 and 1.0 "
                         "respectively.")
    if (1 < len(res[0])-len(res[-1])-1) or \
       (1 > len(res[0])-3):
        raise ValueError("Must be a one-parameter family.")

    N = len(x)
    # compute uniform median statistics
    Ui = zeros(N)*1.0
    Ui[-1] = 0.5**(1.0/N)
    Ui[0] = 1-Ui[-1]
    i = arange(2, N)
    Ui[1:-1] = (i-0.3175)/(N+0.365)
    osr = sort(x)

    # this function computes the x-axis values of the probability plot
    #  and computes the correlation with the ordered data
    #  and returns 1-r so that a minimization function maximizes the
    #  correlation
    def tempfunc(shape, mi, yvals, func):
        xvals = func(mi, shape)
        r, prob = stats.pearsonr(xvals, yvals)
        return 1-r

    return optimize.brent(tempfunc, brack=brack, args=(Ui, osr, ppf_func))
def anderson(x, dist='norm'):
    """Anderson and Darling test for normal, exponential, logistic, or
    Gumbel (Extreme Value Type I) distribution.

    Given samples x, return (A2, critical, sig): the Anderson-Darling
    statistic, the critical values, and the corresponding significance
    levels in percentages.

    Critical values provided are for the following significance levels

    norm/expon:   15%, 10%, 5%, 2.5%, 1%
    Gumbel:       25%, 10%, 5%, 2.5%, 1%
    logistic:     25%, 10%, 5%, 2.5%, 1%, 0.5%

    If A2 is larger than these critical values then for that significance
    level, the hypothesis that the data come from the chosen
    distribution can be rejected.

    dist must be one of 'norm', 'expon', 'gumbel', 'extreme1' or
    'logistic'; anything else raises ValueError.
    """
    if dist not in ['norm','expon','gumbel','extreme1','logistic']:
        raise ValueError("Invalid distribution.")

    y = sort(x)
    xbar = stats.mean(x)
    N = len(y)

    if dist == 'norm':
        s = stats.std(x)
        w = (y-xbar)/s
        z = distributions.norm.cdf(w)
        sig = array([15,10,5,2.5,1])
        critical = around(_Avals_norm / (1.0 + 4.0/N - 25.0/N/N),3)
    elif dist == 'expon':
        w = y / xbar
        z = distributions.expon.cdf(w)
        sig = array([15,10,5,2.5,1])
        critical = around(_Avals_expon / (1.0 + 0.6/N),3)
    elif dist == 'logistic':
        # Location a and scale b are taken as the root of this pair of
        # equations, found with fsolve from the sample mean and std.
        def logistic_equations(ab, xj, N):
            a, b = ab
            scaled = (xj-a)/b
            expo = exp(scaled)
            first = sum(1.0/(1+expo))-0.5*N
            second = sum(scaled*(1.0-expo)/(1+expo))+N
            return array([first, second])

        start = array([xbar, stats.std(x)])
        sol = optimize.fsolve(logistic_equations, start, args=(x,N),
                              xtol=1e-5)
        w = (y-sol[0])/sol[1]
        z = distributions.logistic.cdf(w)
        sig = array([25,10,5,2.5,1,0.5])
        critical = around(_Avals_logistic / (1.0+0.25/N),3)
    else:
        # 'gumbel' / 'extreme1': the scale s is a fixed point of this
        # map; the location follows from s.
        def scale_step(th, xj, N):
            mean_x = stats.sum(xj)*1.0/N
            weights = exp(-xj/th)
            weighted = sum(xj*weights)
            weighted /= sum(weights)
            return mean_x - weighted

        s = optimize.fixed_point(scale_step, 1.0, args=(x,N), xtol=1e-5)
        xbar = -s*log(sum(exp(-x/s))*1.0/N)
        w = (y-xbar)/s
        z = distributions.gumbel_l.cdf(w)
        sig = array([25,10,5,2.5,1])
        critical = around(_Avals_gumbel / (1.0 + 0.2/sqrt(N)),3)

    # Anderson-Darling statistic from the cdf values of the ordered data.
    ranks = arange(1,N+1)
    S = sum((2*ranks-1.0)/N*(log(z)+log(1-z[::-1])))
    A2 = -N-S
    return A2, critical, sig
def toimage(arr,high=255,low=0,cmin=None,cmax=None,pal=None,
            mode=None,channel_axis=None):
    """Takes a Numeric array and returns a PIL image.

    The array must be 2-D, or 3-D with a 3 or a 4 somewhere in its
    shape; complex-valued arrays are rejected with ValueError.

    2-D arrays:
      mode 'F'            -- data is converted with astype('f').
      mode None, 'L', 'P' -- data is passed through bytescale (using
                             high, low, cmin, cmax) and stored as an
                             'L' image.  If pal is given it is attached
                             as the palette; otherwise, for mode 'P',
                             a default gray-scale palette is attached.
      mode '1'            -- data is thresholded: values greater than
                             high become 1.
      mode 'I'            -- data is rescaled from [cmin, cmax] to
                             [low, high] (cmin/cmax default to the data
                             minimum/maximum) and converted with
                             astype('i').
      any other mode raises ValueError.

    3-D arrays:
      channel_axis tells which dimension holds the channel data.  When
      it is None, the first dimension of length 3 is used, or else the
      first of length 4.  The mode defaults to 'RGB' for 3 channels and
      'RGBA' for 4; 'YCbCr' (3 channels) and 'CMYK' (4 channels) may be
      selected explicitly.
    """
    data = asarray(arr)
    if iscomplexobj(data):
        raise ValueError("Cannot convert a complex-valued array.")
    shape = list(data.shape)
    ndims = len(shape)
    valid = ndims==2 or ((ndims==3) and
                         ((3 in shape) or (4 in shape)))
    assert valid, "Not a suitable array shape for any mode."

    if ndims == 2:
        shape = (shape[1],shape[0])  # columns show up first
        if mode == 'F':
            return Image.fromstring(mode,shape,data.astype('f').tostring())
        if mode in [None, 'L', 'P']:
            scaled = bytescale(data,high=high,low=low,cmin=cmin,cmax=cmax)
            image = Image.fromstring('L',shape,scaled.tostring())
            if pal is not None:
                image.putpalette(asarray(pal,typecode=_UInt8).tostring())
                # Becomes a mode='P' automagically.
            elif mode == 'P':  # default gray-scale
                pal = arange(0,256,1,typecode='b')[:,NewAxis] * \
                      ones((3,),typecode='b')[NewAxis,:]
                image.putpalette(asarray(pal,typecode=_UInt8).tostring())
            return image
        if mode == '1':  # high input gives threshold for 1
            thresholded = ((data > high)*255).astype('b')
            image = Image.fromstring('L',shape,thresholded.tostring())
            return image.convert(mode='1')
        if cmin is None:
            cmin = amin(ravel(data))
        if cmax is None:
            cmax = amax(ravel(data))
        data = (data*1.0 - cmin)*(high-low)/(cmax-cmin) + low
        if mode != 'I':
            raise ValueError(_errstr)
        return Image.fromstring(mode,shape,data.astype('i').tostring())

    # if here then 3-d array with a 3 or a 4 in the shape length.
    # Check for 3 in datacube shape --- 'RGB' or 'YCbCr'
    if channel_axis is not None:
        ca = channel_axis
    elif (3 in shape):
        ca = Numeric.nonzero(asarray(shape) == 3)[0]
    else:
        ca = Numeric.nonzero(asarray(shape) == 4)
        if len(ca):
            ca = ca[0]
        else:
            raise ValueError("Could not find channel dimension.")

    numch = shape[ca]
    if numch not in [3,4]:
        raise ValueError("Channel axis dimension is not valid.")

    scaled = bytescale(data,high=high,low=low,cmin=cmin,cmax=cmax)
    # Move the channel axis last before flattening, and work out the
    # image size from the two remaining axes.
    if ca == 2:
        strdata = scaled.tostring()
        shape = (shape[1],shape[0])
    elif ca == 1:
        strdata = transpose(scaled,(0,2,1)).tostring()
        shape = (shape[2],shape[0])
    elif ca == 0:
        strdata = transpose(scaled,(1,2,0)).tostring()
        shape = (shape[2],shape[1])

    if mode is None:
        if numch == 3:
            mode = 'RGB'
        else:
            mode = 'RGBA'

    if mode not in ['RGB','RGBA','YCbCr','CMYK']:
        raise ValueError(_errstr)

    if mode in ['RGB', 'YCbCr']:
        assert numch == 3, "Invalid array shape for mode."
    if mode in ['RGBA', 'CMYK']:
        assert numch == 4, "Invalid array shape for mode."

    # Here we know data and mode is correct
    return Image.fromstring(mode, shape, strdata)