def probplot(x, sparams=(), dist='norm', fit=1, plot=None): """Return (osm, osr){,(scale,loc,r)} where (osm, osr) are order statistic medians and ordered response data respectively so that plot(osm, osr) is a probability plot. If fit==1, then do a regression fit and compute the slope (scale), intercept (loc), and correlation coefficient (r), of the best straight line through the points. If fit==0, only (osm, osr) is returned. sparams is a tuple of shape parameter arguments for the distribution. """ N = len(x) Ui = zeros(N)*1.0 Ui[-1] = 0.5**(1.0/N) Ui[0] = 1-Ui[-1] i = arange(2,N) Ui[1:-1] = (i-0.3175)/(N+0.365) try: ppf_func = eval('distributions.%s.ppf'%dist) except AttributError: raise dist, "is not a valid distribution with a ppf." if sparams is None: sparams = () if isscalar(sparams): sparams = (sparams,) if not isinstance(sparams,types.TupleType): sparams = tuple(sparams) res = inspect.getargspec(ppf_func) if not ('loc' == res[0][-2] and 'scale' == res[0][-1] and \ 0.0==res[-1][-2] and 1.0==res[-1][-1]): raise ValueError, "Function has does not have default location", \ "and scale parameters\n that are 0.0 and 1.0 respectively." if (len(sparams) < len(res[0])-len(res[-1])-1) or \ (len(sparams) > len(res[0])-3): raise ValueError, "Incorrect number of shape parameters." osm = ppf_func(Ui,*sparams) osr = sort(x) if fit or (plot is not None): # perform a linear fit. slope, intercept, r, prob, sterrest = stats.linregress(osm,osr) if plot is not None: try: import scipy.xplt as xplt xplt.limits() except: pass plot.plot(osm, osr, 'o', osm, slope*osm + intercept) plot.title('Probability Plot') plot.xlabel('Order Statistic Medians') plot.ylabel('Ordered Values') try: plot.expand_limits(5) except: pass xmin,xmax= amin(osm),amax(osm) ymin,ymax= amin(x),amax(x) pos = xmin+0.70*(xmax-xmin), ymin+0.01*(ymax-ymin) try: plot.addtext("r^2^=%1.4f" % r, xy=pos,tosys=1) except: pass if fit: return (osm, osr), (slope, intercept, r) else: return osm, osr
def norm(x, ord=2): """ norm(x, ord=2) -> n Matrix and vector norm. Inputs: x -- a rank-1 (vector) or rank-2 (matrix) array ord -- the order of norm. Comments: For vectors ord can be any real number including Inf or -Inf. ord = Inf, computes the maximum of the magnitudes ord = -Inf, computes minimum of the magnitudes ord is finite, computes sum(abs(x)**ord)**(1.0/ord) For matrices ord can only be + or - 1, 2, Inf. ord = 2 computes the largest singular value ord = -2 computes the smallest singular value ord = 1 computes the largest column sum of absolute values ord = -1 computes the smallest column sum of absolute values ord = Inf computes the largest row sum of absolute values ord = -Inf computes the smallest row sum of absolute values ord = 'fro' computes the frobenius norm sqrt(sum(diag(X.H * X))) """ x = asarray_chkfinite(x) nd = len(x.shape) Inf = scipy_base.Inf if nd == 1: if ord == Inf: return scipy_base.amax(abs(x)) elif ord == -Inf: return scipy_base.amin(abs(x)) else: return scipy_base.sum(abs(x)**ord)**(1.0/ord) elif nd == 2: if ord == 2: return scipy_base.amax(decomp.svd(x,compute_uv=0)) elif ord == -2: return scipy_base.amin(decomp.svd(x,compute_uv=0)) elif ord == 1: return scipy_base.amax(scipy_base.sum(abs(x))) elif ord == Inf: return scipy_base.amax(scipy_base.sum(abs(x),axis=1)) elif ord == -1: return scipy_base.amin(scipy_base.sum(abs(x))) elif ord == -Inf: return scipy_base.amin(scipy_base.sum(abs(x),axis=1)) elif ord in ['fro','f']: val = real((conjugate(x)*x).flat) return sqrt(add.reduce(val)) else: raise ValueError, "Invalid norm order for matrices." else: raise ValueError, "Improper number of dimensions to norm."
def bytescale(data, cmin=None, cmax=None, high=255, low=0): if data.typecode == _UInt8: return data high = high - low if cmin is None: cmin = amin(ravel(data)) if cmax is None: cmax = amax(ravel(data)) scale = high *1.0 / (cmax-cmin or 1) bytedata = ((data*1.0-cmin)*scale + 0.4999).astype(_UInt8) return bytedata + cast[_UInt8](low)
def ansari(x,y): """Determine if the scale parameter for two distributions with equal medians is the same using the Ansari-Bradley statistic. Specifically, compute the AB statistic and the probability of error that the null hypothesis is true but rejected with the computed statistic as the critical value. One can reject the null hypothesis that the ratio of variances is 1 if returned probability of error is small (say < 0.05) """ x,y = asarray(x),asarray(y) n = len(x) m = len(y) if (m < 1): raise ValueError, "Not enough other observations." if (n < 1): raise ValueError, "Not enough test observations." N = m+n xy = r_[x,y] # combine rank = stats.rankdata(xy) symrank = amin(array((rank,N-rank+1)),0) AB = sum(symrank[:n]) uxy = unique(xy) repeats = (len(uxy) != len(xy)) exact = ((m<55) and (n<55) and not repeats) if repeats and ((m < 55) or (n < 55)): print "Ties preclude use of exact statistic." if exact: astart, a1, ifault = statlib.gscale(n,m) ind = AB-astart total = sum(a1) if ind < len(a1)/2.0: cind = int(ceil(ind)) if (ind == cind): pval = 2.0*sum(a1[:cind+1])/total else: pval = 2.0*sum(a1[:cind])/total else: find = int(floor(ind)) if (ind == floor(ind)): pval = 2.0*sum(a1[find:])/total else: pval = 2.0*sum(a1[find+1:])/total return AB, min(1.0,pval) # otherwise compute normal approximation if N % 2: # N odd mnAB = n*(N+1.0)**2 / 4.0 / N varAB = n*m*(N+1.0)*(3+N**2)/(48.0*N**2) else: mnAB = n*(N+2.0)/4.0 varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0) if repeats: # adjust variance estimates # compute sum(tj * rj**2) fac = sum(symrank**2) if N % 2: # N odd varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1)) else: # N even varAB = m*n*(16*fac-N*(N+2)**2)/(16.0 * N * (N-1)) z = (AB - mnAB)/sqrt(varAB) pval = (1-distributions.norm.cdf(abs(z)))*2.0 return AB, pval
def toimage(arr,high=255,low=0,cmin=None,cmax=None,pal=None, mode=None,channel_axis=None): """Takes a Numeric array and returns a PIL image. The mode of the PIL image depends on the array shape, the pal keyword, and the mode keyword. For 2-D arrays, if pal is a valid (N,3) byte-array giving the RGB values (from 0 to 255) then mode='P', otherwise mode='L', unless mode is given as 'F' or 'I' in which case a float and/or integer array is made For 3-D arrays, the channel_axis argument tells which dimension of the array holds the channel data. For 3-D arrays if one of the dimensions is 3, the mode is 'RGB' by default or 'YCbCr' if selected. if the The Numeric array must be either 2 dimensional or 3 dimensional. """ data = asarray(arr) if iscomplexobj(data): raise ValueError, "Cannot convert a complex-valued array." shape = list(data.shape) valid = len(shape)==2 or ((len(shape)==3) and \ ((3 in shape) or (4 in shape))) assert valid, "Not a suitable array shape for any mode." if len(shape) == 2: shape = (shape[1],shape[0]) # columns show up first if mode == 'F': image = Image.fromstring(mode,shape,data.astype('f').tostring()) return image if mode in [None, 'L', 'P']: bytedata = bytescale(data,high=high,low=low,cmin=cmin,cmax=cmax) image = Image.fromstring('L',shape,bytedata.tostring()) if pal is not None: image.putpalette(asarray(pal,typecode=_UInt8).tostring()) # Becomes a mode='P' automagically. elif mode == 'P': # default gray-scale pal = arange(0,256,1,typecode='b')[:,NewAxis] * \ ones((3,),typecode='b')[NewAxis,:] image.putpalette(asarray(pal,typecode=_UInt8).tostring()) return image if mode == '1': # high input gives threshold for 1 bytedata = ((data > high)*255).astype('b') image = Image.fromstring('L',shape,bytedata.tostring()) image = image.convert(mode='1') return image if cmin is None: cmin = amin(ravel(data)) if cmax is None: cmax = amax(ravel(data)) data = (data*1.0 - cmin)*(high-low)/(cmax-cmin) + low if mode == 'I': image = Image.fromstring(mode,shape,data.astype('i').tostring()) else: raise ValueError, _errstr return image # if here then 3-d array with a 3 or a 4 in the shape length. # Check for 3 in datacube shape --- 'RGB' or 'YCbCr' if channel_axis is None: if (3 in shape): ca = Numeric.nonzero(asarray(shape) == 3)[0] else: ca = Numeric.nonzero(asarray(shape) == 4) if len(ca): ca = ca[0] else: raise ValueError, "Could not find channel dimension." else: ca = channel_axis numch = shape[ca] if numch not in [3,4]: raise ValueError, "Channel axis dimension is not valid." bytedata = bytescale(data,high=high,low=low,cmin=cmin,cmax=cmax) if ca == 2: strdata = bytedata.tostring() shape = (shape[1],shape[0]) elif ca == 1: strdata = transpose(bytedata,(0,2,1)).tostring() shape = (shape[2],shape[0]) elif ca == 0: strdata = transpose(bytedata,(1,2,0)).tostring() shape = (shape[2],shape[1]) if mode is None: if numch == 3: mode = 'RGB' else: mode = 'RGBA' if mode not in ['RGB','RGBA','YCbCr','CMYK']: raise ValueError, _errstr if mode in ['RGB', 'YCbCr']: assert numch == 3, "Invalid array shape for mode." if mode in ['RGBA', 'CMYK']: assert numch == 4, "Invalid array shape for mode." # Here we know data and mode is coorect image = Image.fromstring(mode, shape, strdata) return image