def lena():
    """Return the classic Lena test image as an array, loaded from the
    pickled data file shipped with the package."""
    import cPickle, os
    fname = os.path.join(os.path.dirname(__file__),'plt','lena.dat')
    f = open(fname,'rb')
    lena = array(cPickle.load(f))
    f.close()
    return lena
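# Usage sketch (illustrative; assumes the pickled 'plt/lena.dat' file
# is present alongside this module, as in old SciPy):
#
#     >>> im = lena()
#     >>> im.shape
#     (512, 512)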
def _make_complex_eigvecs(w, vin, cmplx_tcode):
    """Rebuild complex eigenvectors from LAPACK's packed real output.

    For each complex-conjugate eigenvalue pair, LAPACK stores the real
    and imaginary parts of the eigenvector in two consecutive real
    columns; this routine recombines them into complex columns.
    """
    v = scipy_base.array(vin, typecode=cmplx_tcode)
    # columns whose eigenvalues have nonzero imaginary part
    ind = scipy_base.nonzero(scipy_base.not_equal(w.imag, 0.0))
    vnew = scipy_base.zeros((v.shape[0], len(ind)>>1), cmplx_tcode)
    vnew.real = scipy_base.take(vin, ind[::2], 1)
    vnew.imag = scipy_base.take(vin, ind[1::2], 1)
    count = 0
    conj = scipy_base.conjugate
    for i in range(len(ind)/2):
        v[:, ind[2*i]] = vnew[:, count]            # first of the pair
        v[:, ind[2*i+1]] = conj(vnew[:, count])    # its complex conjugate
        count += 1
    return v
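# A minimal modern-NumPy sketch of the same unpacking (illustrative
# only; the helper name `_unpack_eigvecs_np` and the use of numpy are
# assumptions, not part of this module). For each conjugate pair of
# eigenvalues, the packed real array holds Re(v) and Im(v) in two
# consecutive columns; the loop below recombines them.
def _unpack_eigvecs_np(w, vin):
    import numpy as np
    v = np.array(vin, dtype=complex)
    ind = np.nonzero(w.imag != 0.0)[0]   # columns of conjugate pairs
    for j in ind[::2]:                   # first column of each pair
        vc = vin[:, j] + 1j*vin[:, j+1]
        v[:, j] = vc
        v[:, j+1] = vc.conj()
    return v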
def rfftfreq(n, d=1.0):
    """ rfftfreq(n, d=1.0) -> f

    DFT sample frequencies (for usage with rfft, irfft).

    The returned float array contains the frequency bins in
    cycles/unit (with zero at the start) given a window length n and a
    sample spacing d:

      f = [0,1,1,2,2,...,n/2-1,n/2-1,n/2]/(d*n)       if n is even
      f = [0,1,1,2,2,...,n/2-1,n/2-1,n/2,n/2]/(d*n)   if n is odd
    """
    assert isinstance(n, types.IntType)
    # integer division by 2 pairs up the interior frequencies,
    # matching rfft's packed real/imaginary output format
    return array(range(1, n+1), 'i')/2/float(n*d)
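# Usage sketch (illustrative doctest, assuming an old-SciPy session
# with this module's names in scope); each interior frequency appears
# twice, once for the real and once for the imaginary component:
#
#     >>> rfftfreq(8)
#     array([ 0.   ,  0.125,  0.125,  0.25 ,  0.25 ,  0.375,  0.375,  0.5  ])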
def oneway(*args, **kwds):
    """Test for equal means in two or more samples from the normal
    distribution.

    If the keyword parameter <equal_var> is true then the variances
    are assumed to be equal, otherwise they are not assumed to be
    equal (default).

    Return the test statistic and the p-value giving the probability
    of error if the null hypothesis (equal means) is rejected at this
    value.
    """
    k = len(args)
    if k < 2:
        raise ValueError, "Must enter at least two input sample vectors."
    if 'equal_var' in kwds.keys():
        if kwds['equal_var']:
            evar = 1
        else:
            evar = 0
    else:
        evar = 0
    Ni = array([len(args[i]) for i in range(k)])
    Mi = array([stats.mean(args[i]) for i in range(k)])
    Vi = array([stats.var(args[i]) for i in range(k)])
    Wi = Ni / Vi
    swi = sum(Wi)
    N = sum(Ni)
    my = sum(Mi*Ni)*1.0/N
    tmp = sum((1-Wi/swi)**2 / (Ni-1.0))/(k*k-1.0)
    if evar:
        # classical one-way ANOVA F statistic
        F = ((sum(Ni*(Mi-my)**2) / (k-1.0)) / (sum((Ni-1.0)*Vi) / (N-k)))
        pval = distributions.f.sf(F, k-1, N-k)   # 1-cdf
    else:
        # unequal-variance (Welch-type) statistic
        m = sum(Wi*Mi)*1.0/swi
        F = sum(Wi*(Mi-m)**2) / ((k-1.0)*(1+2*(k-2)*tmp))
        pval = distributions.f.sf(F, k-1.0, 1.0/(3*tmp))
    return F, pval
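# Usage sketch (illustrative; the sample values are made up):
#
#     >>> g1 = [76, 84, 69, 72, 80]
#     >>> g2 = [79, 86, 94, 88, 81]
#     >>> F, p = oneway(g1, g2, equal_var=1)   # classical equal-variance test
#     >>> F, p = oneway(g1, g2)                # default: variances not assumed equal
#
# A small p argues for rejecting the null hypothesis of equal means.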
def fftfreq(n, d=1.0):
    """ fftfreq(n, d=1.0) -> f

    DFT sample frequencies.

    The returned float array contains the frequency bins in
    cycles/unit (with zero at the start) given a window length n and a
    sample spacing d:

      f = [0,1,...,n/2-1,-n/2,...,-1]/(d*n)         if n is even
      f = [0,1,...,(n-1)/2,-(n-1)/2,...,-1]/(d*n)   if n is odd
    """
    assert isinstance(n, types.IntType)
    k = range(0, (n-1)/2+1) + range(-(n/2), 0)
    return array(k, 'd')/(n*d)
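# Usage sketch (illustrative doctest): non-negative frequencies first,
# then the negative ones, in standard FFT output order:
#
#     >>> fftfreq(8)
#     array([ 0.   ,  0.125,  0.25 ,  0.375, -0.5  , -0.375, -0.25 , -0.125])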
def derivative(func, x0, dx=1.0, n=1, args=(), order=3):
    """Given a function, use a central difference formula with spacing dx
    to compute the nth derivative at x0.

    order is the number of points to use and must be odd.

    Warning: Decreasing the step size too small can result in
    round-off error.
    """
    assert (order >= n+1), "Number of points must be at least the derivative order + 1."
    assert (order % 2 == 1), "Odd number of points only."
    # pre-computed for n=1 and 2 and low-order for speed.
    if n == 1:
        if order == 3:
            weights = array([-1,0,1])/2.0
        elif order == 5:
            weights = array([1,-8,0,8,-1])/12.0
        elif order == 7:
            weights = array([-1,9,-45,0,45,-9,1])/60.0
        elif order == 9:
            weights = array([3,-32,168,-672,0,672,-168,32,-3])/840.0
        else:
            weights = central_diff_weights(order, 1)
    elif n == 2:
        if order == 3:
            weights = array([1,-2.0,1])
        elif order == 5:
            weights = array([-1,16,-30,16,-1])/12.0
        elif order == 7:
            weights = array([2,-27,270,-490,270,-27,2])/180.0
        elif order == 9:
            weights = array([-9,128,-1008,8064,-14350,8064,-1008,128,-9])/5040.0
        else:
            weights = central_diff_weights(order, 2)
    else:
        weights = central_diff_weights(order, n)
    val = 0.0
    ho = order >> 1
    for k in range(order):
        val += weights[k]*func(x0+(k-ho)*dx, *args)
    return val / product((dx,)*n)
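# Usage sketch (illustrative doctest). For f(x) = x**3 the default
# 3-point stencil gives f'(x0) = 3*x0**2 + dx**2 in exact arithmetic,
# so a small dx lands very close to the true derivative:
#
#     >>> derivative(lambda x: x**3, 2.0, dx=1e-3)   # true value: 12.0
#     12.000001000...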
def ansari(x, y):
    """Determine whether the scale parameters of two distributions with
    equal medians are the same, using the Ansari-Bradley statistic.

    Specifically, compute the AB statistic and the probability of error
    committed if the null hypothesis is true but is rejected using the
    computed statistic as the critical value.

    One can reject the null hypothesis that the ratio of variances is 1
    if the returned probability of error is small (say < 0.05).
    """
    x, y = asarray(x), asarray(y)
    n = len(x)
    m = len(y)
    if (m < 1):
        raise ValueError, "Not enough other observations."
    if (n < 1):
        raise ValueError, "Not enough test observations."
    N = m+n
    xy = r_[x,y]   # combine
    rank = stats.rankdata(xy)
    symrank = amin(array((rank, N-rank+1)), 0)
    AB = sum(symrank[:n])
    uxy = unique(xy)
    repeats = (len(uxy) != len(xy))
    exact = ((m < 55) and (n < 55) and not repeats)
    if repeats and ((m < 55) or (n < 55)):
        print "Ties preclude use of exact statistic."
    if exact:
        astart, a1, ifault = statlib.gscale(n, m)
        ind = AB - astart
        total = sum(a1)
        if ind < len(a1)/2.0:
            cind = int(ceil(ind))
            if (ind == cind):
                pval = 2.0*sum(a1[:cind+1])/total
            else:
                pval = 2.0*sum(a1[:cind])/total
        else:
            find = int(floor(ind))
            if (ind == floor(ind)):
                pval = 2.0*sum(a1[find:])/total
            else:
                pval = 2.0*sum(a1[find+1:])/total
        return AB, min(1.0, pval)
    # otherwise compute normal approximation
    if N % 2:   # N odd
        mnAB = n*(N+1.0)**2 / 4.0 / N
        varAB = n*m*(N+1.0)*(3+N**2)/(48.0*N**2)
    else:
        mnAB = n*(N+2.0)/4.0
        varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0)
    if repeats:   # adjust variance estimates
        # compute sum(tj * rj**2)
        fac = sum(symrank**2)
        if N % 2:   # N odd
            varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
        else:       # N even
            varAB = m*n*(16*fac-N*(N+2)**2)/(16.0 * N * (N-1))
    z = (AB - mnAB)/sqrt(varAB)
    pval = (1-distributions.norm.cdf(abs(z)))*2.0
    return AB, pval
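# Usage sketch (illustrative; the samples are made up, with y
# deliberately more spread out than x). With m, n < 55 and no ties
# the exact distribution from statlib.gscale is used, otherwise the
# normal approximation:
#
#     >>> x = [1.2, 1.9, 0.8, 1.5, 1.1]
#     >>> y = [0.2, 3.1, -0.9, 2.5, 4.1]
#     >>> AB, p = ansari(x, y)
#
# A small p argues against equal scale parameters.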
def anderson(x, dist='norm'):
    """Anderson-Darling test for a normal, exponential, Gumbel
    (Extreme Value Type I), or logistic distribution.

    Given samples x, return A2, the Anderson-Darling statistic, the
    significance levels in percentages, and the corresponding critical
    values.

    Critical values provided are for the following significance levels:

      norm/expon:  15%, 10%, 5%, 2.5%, 1%
      Gumbel:      25%, 10%, 5%, 2.5%, 1%
      logistic:    25%, 10%, 5%, 2.5%, 1%, 0.5%

    If A2 is larger than one of these critical values, then for that
    significance level the null hypothesis that the data come from the
    chosen distribution can be rejected.
    """
    if not dist in ['norm','expon','gumbel','extreme1','logistic']:
        raise ValueError, "Invalid distribution."
    y = sort(x)
    xbar = stats.mean(x)
    N = len(y)
    if dist == 'norm':
        s = stats.std(x)
        w = (y-xbar)/s
        z = distributions.norm.cdf(w)
        sig = array([15,10,5,2.5,1])
        critical = around(_Avals_norm / (1.0 + 4.0/N - 25.0/N/N), 3)
    elif dist == 'expon':
        w = y / xbar
        z = distributions.expon.cdf(w)
        sig = array([15,10,5,2.5,1])
        critical = around(_Avals_expon / (1.0 + 0.6/N), 3)
    elif dist == 'logistic':
        def rootfunc(ab, xj, N):
            # likelihood equations for the logistic location/scale fit
            a, b = ab
            tmp = (xj-a)/b
            tmp2 = exp(tmp)
            val = [sum(1.0/(1+tmp2))-0.5*N,
                   sum(tmp*(1.0-tmp2)/(1+tmp2))+N]
            return array(val)
        sol0 = array([xbar, stats.std(x)])
        sol = optimize.fsolve(rootfunc, sol0, args=(x, N), xtol=1e-5)
        w = (y-sol[0])/sol[1]
        z = distributions.logistic.cdf(w)
        sig = array([25,10,5,2.5,1,0.5])
        critical = around(_Avals_logistic / (1.0+0.25/N), 3)
    else:   # gumbel / extreme1
        def fixedsolve(th, xj, N):
            val = stats.sum(xj)*1.0/N
            tmp = exp(-xj/th)
            term = sum(xj*tmp)
            term /= sum(tmp)
            return val - term
        s = optimize.fixed_point(fixedsolve, 1.0, args=(x, N), xtol=1e-5)
        xbar = -s*log(sum(exp(-x/s))*1.0/N)
        w = (y-xbar)/s
        z = distributions.gumbel_l.cdf(w)
        sig = array([25,10,5,2.5,1])
        critical = around(_Avals_gumbel / (1.0 + 0.2/sqrt(N)), 3)
    i = arange(1, N+1)
    S = sum((2*i-1.0)/N*(log(z)+log(1-z[::-1])))
    A2 = -N-S
    return A2, critical, sig
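# Usage sketch (illustrative): draw a sample and compare A2 against
# the returned critical values; the draw below assumes the usual rvs
# interface of the distributions objects:
#
#     >>> x = distributions.norm.rvs(size=100)
#     >>> A2, crit, sig = anderson(x, dist='norm')
#
# Normality is rejected at, say, the 5% level when A2 exceeds the
# critical value paired with sig == 5.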
def shapiro(x, a=None, reta=0):
    """Shapiro-Wilk test for normality.

    Given random variates x, compute the W statistic and its p-value
    for a test of the null hypothesis of normality.  If the "a"
    coefficients for a sample of this size are already known they may
    be passed in; if reta is nonzero the computed coefficients are
    returned as a third output.
    """
    N = len(x)
    if N < 3:
        raise ValueError, "Data must be at least length 3."
    if a is None:
        a = zeros(N, 'f')
        init = 0
    else:
        assert len(a) == N/2, "a must be == len(x)/2"
        init = 1
    y = sort(x)
    a, w, pw, ifault = statlib.swilk(y, a[:N/2], init)
    if not ifault in [0, 2]:
        print ifault
    if N > 5000:
        print "p-value may not be accurate for N > 5000."
    if reta:
        return w, pw, a
    else:
        return w, pw

# Values from Stephens, M A, "EDF Statistics for Goodness of Fit and
# Some Comparisons", Journal of the American Statistical Association,
# Vol. 69, Issue 347, Sept. 1974, pp 730-737.
_Avals_norm = array([0.576, 0.656, 0.787, 0.918, 1.092])
_Avals_expon = array([0.922, 1.078, 1.341, 1.606, 1.957])
# From Stephens, M A, "Goodness of Fit for the Extreme Value Distribution",
# Biometrika, Vol. 64, Issue 3, Dec. 1977, pp 583-588.
_Avals_gumbel = array([0.474, 0.637, 0.757, 0.877, 1.038])
# From Stephens, M A, "Tests of Fit for the Logistic Distribution Based
# on the Empirical Distribution Function", Biometrika,
# Vol. 66, Issue 3, Dec. 1979, pp 591-595.
_Avals_logistic = array([0.426, 0.563, 0.660, 0.769, 0.906, 1.010])
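# Usage sketch for shapiro above (illustrative, same assumptions as
# the other examples):
#
#     >>> x = distributions.norm.rvs(size=50)
#     >>> w, pw = shapiro(x)
#
# A large p-value pw means the hypothesis of normality cannot be
# rejected.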