Example #1
0
 def rootfunc(ab,xj,N):
     a,b = ab
     tmp = (xj-a)/b
     tmp2 = exp(tmp)
     val = [sum(1.0/(1+tmp2))-0.5*N,
            sum(tmp*(1.0-tmp2)/(1+tmp2))+N]
     return array(val)
Example #2
0
def lena():
    import cPickle, os
    fname = os.path.join(os.path.dirname(__file__),'plt','lena.dat')
    f = open(fname,'rb')
    lena = array(cPickle.load(f))
    f.close()
    return lena
Example #3
0
def _make_complex_eigvecs(w,vin,cmplx_tcode):
    v = scipy_base.array(vin,typecode=cmplx_tcode)
    ind = scipy_base.nonzero(scipy_base.not_equal(w.imag,0.0))
    vnew = scipy_base.zeros((v.shape[0],len(ind)>>1),cmplx_tcode)
    vnew.real = scipy_base.take(vin,ind[::2],1)
    vnew.imag = scipy_base.take(vin,ind[1::2],1)
    count = 0
    conj = scipy_base.conjugate
    for i in range(len(ind)/2):
        v[:,ind[2*i]] = vnew[:,count]
        v[:,ind[2*i+1]] = conj(vnew[:,count])
        count += 1
    return v
Example #4
0
def rfftfreq(n,d=1.0):
    """ rfftfreq(n, d=1.0) -> f

    DFT sample frequencies (for usage with rfft,irfft).

    The returned float array contains the frequency bins in
    cycles/unit (with zero at the start) given a window length n and a
    sample spacing d:

      f = [0,1,1,2,2,...,n/2-1,n/2-1,n/2]/(d*n)   if n is even
      f = [0,1,1,2,2,...,n/2-1,n/2-1,n/2,n/2]/(d*n)   if n is odd
    """
    assert isinstance(n,types.IntType)
    return array(range(1,n+1),'i')/2/float(n*d)
Example #5
0
def oneway(*args,**kwds):
    """Test for equal means in two or more samples from the
    normal distribution.

    If the keyword parameter <equal_var> is true then the variances
    are assumed to be equal, otherwise they are not assumed to
    be equal (default).

    Return test statistic and the p-value giving the probability
    of error if the null hypothesis (equal means) is rejected at this value.
    """
    k = len(args)
    if k < 2:
        raise ValueError, "Must enter at least two input sample vectors."
    if 'equal_var' in kwds.keys():
        if kwds['equal_var']: evar = 1
        else: evar = 0
    else:
        evar = 0

    Ni = array([len(args[i]) for i in range(k)])
    Mi = array([stats.mean(args[i]) for i in range(k)])
    Vi = array([stats.var(args[i]) for i in range(k)])
    Wi = Ni / Vi
    swi = sum(Wi)
    N = sum(Ni)
    my = sum(Mi*Ni)*1.0/N
    tmp = sum((1-Wi/swi)**2 / (Ni-1.0))/(k*k-1.0)
    if evar:
        F = ((sum(Ni*(Mi-my)**2) / (k-1.0)) / (sum((Ni-1.0)*Vi) / (N-k)))
        pval = distributions.f.sf(F,k-1,N-k)  # 1-cdf
    else:
        m = sum(Wi*Mi)*1.0/swi
        F = sum(Wi*(Mi-m)**2) / ((k-1.0)*(1+2*(k-2)*tmp))
        pval = distributions.f.sf(F,k-1.0,1.0/(3*tmp))

    return F, pval
Example #6
0
def fftfreq(n,d=1.0):
    """ fftfreq(n, d=1.0) -> f

    DFT sample frequencies

    The returned float array contains the frequency bins in
    cycles/unit (with zero at the start) given a window length n and a
    sample spacing d:

      f = [0,1,...,n/2-1,-n/2,...,-1]/(d*n)         if n is even
      f = [0,1,...,(n-1)/2,-(n-1)/2,...,-1]/(d*n)   if n is odd
    """
    assert isinstance(n,types.IntType)
    k = range(0,(n-1)/2+1)+range(-(n/2),0)
    return array(k,'d')/(n*d)
Example #7
0
def derivative(func,x0,dx=1.0,n=1,args=(),order=3):
    """Given a function, use a central difference formula with spacing dx to
       compute the nth derivative at x0.
       
       order is the number of points to use and must be odd.

       Warning: Decreasing the step size too small can result in
       round-off error.
    """
    assert (order >= n+1), "Number of points must be at least the derivative order + 1."
    assert (order % 2 == 1), "Odd number of points only."
    # pre-computed for n=1 and 2 and low-order for speed.
    if n==1:
        if order == 3:
            weights = array([-1,0,1])/2.0
        elif order == 5:
            weights = array([1,-8,0,8,-1])/12.0
        elif order == 7:
            weights = array([-1,9,-45,0,45,-9,1])/60.0
        elif order == 9:
            weights = array([3,-32,168,-672,0,672,-168,32,-3])/840.0
        else:
            weights = central_diff_weights(order,1)
    elif n==2:
        if order == 3:
            weights = array([1,-2.0,1])
        elif order == 5:
            weights = array([-1,16,-30,16,-1])/12.0
        elif order == 7:
            weights = array([2,-27,270,-490,270,-27,2])/180.0
        elif order == 9:
            weights = array([-9,128,-1008,8064,-14350,8064,-1008,128,-9])/5040.0
        else:
            weights = central_diff_weights(order,2)
    else:
        weights = central_diff_weights(order, n)
    val = 0.0
    ho = order >> 1
    for k in range(order):
        val += weights[k]*func(x0+(k-ho)*dx,*args)
    return val / product((dx,)*n)
Example #8
0
def ansari(x,y):
    """Determine if the scale parameter for two distributions with equal
    medians is the same using the Ansari-Bradley statistic.

    Specifically, compute the AB statistic and the probability of error
    that the null hypothesis is true but rejected with the computed
    statistic as the critical value.

    One can reject the null hypothesis that the ratio of variances is 1 if
    returned probability of error is small (say < 0.05)
    """
    x,y = asarray(x),asarray(y)
    n = len(x)
    m = len(y)
    if (m < 1):
        raise ValueError, "Not enough other observations."
    if (n < 1):
        raise ValueError, "Not enough test observations."
    N = m+n
    xy = r_[x,y]  # combine
    rank = stats.rankdata(xy)
    symrank = amin(array((rank,N-rank+1)),0)
    AB = sum(symrank[:n])
    uxy = unique(xy)
    repeats = (len(uxy) != len(xy))
    exact = ((m<55) and (n<55) and not repeats)
    if repeats and ((m < 55)  or (n < 55)):
        print "Ties preclude use of exact statistic."
    if exact:
        astart, a1, ifault = statlib.gscale(n,m)
        ind = AB-astart
        total = sum(a1)
        if ind < len(a1)/2.0:
            cind = int(ceil(ind))
            if (ind == cind):
                pval = 2.0*sum(a1[:cind+1])/total
            else:
                pval = 2.0*sum(a1[:cind])/total
        else:
            find = int(floor(ind))
            if (ind == floor(ind)):
                pval = 2.0*sum(a1[find:])/total
            else:
                pval = 2.0*sum(a1[find+1:])/total
        return AB, min(1.0,pval)
    
    # otherwise compute normal approximation
    if N % 2:  # N odd
        mnAB = n*(N+1.0)**2 / 4.0 / N
        varAB = n*m*(N+1.0)*(3+N**2)/(48.0*N**2)
    else:
        mnAB = n*(N+2.0)/4.0
        varAB = m*n*(N+2)*(N-2.0)/48/(N-1.0)
    if repeats:   # adjust variance estimates
        # compute sum(tj * rj**2)
        fac = sum(symrank**2)
        if N % 2: # N odd
            varAB = m*n*(16*N*fac-(N+1)**4)/(16.0 * N**2 * (N-1))
        else:  # N even
            varAB = m*n*(16*fac-N*(N+2)**2)/(16.0 * N * (N-1))
    z = (AB - mnAB)/sqrt(varAB)
    pval = (1-distributions.norm.cdf(abs(z)))*2.0
    return AB, pval
Example #9
0
def anderson(x,dist='norm'):
    """Anderson and Darling test for normal, exponential, or Gumbel
    (Extreme Value Type I) distribution.

    Given samples x, return A2, the Anderson-Darling statistic,
    the significance levels in percentages, and the corresponding
    critical values.

    Critical values provided are for the following significance levels
    norm/expon:   15%, 10%, 5%, 2.5%, 1%
    Gumbel:       25%, 10%, 5%, 2.5%, 1%
    logistic:     25%, 10%, 5%, 2.5%, 1%, 0.5%

    If A2 is larger than these critical values then for that significance
    level, the hypothesis that the data come from a normal (exponential)
    can be rejected.
    """
    if not dist in ['norm','expon','gumbel','extreme1','logistic']:
        raise ValueError, "Invalid distribution."
    y = sort(x)
    xbar = stats.mean(x)
    N = len(y)
    if dist == 'norm':
        s = stats.std(x)
        w = (y-xbar)/s
        z = distributions.norm.cdf(w)
        sig = array([15,10,5,2.5,1])
        critical = around(_Avals_norm / (1.0 + 4.0/N - 25.0/N/N),3)
    elif dist == 'expon':
        w = y / xbar
        z = distributions.expon.cdf(w)
        sig = array([15,10,5,2.5,1])
        critical = around(_Avals_expon / (1.0 + 0.6/N),3)
    elif dist == 'logistic':
        def rootfunc(ab,xj,N):
            a,b = ab
            tmp = (xj-a)/b
            tmp2 = exp(tmp)
            val = [sum(1.0/(1+tmp2))-0.5*N,
                   sum(tmp*(1.0-tmp2)/(1+tmp2))+N]
            return array(val)
        sol0=array([xbar,stats.std(x)])
        sol = optimize.fsolve(rootfunc,sol0,args=(x,N),xtol=1e-5)
        w = (y-sol[0])/sol[1]
        z = distributions.logistic.cdf(w)
        sig = array([25,10,5,2.5,1,0.5])
        critical = around(_Avals_logistic / (1.0+0.25/N),3)
    else:
        def fixedsolve(th,xj,N):
            val = stats.sum(xj)*1.0/N
            tmp = exp(-xj/th)
            term = sum(xj*tmp)
            term /= sum(tmp)
            return val - term
        s = optimize.fixed_point(fixedsolve, 1.0, args=(x,N),xtol=1e-5)
        xbar = -s*log(sum(exp(-x/s))*1.0/N)
        w = (y-xbar)/s
        z = distributions.gumbel_l.cdf(w)
        sig = array([25,10,5,2.5,1])
        critical = around(_Avals_gumbel / (1.0 + 0.2/sqrt(N)),3)
    i = arange(1,N+1)
    S = sum((2*i-1.0)/N*(log(z)+log(1-z[::-1])))
    A2 = -N-S
    return A2, critical, sig
Example #10
0
        init = 1
    y = sort(x)
    a,w,pw,ifault = statlib.swilk(y,a[:N/2],init)
    if not ifault in [0,2]:
        print ifault
    if N > 5000:
        print "p-value may not be accurate for N > 5000."
    if reta:
        return w, pw, a
    else:        
        return w, pw

# Values from Stephens, M A, "EDF Statistics for Goodness of Fit and
#             Some Comparisons", Journal of he American Statistical
#             Association, Vol. 69, Issue 347, Sept. 1974, pp 730-737
_Avals_norm = array([0.576, 0.656, 0.787, 0.918, 1.092])
_Avals_expon  = array([0.922, 1.078, 1.341, 1.606, 1.957])
# From Stephens, M A, "Goodness of Fit for the Extreme Value Distribution",
#             Biometrika, Vol. 64, Issue 3, Dec. 1977, pp 583-588.
_Avals_gumbel = array([0.474, 0.637, 0.757, 0.877, 1.038])
# From Stephens, M A, "Tests of Fit for the Logistic Distribution Based
#             on the Empirical Distribution Function.", Biometrika, 
#             Vol. 66, Issue 3, Dec. 1979, pp 591-595.
_Avals_logistic = array([0.426, 0.563, 0.660, 0.769, 0.906, 1.010])
def anderson(x,dist='norm'):
    """Anderson and Darling test for normal, exponential, or Gumbel
    (Extreme Value Type I) distribution.

    Given samples x, return A2, the Anderson-Darling statistic,
    the significance levels in percentages, and the corresponding
    critical values.