コード例 #1
0
ファイル: mstats.py プロジェクト: mbentz80/jzigbeercp
def trim_both(data, proportiontocut=0.2, axis=None):
    """Trims the data by masking the int(proportiontocut*n) smallest and the
    int(proportiontocut*n) largest values of data along the given axis, where n
    is the number of unmasked values.

:Inputs:
    data : MaskedArray
        Data to trim.
    proportiontocut : float *[0.2]*
        Proportion of the unmasked values to mask at EACH end. If n is the
        number of unmasked values before trimming, about (1-2*proportiontocut)*n
        values remain unmasked afterwards.
    axis : integer *[None]*
        Axis along which to perform the trimming. If None, the data is
        flattened first.

:Returns:
    The (flattened, if axis is None) masked array with the extra values masked.
    """
    #...................
    def _trim_1D(data, trim):
        "Private function: return a trimmed 1D array."
        ncounts = data.count()
        ntrim = int(trim * ncounts)
        # The slicing below relies on argsort ordering the unmasked values
        # first, so that they occupy idxsort[:ncounts].
        idxsort = data.argsort()
        data[idxsort[:ntrim]] = masked
        # Bound the upper slice explicitly. The former start index
        # (ncounts-nsize-ntrim) is 0 when nothing is masked and ntrim is 0,
        # which masked the whole array instead of nothing.
        data[idxsort[ncounts - ntrim:ncounts]] = masked
        return data
    #...................
    data = masked_array(data, copy=False, subok=True)
    data.unshare_mask()
    if (axis is None):
        return _trim_1D(data.ravel(), proportiontocut)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_trim_1D, axis, data, proportiontocut)
コード例 #2
0
ファイル: morestats.py プロジェクト: mbentz80/jzigbeercp
def median_cihs(data, alpha=0.05, axis=None):
    """Computes the alpha-level confidence interval for the median of the data,
    following the Hettmasperger-Sheather method.

:Inputs:
    data : sequence
        Input data. Masked values are discarded.
    alpha : float *[0.05]*
        Confidence degree. alpha and 1-alpha are treated alike: the smaller
        of the two is used.
    axis : integer *[None]*
        Axis along which to compute the interval. If None, all the unmasked
        values are used as a single sample.

:Returns:
    A (lower, upper) tuple of interpolated limits (one pair per 1D slice when
    an axis is given).
    """
    def _cihs_1D(data, alpha):
        "Confidence limits of the median for a single 1D (masked) array."
        data = numpy.sort(data.compressed())
        n = len(data)
        alpha = min(alpha, 1-alpha)
        # Public ppf instead of the private _ppf: same quantile for valid input.
        k = int(binom.ppf(alpha/2., n, 0.5))
        gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
        if gk < 1-alpha:
            # Coverage too small: widen the interval by one order statistic.
            k -= 1
            gk = binom.cdf(n-k,n,0.5) - binom.cdf(k-1,n,0.5)
        gkk = binom.cdf(n-k-1,n,0.5) - binom.cdf(k,n,0.5)
        # Interpolation weight between the two nested intervals.
        I = (gk - 1 + alpha)/(gk - gkk)
        lambd = (n-k) * I / float(k + (n-2*k)*I)
        lims = (lambd*data[k] + (1-lambd)*data[k-1],
                lambd*data[n-k-1] + (1-lambd)*data[n-k])
        return lims
    data = masked_array(data, copy=False)
    # Computes the interval along axis (or globally)
    if (axis is None):
        # The helper compresses the data itself and only takes alpha; the
        # former call passed two undefined names (p, var) and raised NameError.
        result = _cihs_1D(data, alpha)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        result = apply_along_axis(_cihs_1D, axis, data, alpha)
    #
    return result
コード例 #3
0
ファイル: mstats.py プロジェクト: mbentz80/jzigbeercp
def trimmed_stde(data, proportiontocut=0.2, axis=None):
    """Returns the standard error of the trimmed mean of the input data along
    the given axis, trimming being applied to both ends of the distribution.

:Inputs:
    data : MaskedArray
        Data to trim.
    proportiontocut : float *[0.2]*
        Proportion of the data to cut from each side of the data.
        As a result, (2*proportiontocut*n) values are actually trimmed.
    axis : integer *[None]*
        Axis along which to perform the trimming. If None, the data is
        flattened first.
    """
    #........................
    def _trimmed_stde_1D(data, trim=0.2):
        "Standard error of the trimmed mean for a single 1D array."
        # NOTE(review): winsorize is called with its own defaults, so `trim`
        # only enters through the denominator below -- confirm this is intended.
        wins = winsorize(data)
        scale = (1-2*trim) * numpy.sqrt(wins.count())
        return wins.stdu() / scale
    #........................
    data = masked_array(data, copy=False, subok=True)
    data.unshare_mask()
    if axis is not None:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_trimmed_stde_1D, axis, data, proportiontocut)
    return _trimmed_stde_1D(data.ravel(), proportiontocut)
コード例 #4
0
ファイル: morestats.py プロジェクト: mbentz80/jzigbeercp
def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
    """Computes quantile estimates with the Harrell-Davis method, where the estimates
    are calculated as a weighted linear combination of order statistics.
    If var=True, the variance of the estimate is also returned.
    Depending on var, returns a (p,) array of quantiles or a (2,p) array of quantiles
    and variances. Entries that could not be computed (fewer than 2 unmasked
    values) are masked in the result.

:Inputs:
    data: ndarray
        Data array.
    prob: Sequence
        List of quantiles to compute.
    axis : integer *[None]*
        Axis along which to compute the quantiles. If None, use a flattened array.
    var : boolean *[False]*
        Whether to return the variance of the estimate.

:Note:
    The function is restricted to 2D arrays.
    """
    def _hd_1D(data,prob,var):
        "Computes the HD quantiles for a 1D array."
        xsorted = numpy.squeeze(numpy.sort(data.compressed().view(ndarray)))
        # squeeze() turns a single value into a 0-d array, on which len()
        # raises TypeError; .size is valid for every case.
        n = xsorted.size
        #.........
        hd = empty((2,len(prob)), float_)
        if n < 2:
            # Not enough data: report NaN (masked by the caller).
            hd.flat = numpy.nan
            if var:
                return hd
            return hd[0]
        #.........
        v = arange(n+1) / float(n)
        betacdf = beta.cdf
        for (i,p) in enumerate(prob):
            # Weights are increments of the beta cdf over the grid k/n.
            _w = betacdf(v, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            hd_mean = dot(w, xsorted)
            hd[0,i] = hd_mean
            #
            hd[1,i] = dot(w, (xsorted-hd_mean)**2)
            #
        # The extreme probabilities map to the sample extremes.
        hd[0, prob == 0] = xsorted[0]
        hd[0, prob == 1] = xsorted[-1]
        if var:
            hd[1, prob == 0] = hd[1, prob == 1] = numpy.nan
            return hd
        return hd[0]
    # Initialization & checks ---------
    data = masked_array(data, copy=False, dtype=float_)
    # asarray/atleast_1d avoid a copy when possible and, unlike
    # numpy.array(..., copy=False), still accept a plain list under NumPy 2.
    p = numpy.atleast_1d(numpy.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None):
        result = _hd_1D(data, p, var)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        result = apply_along_axis(_hd_1D, axis, data, p, var)
    #
    return masked_array(result, mask=numpy.isnan(result))
コード例 #5
0
ファイル: mstats.py プロジェクト: mbentz80/jzigbeercp
def mquantiles(data, prob=list([.25,.5,.75]), alphap=.4, betap=.4, axis=None):
    """Computes empirical quantiles of the unmasked values of a data array.
Sample quantiles are defined by:
*Q(p) = (1-g).x[i] + g.x[i+1]*
where *x[j]* is the jth order statistic,
with *i = floor(n*p+m)*, *m = alphap + p*(1-alphap-betap)* and *g = n*p + m - i*.

Typical values of (alphap,betap) are:

    - (0,1)    : *p(k) = k/n* : linear interpolation of cdf (R, type 4)
    - (.5,.5)  : *p(k) = (k+1/2.)/n* : piecewise linear function (R, type 5)
    - (0,0)    : *p(k) = k/(n+1)* : (R type 6)
    - (1,1)    : *p(k) = (k-1)/(n-1)*. In this case, p(k) = mode[F(x[k])].
      That's R default (R type 7)
    - (1/3,1/3): *p(k) = (k-1/3)/(n+1/3)*. Then p(k) ~ median[F(x[k])].
      The resulting quantile estimates are approximately median-unbiased
      regardless of the distribution of x. (R type 8)
    - (3/8,3/8): *p(k) = (k-3/8)/(n+1/4)*. Blom.
      The resulting quantile estimates are approximately unbiased
      if x is normally distributed (R type 9)
    - (.4,.4)  : approximately quantile unbiased (Cunnane)
    - (.35,.35): APL, used with PWM

:Parameters:
    data : Sequence
        Input data, as a sequence or array of dimension at most 2.
    prob : Sequence *[(0.25, 0.5, 0.75)]*
        List of quantiles to compute.
    alphap : Float (*[0.4]*)
        Plotting positions parameter.
    betap : Float (*[0.4]*)
        Plotting positions parameter.
    axis : Integer *[None]*
        Axis along which to compute quantiles. If *None*, uses the whole
        (flattened/compressed) dataset.
    """
    def _quantiles1D(data,m,p):
        "Quantiles of the unmasked values of a single 1D array."
        x = numpy.sort(data.compressed())
        n = len(x)
        # Degenerate samples: nothing to interpolate between.
        if n == 0:
            return masked_array(numpy.empty(len(p), dtype=float_), mask=True)
        if n == 1:
            return masked_array(numpy.resize(x, p.shape), mask=nomask)
        position = (n*p + m)
        # Index of the upper order statistic, kept inside [1, n-1].
        upper = numpy.floor(position.clip(1, n-1)).astype(int_)
        weight = (position-upper).clip(0,1)
        lower = upper - 1
        return (1.-weight)*x[lower.tolist()] + weight*x[upper.tolist()]

    # Initialization & checks ---------
    data = masked_array(data, copy=False)
    p = narray(prob, copy=False, ndmin=1)
    m = alphap + p*(1.-alphap-betap)
    # Computes quantiles along axis (or globally)
    if axis is not None:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_quantiles1D, axis, data, m, p)
    return _quantiles1D(data, m, p)
コード例 #6
0
ファイル: mstats.py プロジェクト: mbentz80/jzigbeercp
def mmedian(data, axis=None):
    """Returns the median of the unmasked values of data along the given axis.

    When axis is None all the unmasked values are pooled. A slice without any
    unmasked value yields `masked`.
    """
    def _median1D(data):
        "Median of the unmasked values of one 1D array."
        values = numpy.sort(data.compressed())
        return numpy.median(values) if values.size else masked
    data = masked_array(data, subok=True, copy=True)
    if axis is not None:
        return apply_along_axis(_median1D, axis, data)
    return _median1D(data)
コード例 #7
0
ファイル: morestats.py プロジェクト: mbentz80/jzigbeercp
def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
    """Computes the standard error of the Harrell-Davis quantile estimates by jackknife.

:Inputs:
    data: ndarray
        Data array.
    prob: Sequence
        List of quantiles to compute.
    axis : integer *[None]*
        Axis along which to compute the quantiles. If None, use a flattened array.

:Returns:
    A flattened masked array of standard errors; entries that could not be
    computed (fewer than 2 unmasked values) are masked.

:Note:
    The function is restricted to 2D arrays.
    """
    def _hdsd_1D(data,prob):
        "Computes the std error for 1D arrays."
        xsorted = numpy.sort(data.compressed())
        n = len(xsorted)
        #.........
        hdsd = empty(len(prob), float_)
        if n < 2:
            # Not enough data for a leave-one-out estimate: report NaN and stop
            # here instead of falling through to divisions by (n-1).
            hdsd.flat = numpy.nan
            return hdsd
        #.........
        vv = arange(n) / float(n-1)
        betacdf = beta.cdf
        #
        for (i,p) in enumerate(prob):
            # NOTE(review): the beta parameters use (n+1) although each
            # leave-one-out sample holds n-1 points -- confirm against the
            # reference for the method.
            _w = betacdf(vv, (n+1)*p, (n+1)*(1-p))
            w = _w[1:] - _w[:-1]
            # Estimate recomputed with the k-th order statistic left out.
            mx_ = numpy.fromiter((dot(w, numpy.delete(xsorted, k))
                                  for k in range(n)), dtype=float_)
            mx_var = mx_.var() * n / float(n-1)
            # Plain scalar arithmetic: a 1-element array used to be assigned
            # into the scalar slot hdsd[i].
            hdsd[i] = float(n-1) * sqrt(mx_var / float(n))
        return hdsd
    # Initialization & checks ---------
    data = masked_array(data, copy=False, dtype=float_)
    # asarray/atleast_1d avoid a copy when possible and, unlike
    # numpy.array(..., copy=False), still accept a plain list under NumPy 2.
    p = numpy.atleast_1d(numpy.asarray(prob))
    # Computes quantiles along axis (or globally)
    if (axis is None):
        # The helper compresses the data itself; pre-compressing handed it an
        # object that may no longer provide .compressed().
        result = _hdsd_1D(data, p)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        result = apply_along_axis(_hdsd_1D, axis, data, p)
    #
    return masked_array(result, mask=numpy.isnan(result)).ravel()
コード例 #8
0
ファイル: mstats.py プロジェクト: mbentz80/jzigbeercp
def idealfourths(data, axis=None):
    """Returns an estimate of the interquartile range of the data along the given
    axis, as computed with the ideal fourths.

:Inputs:
    data : sequence
        Input data. Masked values are discarded.
    axis : integer *[None]*
        Axis along which to compute the estimate. If None, all the unmasked
        values are used as a single sample.

:Returns:
    The difference between the upper and lower ideal fourths, or NaN when
    fewer than 3 unmasked values are available.
    """
    def _idf(data):
        "Ideal-fourths spread of the unmasked values of one 1D array."
        x = numpy.sort(data.compressed())
        n = len(x)
        if n < 3:
            # j would be 0 and the order statistics below would not exist.
            return numpy.nan
        (j,h) = divmod(n/4. + 5/12.,1)
        # divmod returns a float quotient; indices must be integers.
        j = int(j)
        # j counts order statistics from 1, x is indexed from 0: the lower
        # fourth interpolates between x[j-1] and x[j], mirroring the upper one.
        qlo = (1-h)*x[j-1] + h*x[j]
        k = n - j
        qup = (1-h)*x[k] + h*x[k-1]
        return qup - qlo
    data = masked_array(data, copy=False)
    if (axis is None):
        return _idf(data)
    else:
        return apply_along_axis(_idf, axis, data)
コード例 #9
0
ファイル: mstats.py プロジェクト: mbentz80/jzigbeercp
def stde_median(data, axis=None):
    """Returns the McKean-Schrader estimate of the standard error of the sample
    median along the given axis.

    Masked values are discarded. When axis is None, all the unmasked values
    are used as a single sample.
    """
    def _stdemed_1D(data):
        "McKean-Schrader estimate for the unmasked values of one 1D array."
        ordered = numpy.sort(data.compressed())
        n = len(ordered)
        # Presumably the 0.995 quantile of the standard normal -- TODO confirm.
        z = 2.5758293035489004
        k = int(round((n+1)/2. - z * sqrt(n/4.),0))
        # Spread between the (n-k+1)-th and k-th order statistics.
        spread = ordered[n-k] - ordered[k-1]
        return (spread/(2.*z))
    #
    data = masked_array(data, copy=False, subok=True)
    if axis is not None:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_stdemed_1D, axis, data)
    return _stdemed_1D(data)
コード例 #10
0
ファイル: morestats.py プロジェクト: mbentz80/jzigbeercp
def rank_data(data, axis=None, use_missing=False):
    """Returns the rank of each data point along the given axis.
    Tied values all receive the average of their ranks.
    Masked values are ranked 0 if use_missing is False, or are given the
    average rank of the unmasked values if use_missing is True.

:Inputs:
    data : sequence
        Input data. The data is transformed to a masked array
    axis : integer *[None]*
        Axis along which to perform the ranking. If None, the array is first
        flattened and the ranks are returned with the shape of the input.
    use_missing : boolean *[False]*
        Flag indicating whether the masked values have a rank of 0 (False) or
        equal to the average rank of the unmasked values (True)
    """
    #
    def _rank1d(data, use_missing=False):
        "Ranks of a single 1D masked array."
        nvalid = data.count()
        ranks = numpy.empty(data.size, dtype=float_)
        order = data.argsort()
        # The first nvalid sorted positions are treated as the unmasked values.
        ranks[order[:nvalid]] = numpy.arange(1,nvalid+1)
        # The remaining positions get the placeholder rank.
        ranks[order[nvalid:]] = (nvalid+1)/2. if use_missing else 0
        # Replace the ranks of each group of tied values by their mean.
        for tied in find_repeats(data)[0]:
            where_tied = (data==tied).filled(False)
            ranks[where_tied] = ranks[where_tied].mean()
        return ranks
    #
    data = masked_array(data, copy=False)
    if axis is not None:
        return apply_along_axis(_rank1d, axis, data, use_missing)
    if data.ndim > 1:
        return _rank1d(data.ravel(), use_missing).reshape(data.shape)
    return _rank1d(data, use_missing)
コード例 #11
0
ファイル: mstats.py プロジェクト: mbentz80/jzigbeercp
def trim_tail(data, proportiontocut=0.2, tail='left', axis=None):
    """Trims the data by masking int(proportiontocut*n) values from ONE tail of
    the data along the given axis, where n is the number of unmasked values.

:Inputs:
    data : MaskedArray
        Data to trim.
    proportiontocut : float *[0.2]*
        Proportion of the unmasked values to mask. If n is the number of
        unmasked values before trimming, about (1-proportiontocut)*n values
        remain unmasked afterwards.
    tail : string *['left']*
        Tail to trim; only the first letter is examined, case-insensitively:
        'l' masks the smallest values, 'r' the largest ones.
    axis : integer *[None]*
        Axis along which to perform the trimming. If None, the data is
        flattened first.

:Raises:
    TypeError if tail is not a string, ValueError if it does not start with
    'l' or 'r'.
    """
    #...................
    def _trim_1D(data, trim, left):
        "Private function: return a trimmed 1D array."
        ncounts = data.count()
        ntrim = int(trim * ncounts)
        # The slicing below relies on argsort ordering the unmasked values
        # first, so that they occupy idxsort[:ncounts].
        idxsort = data.argsort()
        if left:
            data[idxsort[:ntrim]] = masked
        else:
            # Bound the slice explicitly. The former start index
            # (ncounts-nsize-ntrim) is 0 when nothing is masked and ntrim is 0,
            # which masked the whole array instead of nothing.
            data[idxsort[ncounts - ntrim:ncounts]] = masked
        return data
    #...................
    data = masked_array(data, copy=False, subok=True)
    data.unshare_mask()
    #
    if not isinstance(tail, str):
        raise TypeError("The tail argument should be in ('left','right')")
    # Slice rather than index so that an empty string reaches the ValueError
    # below instead of raising IndexError.
    tail = tail.lower()[:1]
    if tail == 'l':
        left = True
    elif tail == 'r':
        left = False
    else:
        raise ValueError("The tail argument should be in ('left','right')")
    #
    if (axis is None):
        return _trim_1D(data.ravel(), proportiontocut, left)
    else:
        assert data.ndim <= 2, "Array should be 2D at most !"
        return apply_along_axis(_trim_1D, axis, data, proportiontocut, left)
コード例 #12
0
ファイル: morestats.py プロジェクト: mbentz80/jzigbeercp
def mjci(data, prob=[0.25,0.5,0.75], axis=None):
    """Returns the Maritz-Jarrett estimators of the standard error of selected
    experimental quantiles of the data.

:Input:
    data : sequence
        Input data. Masked values are discarded.
    prob : sequence *[0.25,0.5,0.75]*
        Sequence of quantiles whose standard error must be estimated.
    axis : integer *[None]*
        Axis along which to compute the standard error. If None, all the
        unmasked values are used as a single sample.
    """
    def _mjci_1D(data, p):
        "Maritz-Jarrett standard errors for one 1D (masked) array."
        data = data.compressed()
        ordered = numpy.sort(data)
        n = data.size
        # Integer rank associated with each requested probability.
        ranks = (numpy.array(p) * n + 0.5).astype(int_)
        betacdf = beta.cdf
        #
        mj = empty(len(ranks), float_)
        upper = arange(1,n+1, dtype=float_) / n
        lower = upper - 1./n
        squares = ordered**2
        for (i,m) in enumerate(ranks):
            (a,b) = (m-1, n-m)
            # Weights: beta cdf increments over the cells ((k-1)/n, k/n].
            W = betacdf(upper,a,b) - betacdf(lower,a,b)
            first = numpy.dot(W,ordered)
            second = numpy.dot(W,squares)
            mj[i] = sqrt(second - first**2)
        return mj
    #
    data = masked_array(data, copy=False)
    assert data.ndim <= 2, "Array should be 2D at most !"
    p = numpy.array(prob, copy=False, ndmin=1)
    # Computes the standard errors along axis (or globally)
    if axis is not None:
        return apply_along_axis(_mjci_1D, axis, data, p)
    return _mjci_1D(data, p)