def pskewness(data, m=None, s=None):
    """pskewness(data [,m [,s]]) -> population skewness of data.

    This returns γ₁ "\\N{GREEK SMALL LETTER GAMMA}\\N{SUBSCRIPT ONE}",
    the population skewness. For more information about skewness, see
    the sample skewness function ``skewness``.

    >>> pskewness([1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5])
    ... #doctest: +ELLIPSIS
    1.37474650254...

    If given, m and/or s are used as the population mean and standard
    deviation instead of being estimated from the data.

    Raises StatsError if there are fewer than two data points.
    """
    n, total = stats._std_moment(data, m, s, 3)
    # _std_moment never yields a negative count; internal invariant only.
    assert n >= 0
    if n <= 1:
        # BUG FIX: the previous message claimed "empty data", but this
        # branch also rejects a single data point, for which the
        # standardised third moment is equally undefined.
        raise StatsError('population skewness requires at least two data points')
    return v.div(total, n)
def pkurtosis(data, m=None, s=None):
    """pkurtosis(data [,m [,s]]) -> population kurtosis of data.

    This returns γ₂ "\\N{GREEK SMALL LETTER GAMMA}\\N{SUBSCRIPT TWO}",
    the population kurtosis relative to that of the normal distribution,
    also known as the excess kurtosis. For the "kurtosis proper" known
    as β₂ "\\N{GREEK SMALL LETTER BETA}\\N{SUBSCRIPT TWO}", add 3 to
    the result.

    For more information about kurtosis, see the sample kurtosis
    function ``kurtosis``.

    >>> pkurtosis([1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5])
    ... #doctest: +ELLIPSIS
    0.7794232987...

    If given, m and/or s are used as the population mean and standard
    deviation instead of being estimated from the data.

    Raises StatsError if there are fewer than two data points.
    """
    n, total = stats._std_moment(data, m, s, 4)
    assert n >= 0
    # BUG FIX: when data is columnar, _std_moment returns a vector total,
    # and the old bare ``assert total >= 1`` compared the whole list
    # against an int (a TypeError in Python 3) instead of checking each
    # column. Use the element-wise assertion, matching the sample
    # ``kurtosis`` function.
    v.assert_(lambda x: x >= 1, total)
    if n <= 1:
        raise StatsError('no kurtosis is defined for empty data')
    kurt = v.div(total, n)
    # Convert kurtosis proper (β₂) to excess kurtosis (γ₂).
    return v.sub(kurt, 3)
def kurtosis(data, m=None, s=None):
    """kurtosis(data [,m [,s]]) -> sample excess kurtosis of data.

    Kurtosis is a measure of the shape of a distribution. This function
    returns an estimate of the sample excess kurtosis, usually written
    g₂ "g\\N{SUBSCRIPT TWO}". For the population kurtosis, see
    ``pkurtosis``.

    WARNING: the mathematical terminology and notation surrounding
    kurtosis are often inconsistent and contradictory. See Wolfram
    Mathworld for further details:
    http://mathworld.wolfram.com/Kurtosis.html

    >>> kurtosis([1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5])
    ... #doctest: +ELLIPSIS
    3.03678892733564...

    If the population mean and/or standard deviation are already known,
    pass the mean as optional argument m and/or the standard deviation
    as s:

    >>> kurtosis([1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5], m=2.25, s=1)
    2.3064453125

    CAUTION: "Garbage in, garbage out" applies. Any values at all may
    be passed as m or s, but unless they are sensible estimates of the
    mean and standard deviation the returned kurtosis will not be
    sensible either. If m or s is given and the calculated kurtosis is
    out of range, a warning is raised. If m or s is omitted or None, it
    is estimated from the data.

    If data is an iterable of sequences, each inner sequence is one row
    of data and ``kurtosis`` operates column by column. Every row must
    have the same number of columns, or ValueError is raised.

    >>> data = [[0, 1],
    ...         [1, 5],
    ...         [2, 6],
    ...         [5, 7]]
    ...
    >>> kurtosis(data)  #doctest: +ELLIPSIS
    [1.50000000000000..., 2.23486717956161...]

    Likewise m and s, if given, must each be either a single number or
    have one item per column:

    >>> kurtosis(data, m=[3, 5], s=2)  #doctest: +ELLIPSIS
    [-0.140625, 18.4921875]

    The kurtosis of a population measures the peakedness and the weight
    of the tails. The normal distribution has kurtosis of zero;
    positive kurtosis generally means heavier tails and a sharper peak
    than normal, negative kurtosis lighter tails and a flatter peak.
    Kurtosis has no upper limit and a lower limit of -2. Higher
    kurtosis means more of the variance is due to infrequent extreme
    deviations rather than to frequent modestly sized ones.

    CAUTION: as a rule of thumb, treat a non-zero kurtosis as
    meaningful only if its absolute value exceeds roughly twice its
    standard error. See also ``stderrkurtosis``.
    """
    n, total = stats._std_moment(data, m, s, 4)
    assert n >= 0
    v.assert_(lambda x: x >= 1, total)
    if n < 4:
        raise StatsError('sample kurtosis requires at least 4 data points')
    g2 = v.div(total, n)
    # Multiplying q = (n-1)/((n-2)*(n-3)) through each term first would
    # be cheaper, but empirically it is slightly less accurate, so keep
    # the factored form:  result = q*((n+1)*g2 - 3*(n-1))
    inner = v.sub(v.mul(n+1, g2), 3*(n-1))
    result = v.mul((n-1)/((n-2)*(n-3)), inner)
    if v.isiterable(result):
        bad = any(x < -2 for x in result)
    else:
        bad = result < -2
    if m is s is None:
        # With both m and s estimated from the data itself, an
        # out-of-range result should be impossible — hence an assertion.
        assert not bad, 'kurtosis failed: %r' % result
    elif bad:
        # Easily triggered by junk values of m or s; the difference
        # between a junk value and a legitimate one can be surprisingly
        # subtle, so warn rather than fail.
        import warnings
        warnings.warn('calculated kurtosis out of range')
    return result
def skewness(data, m=None, s=None):
    """skewness(data [,m [,s]]) -> sample skewness of data.

    The skewness, or third standardised moment, of data measures how
    far it is skewed to the left or right of the mean. This returns g₁
    "g\\N{SUBSCRIPT ONE}", the sample skewness. For the population
    skewness, see ``pskewness``.

    WARNING: the mathematical terminology and notation surrounding
    skewness are often inconsistent and contradictory. See Wolfram
    Mathworld for further details:
    http://mathworld.wolfram.com/Skewness.html

    >>> skewness([1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5])
    ... #doctest: +ELLIPSIS
    1.71461013539878...

    If the population mean and/or standard deviation are already known,
    pass the mean as optional argument m and/or the standard deviation
    as s:

    >>> skewness([1.25, 1.5, 1.5, 1.75, 1.75, 2.5, 2.75, 4.5], m=2.25, s=1)
    ... #doctest: +ELLIPSIS
    1.47132881615329...

    CAUTION: "Garbage in, garbage out" applies. Any values at all may
    be passed as m or s, but unless they are sensible estimates of the
    mean and standard deviation the returned skewness will not be
    sensible either. If m or s is omitted or None, it is estimated from
    the data.

    If data is an iterable of sequences, each inner sequence is one row
    of data and ``skewness`` operates column by column. Every row must
    have the same number of columns, or ValueError is raised.

    >>> data = [[0, 1],
    ...         [1, 5],
    ...         [2, 6],
    ...         [5, 7]]
    ...
    >>> skewness(data)  #doctest: +ELLIPSIS
    [1.19034012827899..., -1.44305883553164...]

    Likewise m and s, if given, must each be either a single number or
    have one item per column:

    >>> skewness(data, m=[2.5, 5.0], s=2)  #doctest: +ELLIPSIS
    [-0.189443057077845..., -2.97696232550900...]

    Negative skewness means the distribution's left-hand tail is longer
    than the right-hand one, with the majority of the values (including
    the median) to the right of the mean. Positive skewness means the
    right-hand tail is longer, with the majority of values to the left
    of the mean. Zero skewness means the values are evenly balanced
    around the mean, which often (but not necessarily) implies a
    symmetric distribution.

    CAUTION: as a rule of thumb, treat a non-zero skewness as
    meaningful only if its absolute value exceeds roughly twice its
    standard error. See also ``stderrskewness``.
    """
    n, total = stats._std_moment(data, m, s, 3)
    assert n >= 0
    if n < 3:
        raise StatsError('sample skewness requires at least three items')
    # g1 = sqrt(n*(n-1))/(n-2) * (total/n)
    adjustment = math.sqrt(n*(n-1))/(n-2)
    return v.mul(adjustment, v.div(total, n))