def peak_2_valley(self, s, axis=0):
    r"""
    Computes the peak-to-valley (PV) value of a signal.

    Over a semantically coherent sampling interval — for instance a
    fixed-length window or one period of a rotation — the PV value
    reflects the amplitude spread of the signal:

    $PV=\frac{1}{2}\left(\max(s)\quad -\quad \min(s)\right)$

    **Parameters**

    * **s:** (array_like) signal data; reshaped to 2-d internally.
    * **axis:** (int) axis along which the extrema are taken. Default is 0.

    **Returns**

    * **peak_2_valley:** (ndarray) half the max-to-min range along *axis*.

    ## Snippet code

    ```python
    >>> from scipy.stats import norm
    >>> from rackio_AI import RackioAIFE
    >>> feature_extraction = RackioAIFE()
    >>> s = norm.rvs(size=1000, random_state=3)
    >>> feature_extraction.stats.peak_2_valley(s)
    array([3.34321422])
    >>> s = norm.rvs(size=(1000,2), random_state=3)
    >>> feature_extraction.stats.peak_2_valley(s)
    array([2.99293034, 3.34321422])

    ```
    """
    data = Utils.check_dataset_shape(s)
    spread = np.max(data, axis=axis) - np.min(data, axis=axis)
    return spread / 2
def kurt(self, s, axis: int = 0, fisher: bool = True, bias: bool = True, nan_policy: str = 'propagate'):
    r"""
    Computes the kurtosis (Fisher or Pearson) of a dataset $s$.

    Kurtosis is the fourth central moment divided by the square of the
    variance. Under Fisher's definition 3.0 is subtracted from the result
    so that a normal distribution yields 0.0. With ``bias=False`` the
    kurtosis is calculated using k statistics, eliminating the bias of
    the biased moment estimators.

    **Parameters**

    * **s:** (2d array) data for which the kurtosis is calculated.
    * **axis:** (int or None) axis along which the kurtosis is calculated.
      Default is 0. If None, compute over the whole array.
    * **fisher:** (bool) if True, Fisher's definition is used
      (normal ==> 0.0); if False, Pearson's definition is used
      (normal ==> 3.0).
    * **bias:** (bool) if False, the calculations are corrected for
      statistical bias.
    * **nan_policy:** ({'propagate', 'raise', 'omit'}) how to handle nan
      inputs: 'propagate' returns nan, 'raise' throws an error, 'omit'
      ignores nan values. Default is 'propagate'.

    **returns**

    * **kurtosis** (array 1xcols_dataset) the kurtosis of values along an
      axis. If all values are equal, returns -3 for Fisher's definition
      and 0 for Pearson's definition.

    ## Snippet Code

    ```python
    >>> from scipy.stats import norm
    >>> from rackio_AI import RackioAIFE
    >>> feature_extraction = RackioAIFE()
    >>> s = norm.rvs(size=1000, random_state=3)
    >>> feature_extraction.stats.kurt(s)
    array([-0.06928694])
    >>> s = norm.rvs(size=(1000,2), random_state=3)
    >>> feature_extraction.stats.kurt(s)
    array([-0.00560946, -0.1115389 ])

    ```
    """
    data = Utils.check_dataset_shape(s)
    return kurtosis(
        data,
        axis=axis,
        fisher=fisher,
        bias=bias,
        nan_policy=nan_policy,
    )
def mean(self, s, axis=None, dtype=None, out=None, keepdims=np._NoValue):
    r"""
    Computes the arithmetic mean along the specified axis.

    Returns the average of the array elements — over the flattened array
    by default, otherwise over the given axis. ``float64`` intermediate
    and return values are used for integer inputs.

    **Parameters**

    * **s:** (2d array_like) array containing numbers whose mean is
      desired. If `s` is not an array, a conversion is attempted.
    * **axis:** (None or int or tuple of ints, optional) axis or axes
      along which the means are computed. The default computes the mean
      of the flattened array. A tuple of ints averages over multiple axes.
    * **dtype:** (data-type, optional) type to use in computing the mean.
      For integer inputs the default is ``float64``; for floating point
      inputs it is the input dtype.
    * **out:** (ndarray, optional) alternate output array for the result.
      Default ``None``; if provided it must have the expected output
      shape, and its type is cast if necessary.
    * **keepdims:** (bool, optional) if True, the reduced axes are kept
      as size-one dimensions so the result broadcasts correctly against
      the input. If the default value is passed, `keepdims` is not
      forwarded to the ``mean`` method of ``ndarray`` sub-classes.

    **Returns**

    * **m:** (ndarray, see dtype parameter above) a new array of mean
      values if ``out=None``, otherwise a reference to `out`.

    ## Snippet code

    ```python
    >>> from rackio_AI import RackioAIFE
    >>> feature_extraction = RackioAIFE()
    >>> s = np.array([[1, 2], [3, 4]])
    >>> feature_extraction.stats.mean(s)
    2.5
    >>> feature_extraction.stats.mean(s, axis=0)
    array([2., 3.])
    >>> feature_extraction.stats.mean(s, axis=1)
    array([1.5, 3.5])

    ```
    """
    data = Utils.check_dataset_shape(s)
    return np.mean(data, axis=axis, dtype=dtype, out=out, keepdims=keepdims)
def median(self, s, axis=None, out=None, overwrite_input=False, keepdims=False):
    r"""
    Computes the median along the specified axis.

    Returns the median of the array elements.

    **Parameters**

    * **s:** (2d array_like) input array or object convertible to an array.
    * **axis:** ({int, sequence of int, None}, optional) axis or axes
      along which the medians are computed. By default the median is
      taken over a flattened version of the array.
    * **out:** (ndarray, optional) alternative output array for the
      result. It must have the expected output shape and buffer length;
      the output type is cast if necessary.
    * **overwrite_input:** (bool, optional) if True, allow reuse of the
      memory of `s` for the computation, which saves memory but leaves
      the input in an undefined (probably partially sorted) state.
      Default is False. If True and `s` is not an ``ndarray``, an error
      is raised.
    * **keepdims:** (bool, optional) if True, the reduced axes are kept
      as size-one dimensions so the result broadcasts correctly against
      the original array.

    **Returns**

    * **median:** (ndarray) a new array holding the result. Integer or
      sub-``float64`` inputs yield ``np.float64`` output; otherwise the
      output dtype matches the input. If `out` is given, that array is
      returned instead.

    ## Notes

    Given a vector $V$ of length $N$, the median of $V$ is the middle
    value of a sorted copy of $V$, $V_{sorted}$ - i e.,
    $V_{sorted}\left[\frac{N-1}{2}\right]$, when $N$ is odd, and the
    average of the two middle values of $V_{sorted}$ when $N$ is even.

    ## Snippet code

    ```python
    >>> from rackio_AI import RackioAIFE
    >>> feature_extraction = RackioAIFE()
    >>> s = np.array([[10, 7, 4], [3, 2, 1]])
    >>> feature_extraction.stats.median(s)
    3.5
    >>> feature_extraction.stats.median(s, axis=0)
    array([6.5, 4.5, 2.5])
    >>> feature_extraction.stats.median(s, axis=1)
    array([7., 2.])
    >>> m = feature_extraction.stats.median(s, axis=0)
    >>> out = np.zeros_like(m)
    >>> feature_extraction.stats.median(s, axis=0, out=m)
    array([6.5, 4.5, 2.5])
    >>> m
    array([6.5, 4.5, 2.5])
    >>> b = s.copy()
    >>> feature_extraction.stats.median(b, axis=1, overwrite_input=True)
    array([7., 2.])
    >>> assert not np.all(s==b)
    >>> b = s.copy()
    >>> feature_extraction.stats.median(b, axis=None, overwrite_input=True)
    3.5
    >>> assert not np.all(s==b)

    ```
    """
    data = Utils.check_dataset_shape(s)
    return np.median(
        data,
        axis=axis,
        out=out,
        overwrite_input=overwrite_input,
        keepdims=keepdims,
    )
def peak(self, s, ref=None, axis=0, rate=None, **kwargs):
    r"""
    Computes the peak value of a signal.

    Considering the maximum amplitude relative to a reference level
    $s_{ref}$ (zero or any general level), the peak value is

    $peak = \max\left(s_{i}-ref\right)$

    Often the peak is used in conjunction with other statistical
    parameters, for instance the peak-to-average rate

    $peak = \frac{\max\left(s_{i}-ref\right)}{\frac{1}{N}\sum_{i=0}^{N-1}s_{i}}$

    or the peak-to-median rate.

    **Parameters**

    * **s:** (array_like) signal data; reshaped to 2-d internally.
    * **ref:** (scalar or array_like, optional) reference level. If None
      (default), the first row of the reshaped data is used as reference.
    * **axis:** (int) axis along which the maximum is taken. Default is 0.
    * **rate:** ({'average', 'median', None}) if given, the peak is
      normalized by the mean or the median of the signal.
    * **kwargs:** forwarded to ``self.mean`` / ``self.median`` when
      *rate* is used.

    **Returns**

    * **peak:** (ndarray) the peak value, optionally rate-normalized.

    **Raises**

    * **ValueError:** if *rate* is neither 'average' nor 'median'.

    ## Snippet code

    ```python
    >>> from scipy.stats import norm
    >>> from rackio_AI import RackioAIFE
    >>> feature_extraction = RackioAIFE()
    >>> s = norm.rvs(size=1000, random_state=3)
    >>> feature_extraction.stats.peak(s)
    array([1.91382976])
    >>> s = norm.rvs(size=(1000,2), random_state=3)
    >>> feature_extraction.stats.peak(s)
    array([1.0232499 , 3.26594839])

    ```
    """
    s = Utils.check_dataset_shape(s)

    # Identity checks ('is None') instead of the original '== None'
    # comparisons, which break on array-valued refs and are unidiomatic.
    if ref is not None:
        _peak = np.max(s - ref, axis=axis)
    else:
        # With no explicit reference, the first sample (first row) of the
        # reshaped data acts as the reference level.
        _peak = np.max(s - s[0, :], axis=axis)

    if rate is None:
        return _peak

    rate = rate.lower()
    if rate == 'average':
        return _peak / self.mean(s, **kwargs)
    if rate == 'median':
        return _peak / self.median(s, **kwargs)

    # The original implementation silently returned None for an
    # unrecognized rate; fail loudly instead.
    raise ValueError(
        "rate must be 'average', 'median' or None, got {!r}".format(rate)
    )
def rms(self, s, axis=None, dtype=None, out=None, keepdims=np._NoValue, initial=np._NoValue):
    r"""
    Root Mean Square.

    One of the most important basic features that can be extracted
    directly from the time-domain signal is the RMS, which describes the
    energy of the signal. It is defined as the square root of the average
    squared value of the signal and can also be called the normalized
    energy of the signal.

    $RMS = \sqrt{\frac{1}{n}\sum_{i=0}^{n-1}s_{i}^{2}}$

    Especially in vibration analysis the RMS is used to perform fault
    detection, i.e. triggering an alarm whenever the RMS surpasses a
    level that depends on the size of the machine, the nature of the
    signal (for instance velocity or acceleration), the position of the
    accelerometer, and so on. After detecting the existence of a failure,
    fault diagnosis is performed relying on more sophisticated features.
    For instance the ISO 2372 (VDI 2056) norms define three different
    velocity RMS alarm levels for four different machine classes divided
    by power and foundations of the rotating machines.

    **Parameters**

    * **s:** (2d array_like) elements to get RMS.
    * **axis:** (None or int or tuple of ints, optional) axis or axes
      along which the RMS is computed. The default, ``axis=None``,
      computes the RMS of all elements of the input array. Negative axes
      count from the last axis; a tuple reduces over all listed axes.
    * **dtype:** (dtype, optional) type of the accumulator used for the
      sum of squares (see ``np.sum``).
    * **out:** (ndarray, optional) alternative output array for the
      intermediate sum; its type is cast if necessary.
    * **keepdims:** (bool, optional) if True, the reduced axes are kept
      as size-one dimensions so the result broadcasts correctly against
      the input array.
    * **initial:** (scalar, optional) starting value for the sum of
      squares.

    **Returns**

    * **RMS_along_axis:** (ndarray) an array with the same shape as `s`
      with the specified axis removed; a scalar if `s` is 0-d or `axis`
      is None. If an output array is specified, a reference to `out` is
      returned.

    ## Snippet code

    ```python
    >>> from rackio_AI import RackioAIFE
    >>> feature_extraction = RackioAIFE()
    >>> feature_extraction.stats.rms(np.array([0.5, 1.5]))
    1.118033988749895
    >>> feature_extraction.stats.rms(np.array([0.5, 0.7, 0.2, 1.5]), dtype=np.int32)
    0.7071067811865476
    >>> feature_extraction.stats.rms(np.array([[0, 1], [0, 5]]))
    2.5495097567963922
    >>> feature_extraction.stats.rms(np.array([[0, 1], [0, 5]]), axis=0)
    array([0.        , 3.60555128])
    >>> feature_extraction.stats.rms(np.array([[0, 1], [0, 5]]), axis=1)
    array([0.70710678, 3.53553391])

    ```

    You can also start the sum with a value other than zero:

    ```python
    >>> feature_extraction.stats.rms(np.array([2, 7, 10]), initial=5)
    7.2571803523590805

    ```
    """
    s = Utils.check_dataset_shape(s)

    squared_sum = np.sum(
        s ** 2, axis=axis, dtype=dtype, out=out, keepdims=keepdims, initial=initial
    )

    # Divide by the number of elements actually reduced. The previous
    # implementation always divided by s.shape[0], which is wrong for
    # axis=None (should be the total element count) and for axis=1 on
    # non-square data.
    if axis is None:
        n = s.size
    elif isinstance(axis, tuple):
        n = 1
        for ax in axis:
            n *= s.shape[ax]
    else:
        n = s.shape[axis]

    return (squared_sum / n) ** 0.5
def skew(self, s, axis=0, bias=True, nan_policy='propagate'):
    r"""
    Computes the sample skewness of a data set.

    For normally distributed data the skewness should be about zero. For
    unimodal continuous distributions, a skewness value greater than zero
    means there is more weight in the right tail of the distribution. The
    function `skewtest` can be used to determine whether the skewness
    value is close enough to zero, statistically speaking.

    **Parameters**

    * **s:** (ndarray) input array.
    * **axis:** (int or None, optional) axis along which skewness is
      calculated. Default is 0. If None, compute over the whole array.
    * **bias:** (bool, optional) if False, the calculations are corrected
      for statistical bias.
    * **nan_policy:** ({'propagate', 'raise', 'omit'}, optional) how to
      handle nan inputs (default 'propagate'):

        * 'propagate': returns nan
        * 'raise': throws an error
        * 'omit': performs the calculations ignoring nan values

    **Returns**

    * **skewness:** (ndarray) the skewness of values along an axis,
      returning 0 where all values are equal.

    ## Notes

    The sample skewness is computed as the Fisher-Pearson coefficient of
    skewness, i.e.

    $g_1=\frac{m_3}{m_2^{3/2}}$

    where

    $m_i=\frac{1}{N}\sum_{n=1}^N(x[n]-\bar{x})^i$

    is the biased sample $i\texttt{th}$ central moment, and $\bar{x}$ is
    the sample mean. If $bias$ is False, the calculations are corrected
    for bias and the value computed is the adjusted Fisher-Pearson
    standardized moment coefficient, i.e.

    $G_1=\frac{k_3}{k_2^{3/2}}=\frac{\sqrt{N(N-1)}}{N-2}\frac{m_3}{m_2^{3/2}}.$

    ## References

    .. [1] Zwillinger, D. and Kokoska, S. (2000). CRC Standard Probability
        and Statistics Tables and Formulae. Chapman & Hall: New York. 2000.
        Section 2.2.24.1

    ## Snippet code

    ```python
    >>> import numpy as np
    >>> from rackio_AI import RackioAIFE
    >>> feature_extraction = RackioAIFE()
    >>> s = np.array([1, 2, 3, 4, 5])
    >>> feature_extraction.stats.skew(s)
    array([0.])
    >>> s = np.array([2, 8, 0, 4, 1, 9, 9, 0])
    >>> feature_extraction.stats.skew(s)
    array([0.26505541])

    ```
    """
    data = Utils.check_dataset_shape(s)
    return skew(data, axis=axis, bias=bias, nan_policy=nan_policy)
def std(self, s, axis=None, dtype=None, out=None, ddof=0, keepdims=np._NoValue):
    r"""
    Computes the standard deviation along the specified axis.

    Returns the standard deviation, a measure of the spread of a
    distribution, of the array elements. It is computed for the
    flattened array by default, otherwise over the specified axis.

    **Parameters**

    * **s:** (2d array_like) calculate the standard deviation of these
      values.
    * **axis:** (None or int or tuple of ints, optional) axis or axes
      along which the standard deviation is computed. The default
      computes the standard deviation of the flattened array. A tuple of
      ints reduces over multiple axes.
    * **dtype:** (dtype, optional) type to use in computing the standard
      deviation. For arrays of integer type the default is float64; for
      arrays of float types it is the same as the array type.
    * **out:** (ndarray, optional) alternative output array for the
      result. It must have the expected output shape; the type of the
      calculated values is cast if necessary.
    * **ddof:** (int, optional) Delta Degrees of Freedom. The divisor
      used in calculations is $N - ddof$, where $N$ is the number of
      elements. By default `ddof` is zero.
    * **keepdims:** (bool, optional) if True, the reduced axes are kept
      as size-one dimensions so the result broadcasts correctly against
      the input array. If the default value is passed, `keepdims` is not
      forwarded to the ``std`` method of ``ndarray`` sub-classes.

    **Returns**

    * **standard_deviation:** (ndarray) a new array containing the
      standard deviation if `out` is None, otherwise a reference to the
      output array.

    ## Notes

    The standard deviation is the square root of the average of the
    squared deviations from the mean, i.e.

    $\mu = \frac{1}{N}\sum_{i=1}^{n}s_{i}$

    $std = \sqrt{\frac{1}{N}\sum_{i=1}^{n}|s_{i}-\mu|^2}$

    ## Snippet code

    ```python
    >>> import numpy as np
    >>> from rackio_AI import RackioAIFE
    >>> feature_extraction = RackioAIFE()
    >>> s = np.array([[1, 2], [3, 4]])
    >>> feature_extraction.stats.std(s, axis=0)
    array([1., 1.])
    >>> feature_extraction.stats.std(s, axis=1)
    array([0.5, 0.5])

    ```

    ### In single precision, std() can be inaccurate

    ```python
    >>> s = np.zeros((2, 512*512), dtype=np.float32)
    >>> s[0, :] = 1.0
    >>> s[1, :] = 0.1
    >>> feature_extraction.stats.std(s)
    0.45000005
    >>> s = np.array([[14, 8, 11, 10], [7, 9, 10, 11], [10, 15, 5, 10]])
    >>> feature_extraction.stats.std(s)
    2.614064523559687

    ```
    """
    s = Utils.check_dataset_shape(s)
    # Fixed: the original passed out=dtype to np.std, which both ignored
    # the caller's `out` array and passed a dtype object where an ndarray
    # is expected.
    return np.std(s, axis=axis, dtype=dtype, out=out, ddof=ddof, keepdims=keepdims)