Esempio n. 1
0
def median_abs_dev(X, W=[], precision=1, c=1.0):
    """
    Return the (optionally weighted) median absolute deviation of X.

    mad = median(abs(x - median(x))) / c

    When weights are supplied, the values are first passed through
    weighted_to_repeated_values(), and the deviation is then computed
    on the values that function returns.

    Parameters
    ----------
    X : numpy array of floats or integers
        values (anything else is converted with np.array)
    W : numpy array of floats or integers
        weights (anything else is converted with np.array);
        when empty, no weighting is applied
    precision : integer
        number of decimal places to consider weights
        (non-integers are converted with int)
    c : float
        divisor applied to the median absolute deviation;
        c = 0.6745 is used to convert from mad to standard deviation

    Returns
    -------
    mad : float
        (weighted) median absolute deviation

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import median_abs_dev
    >>> X = np.array([1,2,4,7,8])
    >>> W = np.array([.1,.1,.3,.2,.3])
    >>> precision = 1
    >>> # [1, 2, 4, 4, 4, 7, 7, 8, 8, 8]
    >>> median_abs_dev(X, W, precision)
    2.0

    """
    import numpy as np
    from mindboggle.guts.compute import weighted_to_repeated_values

    # Coerce each argument only when it is not already of the expected type:
    X = X if isinstance(X, np.ndarray) else np.array(X)
    W = W if isinstance(W, np.ndarray) else np.array(W)
    precision = precision if isinstance(precision, int) else int(precision)

    # Apply the weights only when some were given:
    samples = weighted_to_repeated_values(X, W, precision) if np.size(W) else X

    # Median of the absolute distances from the median, scaled by 1/c:
    center = np.median(samples)
    deviations = np.abs(samples - center)
    return np.median(deviations) / c
Esempio n. 2
0
def weighted_median(X, W=[], precision=1):
    """
    Return the median of the values X after applying the weights W.

    The values and weights are handed to weighted_to_repeated_values(),
    and the ordinary median of its result is returned.

    Parameters
    ----------
    X : numpy array of floats or integers
        values (anything else is converted with np.array)
    W : numpy array of floats or integers
        weights (anything else is converted with np.array)
    precision : integer
        number of decimal places to consider weights
        (non-integers are converted with int)

    Returns
    -------
    wmedian : float
        weighted median

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.guts.compute import weighted_median
    >>> X = np.array([1,2,4,7,8])
    >>> W = np.array([.1,.1,.3,.2,.3])
    >>> precision = 1
    >>> # [1, 2, 4, 4, 4, 7, 7, 8, 8, 8]
    >>> weighted_median(X, W, precision)
    5.5

    """
    import numpy as np
    from mindboggle.guts.compute import weighted_to_repeated_values

    # Coerce each argument only when it is not already of the expected type:
    X = X if isinstance(X, np.ndarray) else np.array(X)
    W = W if isinstance(W, np.ndarray) else np.array(W)
    precision = precision if isinstance(precision, int) else int(precision)

    # Median of the values returned for the given weights:
    repeated = weighted_to_repeated_values(X, W, precision)
    return np.median(repeated)
Esempio n. 3
0
def stats_per_label(values, labels, include_labels=[], exclude_labels=[],
                    weights=[], precision=1):
    """
    Compute various statistical measures across vertices per label,
    optionally using weights (such as surface area per vertex).

    Example (area-weighted mean):
    average value = sum(a_i * v_i) / total_surface_area,
    where *a_i* and *v_i* are the area and value for each vertex *i*.

    Weights are applied to a label only when there are as many weights
    as values and the label's weights sum to more than zero; otherwise
    the unweighted statistics are computed for that label. A label with
    no vertices, or whose values are all zero, gets zero for every
    statistic.

    Reference:
        Weighted skewness and kurtosis unbiased by sample size
        Lorenzo Rimoldini, arXiv:1304.6564 (2013)
        http://arxiv.org/abs/1304.6564

    Note ::
        This function is different than means_per_label() in two ways:
            1. It computes more than simply the (weighted) mean and sdev.
            2. It only accepts 1-D arrays of values.

    Parameters
    ----------
    values : numpy array (or list) of integers or floats
        values for all vertices
    labels : list or array of integers
        label for each value
    include_labels : list or array of integers
        labels to include (if empty, all unique labels are used)
    exclude_labels : list of integers
        labels to be excluded
    weights : numpy array of floats
        weights to compute weighted statistical measures
    precision : integer
        number of decimal places to consider weights

    Returns
    -------
    medians : list of floats
        median for each label
    mads : list of floats
        median absolute deviation for each label
    means : list of floats
        mean for each label
    sdevs : list of floats
        standard deviation for each label
    skews : list of floats
        skew for each label
    kurts : list of floats
        kurtosis value for each label
    lower_quarts : list of floats
        lower quartile for each label
    upper_quarts : list of floats
        upper quartile for each label
    label_list : list of integers
        list of unique labels

    Examples
    --------
    >>> import numpy as np
    >>> from mindboggle.mio.vtks import read_scalars
    >>> from mindboggle.guts.compute import stats_per_label
    >>> from mindboggle.mio.fetch_data import prep_tests
    >>> urls, fetch_data = prep_tests()
    >>> values_file = fetch_data(urls['left_mean_curvature'])
    >>> labels_file = fetch_data(urls['left_freesurfer_labels'])
    >>> area_file = fetch_data(urls['left_area'])
    >>> values, name = read_scalars(values_file, True, True)
    >>> areas, name = read_scalars(area_file, True, True)
    >>> labels, name = read_scalars(labels_file)
    >>> include_labels = []
    >>> exclude_labels = [-1]
    >>> weights = areas
    >>> precision = 1
    >>> medians, mads, means, sdevs, skews, kurts, lower_quarts, upper_quarts, label_list = stats_per_label(values,
    ...     labels, include_labels, exclude_labels, weights, precision)
    >>> print(np.array_str(np.array(medians[0:5]),
    ...       precision=5, suppress_small=True))
    [-1.13602 -1.22961 -2.49665 -3.80782 -3.37309]
    >>> print(np.array_str(np.array(mads[0:5]),
    ...       precision=5, suppress_small=True))
    [ 1.17026  1.5045   1.28234  2.11515  1.69333]
    >>> print(np.array_str(np.array(means[0:5]),
    ...       precision=5, suppress_small=True))
    [-1.1793  -1.21405 -2.49318 -3.58116 -3.34987]
    >>> print(np.array_str(np.array(kurts[0:5]),
    ...       precision=5, suppress_small=True))
    [ 2.34118 -0.3969  -0.55787 -0.73993  0.3807 ]

    """
    import numpy as np
    from scipy.stats import skew, kurtosis, scoreatpercentile
    from mindboggle.guts.compute import weighted_to_repeated_values, median_abs_dev

    # Make sure arguments are numpy arrays:
    if not isinstance(values, np.ndarray):
        values = np.asarray(values)
    if not isinstance(weights, np.ndarray):
        weights = np.asarray(weights)
    # An array of labels lets each label's vertices be found with a single
    # vectorized comparison instead of a Python-level scan per label:
    labels = np.asarray(labels)

    # Select the labels to process (np.size() rather than truthiness,
    # so that include_labels may also be a numpy array):
    if include_labels is not None and np.size(include_labels):
        label_list = include_labels
    else:
        label_list = np.unique(labels)
    label_list = [int(x) for x in label_list if int(x) not in exclude_labels]

    # Initialize all statistical lists:
    medians = []
    mads = []
    means = []
    sdevs = []
    skews = []
    kurts = []
    lower_quarts = []
    upper_quarts = []
    all_stats = (medians, mads, means, sdevs,
                 skews, kurts, lower_quarts, upper_quarts)

    # Weights are applied only if there are as many weights as values
    # (this does not depend on the label, so test it once):
    use_weights = np.size(weights) == np.size(values)

    for label in label_list:
        # Extract all vertex indices (and their values) for the label:
        I = np.flatnonzero(labels == label)
        X = values[I] if I.size else None

        # If there are no vertices for the label, or all of its values
        # are equal to zero, set all statistics to zero:
        if X is None or not np.any(X != 0):
            for stat in all_stats:
                stat.append(0)
            continue

        W = weights[I] if use_weights else None
        sumW = np.sum(W) if use_weights else 0

        # If the sum of the weights is greater than zero,
        # compute the statistics of the weighted values:
        if use_weights and sumW > 0:
            # NOTE(review): the weighted moments below are centered on the
            # unweighted mean of X, not on the weighted mean appended to
            # `means`; kept unchanged so results stay the same -- confirm
            # that this is intended.
            Xdiff = X - np.mean(X)
            Xstd = np.sqrt(np.sum(W * Xdiff**2) / sumW)
            means.append(np.sum(W * X) / sumW)
            sdevs.append(Xstd)
            if Xstd > 0:
                skews.append((np.sum(W * Xdiff**3) / sumW) / Xstd**3)
                kurts.append((np.sum(W * Xdiff**4) / sumW) / Xstd**4 - 3)
            else:
                # Zero spread: fall back to the unweighted measures
                # to avoid dividing by zero:
                skews.append(skew(X))
                kurts.append(kurtosis(X))
            # The median-based statistics below are computed
            # on the weight-repeated values:
            X = weighted_to_repeated_values(X, W, precision)
        # If there are no (or not enough) weights, or the weights do not
        # sum to more than zero, simply compute the statistics:
        else:
            means.append(np.mean(X))
            sdevs.append(np.std(X))
            skews.append(skew(X))
            kurts.append(kurtosis(X))

        # Compute median, median absolute deviation,
        # and lower and upper quartiles:
        if np.size(X):
            medians.append(np.median(X))
            mads.append(median_abs_dev(X))
            lower_quarts.append(scoreatpercentile(X, 25))
            upper_quarts.append(scoreatpercentile(X, 75))
        # If the weights are all smaller than the precision, then X will
        # disappear, so set the above statistics to zero:
        else:
            medians.append(0)
            mads.append(0)
            lower_quarts.append(0)
            upper_quarts.append(0)

    return medians, mads, means, sdevs, skews, kurts, \
           lower_quarts, upper_quarts, label_list
Esempio n. 4
0
def stats_per_label(values, labels, include_labels=[], exclude_labels=[], weights=[], precision=1):
    """
    Compute various statistical measures across vertices per label,
    optionally using weights (such as surface area per vertex).

    Example (area-weighted mean):
    average value = sum(a_i * v_i) / total_surface_area,
    where *a_i* and *v_i* are the area and value for each vertex *i*.

    Weights are applied to a label only when there are as many weights
    as values and the label's weights sum to more than zero; otherwise
    the unweighted statistics are computed for that label. A label with
    no vertices, or whose values are all zero, gets zero for every
    statistic.

    Note ::
        This function is different than means_per_label() in two ways:
            1. It computes more than simply the (weighted) mean and sdev.
            2. It only accepts 1-D arrays of values.

    Reference
    ---------
    Weighted skewness and kurtosis unbiased by sample size
    Lorenzo Rimoldini, arXiv:1304.6564 (2013)
    http://arxiv.org/abs/1304.6564

    Parameters
    ----------
    values : numpy array (or list) of integers or floats
        values for all vertices
    labels : list or array of integers
        label for each value
    include_labels : list or array of integers
        labels to include (if empty, all unique labels are used)
    exclude_labels : list of integers
        labels to be excluded
    weights : numpy array of floats
        weights to compute weighted statistical measures
    precision : integer
        number of decimal places to consider weights

    Returns
    -------
    medians : list of floats
        median for each label
    mads : list of floats
        median absolute deviation for each label
    means : list of floats
        mean for each label
    sdevs : list of floats
        standard deviation for each label
    skews : list of floats
        skew for each label
    kurts : list of floats
        kurtosis value for each label
    lower_quarts : list of floats
        lower quartile for each label
    upper_quarts : list of floats
        upper quartile for each label
    label_list : list of integers
        list of unique labels

    Examples
    --------
    >>> import os
    >>> from mindboggle.mio.vtks import read_scalars
    >>> from mindboggle.guts.compute import stats_per_label
    >>> data_path = os.environ['MINDBOGGLE_DATA']
    >>> values_file = os.path.join(data_path, 'arno', 'shapes', 'lh.pial.mean_curvature.vtk')
    >>> area_file = os.path.join(data_path, 'arno', 'shapes', 'lh.pial.area.vtk')
    >>> labels_file = os.path.join(data_path, 'arno', 'labels', 'lh.labels.DKT25.manual.vtk')
    >>> values, name = read_scalars(values_file, True, True)
    >>> areas, name = read_scalars(area_file, True, True)
    >>> labels, name = read_scalars(labels_file)
    >>> include_labels = []
    >>> exclude_labels = [-1]
    >>> weights = areas
    >>> precision = 1
    >>> stats_per_label(values, labels, include_labels, exclude_labels, weights, precision)

    """
    import numpy as np
    from scipy.stats import skew, kurtosis, scoreatpercentile
    from mindboggle.guts.compute import weighted_to_repeated_values, median_abs_dev

    # Make sure arguments are numpy arrays:
    if not isinstance(values, np.ndarray):
        values = np.asarray(values)
    if not isinstance(weights, np.ndarray):
        weights = np.asarray(weights)
    # An array of labels lets each label's vertices be found with a single
    # vectorized comparison instead of a Python-level scan per label:
    labels = np.asarray(labels)

    # Select the labels to process (np.size() rather than truthiness,
    # so that include_labels may also be a numpy array):
    if include_labels is not None and np.size(include_labels):
        label_list = include_labels
    else:
        label_list = np.unique(labels)
    label_list = [int(x) for x in label_list if int(x) not in exclude_labels]

    # Initialize all statistical lists:
    medians = []
    mads = []
    means = []
    sdevs = []
    skews = []
    kurts = []
    lower_quarts = []
    upper_quarts = []
    all_stats = (medians, mads, means, sdevs, skews, kurts, lower_quarts, upper_quarts)

    # Weights are applied only if there are as many weights as values
    # (this does not depend on the label, so test it once):
    use_weights = np.size(weights) == np.size(values)

    for label in label_list:
        # Extract all vertex indices (and their values) for the label:
        I = np.flatnonzero(labels == label)
        X = values[I] if I.size else None

        # If there are no vertices for the label, or all of its values
        # are equal to zero, set all statistics to zero:
        if X is None or not np.any(X != 0):
            for stat in all_stats:
                stat.append(0)
            continue

        W = weights[I] if use_weights else None
        sumW = np.sum(W) if use_weights else 0

        # If the sum of the weights is greater than zero,
        # compute the statistics of the weighted values:
        if use_weights and sumW > 0:
            # NOTE(review): the weighted moments below are centered on the
            # unweighted mean of X, not on the weighted mean appended to
            # `means`; kept unchanged so results stay the same -- confirm
            # that this is intended.
            Xdiff = X - np.mean(X)
            Xstd = np.sqrt(np.sum(W * Xdiff ** 2) / sumW)
            means.append(np.sum(W * X) / sumW)
            sdevs.append(Xstd)
            if Xstd > 0:
                skews.append((np.sum(W * Xdiff ** 3) / sumW) / Xstd ** 3)
                kurts.append((np.sum(W * Xdiff ** 4) / sumW) / Xstd ** 4 - 3)
            else:
                # Zero spread: fall back to the unweighted measures
                # to avoid dividing by zero:
                skews.append(skew(X))
                kurts.append(kurtosis(X))
            # The median-based statistics below are computed
            # on the weight-repeated values:
            X = weighted_to_repeated_values(X, W, precision)
        # If there are no (or not enough) weights, or the weights do not
        # sum to more than zero, simply compute the statistics:
        else:
            means.append(np.mean(X))
            sdevs.append(np.std(X))
            skews.append(skew(X))
            kurts.append(kurtosis(X))

        # Compute median, median absolute deviation,
        # and lower and upper quartiles:
        if np.size(X):
            medians.append(np.median(X))
            mads.append(median_abs_dev(X))
            lower_quarts.append(scoreatpercentile(X, 25))
            upper_quarts.append(scoreatpercentile(X, 75))
        # If the weights are all smaller than the precision, then X will
        # disappear, so set the above statistics to zero:
        else:
            medians.append(0)
            mads.append(0)
            lower_quarts.append(0)
            upper_quarts.append(0)

    return medians, mads, means, sdevs, skews, kurts, lower_quarts, upper_quarts, label_list