def _calculate_cluster_based_test_statistic(dataset_1,
                                            dataset_2,
                                            min_area=1,
                                            cluster_alpha=0.05,
                                            sample_statistic='independent',
                                            cluster_statistic='maxsum'):
    # sample-level statistic: Welch's t-test for independent samples,
    # paired t-test for dependent samples
    if sample_statistic == 'independent':
        t_test_result = sstat.ttest_ind(dataset_1,
                                        dataset_2,
                                        axis=-1,
                                        equal_var=False)
    elif sample_statistic == 'dependent':
        t_test_result = sstat.ttest_rel(dataset_1, dataset_2, axis=-1)

    # binary mask of samples whose p-values fall below the
    # cluster-forming threshold
    masked_pvalues = np.zeros(t_test_result.pvalue.shape)
    masked_pvalues[t_test_result.pvalue < cluster_alpha] = 1

    # label connected clusters of supra-threshold samples
    cluster_labels, num = measurements.label(masked_pvalues)
    # `_stats` returns (counts, sums) per label; the per-label counts give
    # each cluster's area
    cluster_areas, _ = measurements._stats(
        masked_pvalues,
        cluster_labels,
        index=np.arange(cluster_labels.max() + 1))
    clusters_over_min_area = np.squeeze(np.argwhere(cluster_areas > min_area))

    cluster_labels_over_min_area = np.ones(cluster_labels.shape) * -1
    cluster_statistic_result = []

    # np.squeeze collapses a single surviving cluster to a 0-d array; wrap it
    # so the loop below can iterate over it
    if clusters_over_min_area.shape == ():
        clusters_over_min_area = [clusters_over_min_area]
    # cluster-level statistic: summed p-values ('maxsum') or cluster area
    # ('maxarea') for every cluster exceeding `min_area`
    for c_index in np.arange(len(clusters_over_min_area)):
        cluster = clusters_over_min_area[c_index]
        if cluster_statistic == 'maxsum':
            cluster_statistic_result.append(
                np.sum(t_test_result.pvalue[cluster_labels == cluster]))
        if cluster_statistic == 'maxarea':
            cluster_statistic_result.append(cluster_areas[cluster])
        cluster_labels_over_min_area[cluster_labels == cluster] = c_index
    return cluster_statistic_result, cluster_labels_over_min_area
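
For reference, `_stats` is the private per-label reducer behind the public functions in `scipy.ndimage`; it returns a `(counts, sums)` pair for the requested labels, which is exactly what the area computation above unpacks. A minimal sketch of the same idea using only the public API (toy mask, not from the original code; on a 0/1 mask the per-label sum equals the cluster area):

import numpy as np
from scipy import ndimage

# toy binary mask with two blobs of supra-threshold samples
mask = np.array([[1, 1, 0, 0],
                 [1, 0, 0, 1],
                 [0, 0, 1, 1]])

labels, num = ndimage.label(mask)            # blobs become labels 1 and 2
index = np.arange(labels.max() + 1)          # include the background label 0
areas = ndimage.sum(mask, labels, index)     # public wrapper over `_stats`
print(areas)                                 # background first, then one area per blob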
Example #2
def vertices_to_parcels(data, *, lhannot, rhannot, drop=None):
    """
    Reduces vertex-level `data` to parcels defined in annotation files

    Takes average of vertices within each parcel, excluding np.nan values
    (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices are
    np.nan.

    Parameters
    ----------
    data : (N,) numpy.ndarray
        Vertex-level data to be reduced to parcels
    {lh,rh}annot : str
        Path to .annot file containing labels to parcels on the {left,right}
        hemisphere
    drop : list, optional
        Specifies regions in {lh,rh}annot that should be removed from the
        parcellated version of `data`. If not specified, vertices corresponding
        to parcels defined in `netneurotools.freesurfer.FSIGNORE` will be
        removed. Default: None

    Returns
    -------
    reduced : numpy.ndarray
        Parcellated `data`, without regions specified in `drop`
    """

    if drop is None:
        drop = FSIGNORE
    drop = _decode_list(drop)

    data = np.vstack(data)

    n_parc = expected = 0
    for a in [lhannot, rhannot]:
        vn, _, names = read_annot(a)
        expected += len(vn)
        names = _decode_list(names)
        n_parc += len(names) - len(set(drop) & set(names))
    if expected != len(data):
        raise ValueError('Number of vertices in provided annotation files '
                         'differs from size of vertex-level data array.\n'
                         '    EXPECTED: {} vertices\n'
                         '    RECEIVED: {} vertices'.format(
                             expected, len(data)))

    reduced = np.zeros((n_parc, data.shape[-1]), dtype=data.dtype)
    start = end = n_parc = 0
    for annot in [lhannot, rhannot]:
        # read files and update end index for `data`
        labels, ctab, names = read_annot(annot)
        names = _decode_list(names)

        indices = np.unique(labels)
        end += len(labels)

        for idx in range(data.shape[-1]):
            # get average of vertex-level data within parcels
            # set all NaN values to 0 before calling `_stats` because we are
            # returning sums, so the 0 values won't impact the sums (if we left
            # the NaNs then all parcels with even one NaN entry would be NaN)
            currdata = np.squeeze(data[start:end, idx])
            isna = np.isnan(currdata)
            counts, sums = _stats(np.nan_to_num(currdata), labels, indices)

            # however, we do need to account for the NaN values in the counts
            # so that our means are similar to what we'd get from e.g.,
            # np.nanmean here, our "sums" are the counts of NaN values in our
            # parcels
            _, nacounts = _stats(isna, labels, indices)
            counts = (np.asanyarray(counts, dtype=float) -
                      np.asanyarray(nacounts, dtype=float))

            with np.errstate(divide='ignore', invalid='ignore'):
                currdata = sums / counts

            # get indices of dropped regions (e.g., unknown, corpuscallosum)
            # and delete them from the parcellated data
            inds = sorted([names.index(f) for f in set(drop) & set(names)])
            currdata = np.delete(currdata, inds)

            # store parcellated data
            reduced[n_parc:n_parc + len(names) - len(inds), idx] = currdata

        start = end
        n_parc += len(names) - len(inds)

    return np.squeeze(reduced)
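
A hypothetical call, assuming `read_annot` (from nibabel.freesurfer) is in scope as in the snippet and that a matching pair of .annot files is available; the file names below are placeholders, not from the original code:

import numpy as np

# placeholder annotation files; any lh/rh pair whose total vertex count
# matches the data will do
lhannot, rhannot = 'lh.aparc.annot', 'rh.aparc.annot'

# fake vertex-level data: one value per vertex across both hemispheres
n_vertices = sum(len(read_annot(a)[0]) for a in [lhannot, rhannot])
data = np.random.rand(n_vertices)

parcellated = vertices_to_parcels(data, lhannot=lhannot, rhannot=rhannot)
print(parcellated.shape)   # one value per retained parcel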
Example #3
def vertices_to_parcels(data, *, lhannot, rhannot, drop=None):
    """
    Reduces vertex-level `data` to parcels defined in annotation files

    Takes average of vertices within each parcel, excluding np.nan values
    (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices are
    np.nan.

    Parameters
    ----------
    data : (N,) numpy.ndarray
        Vertex-level data to be reduced to parcels
    {lh,rh}annot : str
        Path to .annot file containing labels to parcels on the {left,right}
        hemisphere
    drop : list, optional
        Specifies regions in {lh,rh}annot that should be removed from the
        parcellated version of `data`. If not specified, 'unknown' and
        'corpuscallosum' will be removed. Default: None

    Returns
    -------
    reduced : numpy.ndarray
        Parcellated `data`, without regions specified in `drop`
    """

    if drop is None:
        drop = ['unknown', 'corpuscallosum']
    drop = _decode_list(drop)

    start = end = 0
    reduced = []

    # check this so we're not unduly surprised by anything...
    expected = sum([len(read_annot(a)[0]) for a in [lhannot, rhannot]])
    if expected != len(data):
        raise ValueError('Number of vertices in provided annotation files '
                         'differs from size of vertex-level data array.\n'
                         '    EXPECTED: {} vertices\n'
                         '    RECEIVED: {} vertices'.format(
                             expected, len(data)))

    for annot in [lhannot, rhannot]:
        # read files and update end index for `data`
        labels, ctab, names = read_annot(annot)
        names = _decode_list(names)

        indices = np.unique(labels)
        end += len(labels)

        # get average of vertex-level data within parcels
        # set all NaN values to 0 before calling `_stats` because we are
        # returning sums, so the 0 values won't impact the sums (if we left
        # the NaNs then all parcels with even one NaN entry would be NaN)
        currdata = np.squeeze(data[start:end])
        isna = np.isnan(currdata)
        counts, sums = _stats(np.nan_to_num(currdata), labels, indices)

        # however, we do need to account for the NaN values in the counts
        # so that our means are similar to what we'd get from e.g., np.nanmean
        # here, our "sums" are the counts of NaN values in our parcels
        _, nacounts = _stats(isna, labels, indices)
        counts = (np.asanyarray(counts, dtype=float) -
                  np.asanyarray(nacounts, dtype=float))

        with np.errstate(divide='ignore', invalid='ignore'):
            currdata = sums / counts

        # get indices of unknown and corpuscallosum and delete from parcels
        inds = [names.index(f) for f in drop]
        currdata = np.delete(currdata, inds)

        # store parcellated data
        reduced.append(currdata)
        start = end

    return np.hstack(reduced)
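
The NaN handling in these examples is a small trick worth isolating: zero out NaNs before summing, then subtract the per-parcel NaN counts from the element counts so the division reproduces np.nanmean. A standalone sketch of that idea with the public scipy.ndimage API (toy labels, not from the original code):

import numpy as np
from scipy import ndimage

values = np.array([1.0, np.nan, 3.0, np.nan, np.nan, 6.0])
labels = np.array([1,   1,      1,   2,      2,      3])
index = np.unique(labels)

sums = ndimage.sum(np.nan_to_num(values), labels, index)    # NaNs contribute 0
counts = ndimage.sum(np.ones_like(values), labels, index)   # elements per label
nacounts = ndimage.sum(np.isnan(values), labels, index)     # NaNs per label

with np.errstate(divide='ignore', invalid='ignore'):
    means = sums / (counts - nacounts)
print(means)   # per-label means: 2.0, nan (all-NaN parcel), 6.0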
Example #4
def reduce_from_vertices(data, rhannot, lhannot):
    """
    Reduces vertex-level `data` to parcels defined in annotation files

    Takes average of vertices within each parcel, excluding np.nan values
    (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices are
    np.nan.

    Parameters
    ----------
    data : (N,) numpy.ndarray
        Vertex-level data to be reduced to parcels
    {rh,lh}annot : str
        Path to .annot file containing labels to parcels on the {right,left}
        hemisphere

    Returns
    -------
    reduced : numpy.ndarray
        Parcellated data
    """

    drop = [b'unknown', b'corpuscallosum']
    start = end = 0
    reduced = []

    # check this so we're not unduly surprised by anything...
    expected = sum([len(read_annot(a)[0]) for a in [rhannot, lhannot]])
    if expected != len(data):
        raise ValueError('Number of vertices in provided annotation files '
                         'differs from size of vertex-level data array.\n'
                         '    EXPECTED: {} vertices\n'
                         '    RECEIVED: {} vertices'.format(
                             expected, len(data)))

    for annot in [rhannot, lhannot]:
        # read files and update end index for `data`
        labels, ctab, names = read_annot(annot)
        end += len(labels)

        # get average of vertex-level data within parcels
        # set all NaN values to 0 before calling `_stats` because we are
        # returning sums, so the 0 values won't impact the sums (if we left
        # the NaNs then all parcels with a single NaN value would be NaN)
        currdata = data[start:end].copy()
        isna = np.isnan(currdata)
        currdata[isna] = 0
        counts, sums = _stats(currdata, labels, np.unique(labels))

        # however, we do need to account for the NaN values in the counts
        # so that our means are similar to what we'd get from e.g., np.nanmean
        # here, our "sums" are the counts of NaN values in our parcels
        _, nacounts = _stats(isna, labels, np.unique(labels))
        counts = (np.asanyarray(counts).astype(float) -
                  np.asanyarray(nacounts).astype(float))

        with np.errstate(divide='ignore', invalid='ignore'):
            currdata = sums / counts

        # get indices of unknown and corpuscallosum and delete from parcels
        inds = [names.index(f) for f in drop]
        currdata = np.delete(currdata, inds)

        # store parcellated data
        reduced.append(currdata)
        start = end

    return np.hstack(reduced)
Example #5
def image_segmentation(ndvi, predict):
    write_cog(ndvi.to_array().compute(), "NDVI.tif", overwrite=True)

    # store temp files somewhere
    directory = "tmp"
    if not os.path.exists(directory):
        os.mkdir(directory)

    tmp = "tmp/"

    # inputs to image seg
    tiff_to_segment = "NDVI.tif"
    kea_file = "NDVI.kea"
    segmented_kea_file = "segmented.kea"

    # convert tiff to kea
    gdal.Translate(
        destName=kea_file, srcDS=tiff_to_segment, format="KEA", outputSRS="EPSG:6933"
    )

    # run image seg
    with HiddenPrints():
        segutils.runShepherdSegmentation(
            inputImg=kea_file,
            outputClumps=segmented_kea_file,
            tmpath=tmp,
            numClusters=60,
            minPxls=100,
        )
    
    # convert kea to tif
    kwargs = {
        'outputType': gdal.GDT_Float32,
    }
    
    gdal.Translate(
        destName=segmented_kea_file[:-3]+'tif',
        srcDS=segmented_kea_file,
        outputSRS="EPSG:6933",
        format='GTiff',
        **kwargs
    )
    
    # open segments
    segments = xr.open_rasterio(segmented_kea_file[:-3]+'tif').squeeze().values

    # per-segment majority vote ("mode") over the binary predictions:
    # with `index=segments`, `_stats` returns, for every pixel, the count and
    # sum of `predict` within that pixel's segment, so a segment's mode is 1
    # when more than half of its pixels were predicted 1
    count, _sum = _stats(predict, labels=segments, index=segments)
    mode = _sum > (count / 2)
    mode = xr.DataArray(
        mode, coords=predict.coords, dims=predict.dims, attrs=predict.attrs
    )

    # remove the tmp folder
    shutil.rmtree(tmp)
    os.remove(kea_file)
    os.remove(segmented_kea_file)
    os.remove(tiff_to_segment)
    os.remove(segmented_kea_file[:-3]+'tif')

    return mode.chunk({})
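
The majority-vote step at the end can be reproduced on its own: passing `index=segments` broadcasts the per-segment count and sum back to every pixel. A minimal sketch with the public API on synthetic arrays (not from the original code):

import numpy as np
from scipy import ndimage

# two segments (labels 1 and 2) and a binary per-pixel prediction
segments = np.array([[1, 1, 2],
                     [1, 2, 2]])
predict = np.array([[1, 0, 1],
                    [0, 1, 1]])

# per-pixel count and sum of predictions within that pixel's segment
count = ndimage.sum(np.ones_like(predict), segments, index=segments)
total = ndimage.sum(predict, segments, index=segments)

mode = total > (count / 2)     # True where the majority of the segment is 1
print(mode.astype(int))        # segment 1 (majority 0) -> 0, segment 2 (all 1) -> 1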
Example #6
def sum(input, labels=None, index=None):
    count, sum = _stats(input, labels, index)
    return sum
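
This last snippet is essentially the public scipy.ndimage sum wrapper: it discards the per-label counts returned by `_stats` and keeps only the sums. A quick usage sketch, assuming `_stats` is imported as in the snippets above (values chosen arbitrarily):

values = [0.1, 0.2, 0.5, 0.7]
labels = [1, 1, 2, 2]
print(sum(values, labels, index=[1, 2]))   # per-label sums: roughly 0.3 and 1.2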