import numpy as np
import scipy.stats as sstat
from scipy.ndimage import measurements  # NB: `_stats` is a private SciPy helper


def _calculate_cluster_based_test_statistic(dataset_1, dataset_2, min_area=1,
                                            cluster_alpha=0.05,
                                            sample_statistic='independent',
                                            cluster_statistic='maxsum'):
    # mass-univariate t-test along the trailing (samples) axis
    if sample_statistic == 'independent':
        t_test_result = sstat.ttest_ind(dataset_1, dataset_2, axis=-1,
                                        equal_var=False)
    if sample_statistic == 'dependent':
        t_test_result = sstat.ttest_rel(dataset_1, dataset_2, axis=-1)

    # binarize the p-value map at `cluster_alpha` to find candidate clusters
    masked_pvalues = np.zeros(t_test_result.pvalue.shape)
    masked_pvalues[t_test_result.pvalue < cluster_alpha] = 1

    # label contiguous supra-threshold regions; `_stats` returns
    # (count, sum) per label, so the counts are the cluster areas
    cluster_labels, num = measurements.label(masked_pvalues)
    cluster_areas, _ = measurements._stats(
        masked_pvalues, cluster_labels,
        index=np.arange(cluster_labels.max() + 1))
    cluster_areas = np.asarray(cluster_areas)
    cluster_areas[0] = 0  # label 0 is sub-threshold background, not a cluster

    clusters_over_min_area = np.squeeze(np.argwhere(cluster_areas > min_area))
    cluster_labels_over_min_area = np.ones(cluster_labels.shape) * -1
    cluster_statistic_result = []

    # np.squeeze on a single match yields a 0-d array; wrap it for iteration
    if clusters_over_min_area.shape == ():
        clusters_over_min_area = [clusters_over_min_area]

    for c_index in np.arange(len(clusters_over_min_area)):
        cluster = clusters_over_min_area[c_index]
        if cluster_statistic == 'maxsum':
            cluster_statistic_result.append(
                np.sum(t_test_result.pvalue[cluster_labels == cluster]))
        if cluster_statistic == 'maxarea':
            cluster_statistic_result.append(cluster_areas[cluster])
        cluster_labels_over_min_area[cluster_labels == cluster] = c_index

    return cluster_statistic_result, cluster_labels_over_min_area
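# A minimal usage sketch for the function above, on synthetic data (the
# shapes and effect size are illustrative assumptions, not from the
# original source; assumes the imports above). Each group is a stack of
# 2D maps with the sample axis last, matching `axis=-1` in the t-tests:
rng = np.random.default_rng(0)
group_a = rng.standard_normal((32, 32, 20))
group_b = rng.standard_normal((32, 32, 20)) + 0.5
cluster_stats, cluster_map = _calculate_cluster_based_test_statistic(
    group_a, group_b, min_area=5, cluster_alpha=0.05)
# `cluster_stats` holds one summary value per surviving cluster;
# `cluster_map` assigns each pixel its surviving-cluster index (or -1).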
# The three functions below are variants of the same parcellation routine.
# Assumed context: `np` is numpy, `read_annot` is nibabel.freesurfer's
# annotation reader, `_stats` is the private scipy.ndimage helper above,
# and `_decode_list` / `FSIGNORE` are package-local helpers.
def vertices_to_parcels(data, *, lhannot, rhannot, drop=None):
    """
    Reduces vertex-level `data` to parcels defined in annotation files

    Takes average of vertices within each parcel, excluding np.nan values
    (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices
    are np.nan.

    Parameters
    ----------
    data : (N,) or (N, D) numpy.ndarray
        Vertex-level data to be reduced to parcels
    {lh,rh}annot : str
        Path to .annot file containing labels to parcels on the
        {left,right} hemisphere
    drop : list, optional
        Specifies regions in {lh,rh}annot that should be removed from the
        parcellated version of `data`. If not specified, vertices
        corresponding to parcels defined in
        `netneurotools.freesurfer.FSIGNORE` will be removed. Default: None

    Returns
    -------
    reduced : numpy.ndarray
        Parcellated `data`, without regions specified in `drop`
    """
    if drop is None:
        drop = FSIGNORE
    drop = _decode_list(drop)

    data = np.vstack(data)

    # check this so we're not unduly surprised by anything...
    n_parc = expected = 0
    for a in [lhannot, rhannot]:
        vn, _, names = read_annot(a)
        expected += len(vn)
        names = _decode_list(names)
        n_parc += len(names) - len(set(drop) & set(names))
    if expected != len(data):
        raise ValueError('Number of vertices in provided annotation files '
                         'differs from size of vertex-level data array.\n'
                         '    EXPECTED: {} vertices\n'
                         '    RECEIVED: {} vertices'
                         .format(expected, len(data)))

    reduced = np.zeros((n_parc, data.shape[-1]), dtype=data.dtype)
    start = end = n_parc = 0
    for annot in [lhannot, rhannot]:
        # read files and update end index for `data`
        labels, ctab, names = read_annot(annot)
        names = _decode_list(names)
        indices = np.unique(labels)
        end += len(labels)

        for idx in range(data.shape[-1]):
            # get average of vertex-level data within parcels. set all NaN
            # values to 0 before calling `_stats` because we are returning
            # sums, so the 0 values won't impact the sums (if we left the
            # NaNs then all parcels with even one NaN entry would be NaN)
            currdata = np.squeeze(data[start:end, idx])
            isna = np.isnan(currdata)
            counts, sums = _stats(np.nan_to_num(currdata), labels, indices)

            # however, we do need to account for the NaN values in the
            # counts so that our means are similar to what we'd get from
            # e.g., np.nanmean. here, our "sums" are the counts of NaN
            # values in our parcels
            _, nacounts = _stats(isna, labels, indices)
            counts = (np.asanyarray(counts, dtype=float)
                      - np.asanyarray(nacounts, dtype=float))

            with np.errstate(divide='ignore', invalid='ignore'):
                currdata = sums / counts

            # get indices of dropped regions (e.g., unknown and
            # corpuscallosum) and delete them from the parcels
            inds = sorted([names.index(f) for f in set(drop) & set(names)])
            currdata = np.delete(currdata, inds)

            # store parcellated data
            reduced[n_parc:n_parc + len(names) - len(inds), idx] = currdata

        start = end
        n_parc += len(names) - len(inds)

    return np.squeeze(reduced)
# Earlier variant of `vertices_to_parcels` that handles only 1-D vertex
# data and uses a fixed default drop list.
def vertices_to_parcels(data, *, lhannot, rhannot, drop=None):
    """
    Reduces vertex-level `data` to parcels defined in annotation files

    Takes average of vertices within each parcel, excluding np.nan values
    (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices
    are np.nan.

    Parameters
    ----------
    data : (N,) numpy.ndarray
        Vertex-level data to be reduced to parcels
    {lh,rh}annot : str
        Path to .annot file containing labels to parcels on the
        {left,right} hemisphere
    drop : list, optional
        Specifies regions in {lh,rh}annot that should be removed from the
        parcellated version of `data`. If not specified, 'unknown' and
        'corpuscallosum' will be removed. Default: None

    Returns
    -------
    reduced : numpy.ndarray
        Parcellated `data`, without regions specified in `drop`
    """
    if drop is None:
        drop = ['unknown', 'corpuscallosum']
    drop = _decode_list(drop)

    start = end = 0
    reduced = []

    # check this so we're not unduly surprised by anything...
    expected = sum([len(read_annot(a)[0]) for a in [lhannot, rhannot]])
    if expected != len(data):
        raise ValueError('Number of vertices in provided annotation files '
                         'differs from size of vertex-level data array.\n'
                         '    EXPECTED: {} vertices\n'
                         '    RECEIVED: {} vertices'
                         .format(expected, len(data)))

    for annot in [lhannot, rhannot]:
        # read files and update end index for `data`
        labels, ctab, names = read_annot(annot)
        names = _decode_list(names)
        indices = np.unique(labels)
        end += len(labels)

        # get average of vertex-level data within parcels. set all NaN
        # values to 0 before calling `_stats` because we are returning
        # sums, so the 0 values won't impact the sums (if we left the NaNs
        # then all parcels with even one NaN entry would be NaN)
        currdata = np.squeeze(data[start:end])
        isna = np.isnan(currdata)
        counts, sums = _stats(np.nan_to_num(currdata), labels, indices)

        # however, we do need to account for the NaN values in the counts
        # so that our means are similar to what we'd get from e.g.,
        # np.nanmean. here, our "sums" are the counts of NaN values in our
        # parcels
        _, nacounts = _stats(isna, labels, indices)
        counts = (np.asanyarray(counts, dtype=float)
                  - np.asanyarray(nacounts, dtype=float))

        with np.errstate(divide='ignore', invalid='ignore'):
            currdata = sums / counts

        # get indices of unknown and corpuscallosum and delete from parcels
        inds = [names.index(f) for f in drop]
        currdata = np.delete(currdata, inds)

        # store parcellated data
        reduced.append(currdata)
        start = end

    return np.hstack(reduced)
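# A hypothetical usage sketch for either `vertices_to_parcels` variant
# above. `read_annot` comes from nibabel; the .annot paths are
# placeholders for real FreeSurfer annotation files:
import numpy as np
from nibabel.freesurfer import read_annot

lhannot, rhannot = 'lh.aparc.annot', 'rh.aparc.annot'  # placeholder paths
n_vertices = sum(len(read_annot(a)[0]) for a in [lhannot, rhannot])
vertex_data = np.random.default_rng(0).standard_normal(n_vertices)
reduced = vertices_to_parcels(vertex_data, lhannot=lhannot, rhannot=rhannot)
# `reduced` holds one mean value per retained parcel, both hemispheres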
# Original version of the reducer (right hemisphere first, byte-string
# region names, no keyword-only arguments).
def reduce_from_vertices(data, rhannot, lhannot):
    """
    Reduces vertex-level `data` to parcels defined in annotation files

    Takes average of vertices within each parcel, excluding np.nan values
    (i.e., np.nanmean). Assigns np.nan to parcels for which all vertices
    are np.nan.

    Parameters
    ----------
    data : (N,) numpy.ndarray
        Vertex-level data to be reduced to parcels
    {rh,lh}annot : str
        Path to .annot file containing labels to parcels on the
        {right,left} hemisphere

    Returns
    -------
    reduced : numpy.ndarray
        Parcellated data
    """
    drop = [b'unknown', b'corpuscallosum']
    start = end = 0
    reduced = []

    # check this so we're not unduly surprised by anything...
    expected = sum([len(read_annot(a)[0]) for a in [rhannot, lhannot]])
    if expected != len(data):
        raise ValueError('Number of vertices in provided annotation files '
                         'differs from size of vertex-level data array.\n'
                         '    EXPECTED: {} vertices\n'
                         '    RECEIVED: {} vertices'
                         .format(expected, len(data)))

    for annot in [rhannot, lhannot]:
        # read files and update end index for `data`
        labels, ctab, names = read_annot(annot)
        end += len(labels)

        # get average of vertex-level data within parcels. set all NaN
        # values to 0 before calling `_stats` because we are returning
        # sums, so the 0 values won't impact the sums (if we left the NaNs
        # then all parcels with a single NaN value would be NaN)
        currdata = data[start:end].copy()
        isna = np.isnan(currdata)
        currdata[isna] = 0
        counts, sums = _stats(currdata, labels, np.unique(labels))

        # however, we do need to account for the NaN values in the counts
        # so that our means are similar to what we'd get from e.g.,
        # np.nanmean. here, our "sums" are the counts of NaN values in our
        # parcels
        _, nacounts = _stats(isna, labels, np.unique(labels))
        counts = (np.asanyarray(counts).astype(float)
                  - np.asanyarray(nacounts).astype(float))

        with np.errstate(divide='ignore', invalid='ignore'):
            currdata = sums / counts

        # get indices of unknown and corpuscallosum and delete from parcels
        inds = [names.index(f) for f in drop]
        currdata = np.delete(currdata, inds)

        # store parcellated data
        reduced.append(currdata)
        start = end

    return np.hstack(reduced)
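# The NaN handling shared by all three reducers boils down to one
# identity: nanmean = sum(nan_to_num(x)) / (count - count_of_nans).
# A self-contained check of that trick using np.bincount in place of the
# private `_stats` helper (synthetic values with NaNs in both labels):
import numpy as np

vals = np.array([1.0, np.nan, 3.0, 4.0, np.nan, np.nan])
labels = np.array([0, 0, 0, 1, 1, 1])

sums = np.bincount(labels, weights=np.nan_to_num(vals))  # per-label sums
nacounts = np.bincount(labels, weights=np.isnan(vals))   # per-label NaN counts
counts = np.bincount(labels) - nacounts                  # valid entries only
with np.errstate(divide='ignore', invalid='ignore'):
    means = sums / counts
# means -> [2.0, 4.0], matching np.nanmean per label; an all-NaN label
# would divide 0 by 0 and yield NaN, as the docstrings promise.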
# Assumed context: `os`, `shutil`, `xarray as xr`; `gdal` from osgeo;
# `write_cog` and `HiddenPrints` from the Digital Earth Australia helper
# modules; `segutils` from rsgislib.segmentation; `_stats` as above.
def image_segmentation(ndvi, predict):
    write_cog(ndvi.to_array().compute(), "NDVI.tif", overwrite=True)

    # store temp files somewhere
    directory = "tmp"
    if not os.path.exists(directory):
        os.mkdir(directory)
    tmp = "tmp/"

    # inputs to image seg
    tiff_to_segment = "NDVI.tif"
    kea_file = "NDVI.kea"
    segmented_kea_file = "segmented.kea"

    # convert tiff to kea
    gdal.Translate(destName=kea_file,
                   srcDS=tiff_to_segment,
                   format="KEA",
                   outputSRS="EPSG:6933")

    # run image seg
    with HiddenPrints():
        segutils.runShepherdSegmentation(inputImg=kea_file,
                                         outputClumps=segmented_kea_file,
                                         tmpath=tmp,
                                         numClusters=60,
                                         minPxls=100)

    # convert kea to tif
    kwargs = {'outputType': gdal.GDT_Float32}
    gdal.Translate(destName=segmented_kea_file[:-3] + 'tif',
                   srcDS=segmented_kea_file,
                   outputSRS="EPSG:6933",
                   format='GTiff',
                   **kwargs)

    # open segments
    segments = xr.open_rasterio(
        segmented_kea_file[:-3] + 'tif').squeeze().values

    # calculate the per-segment mode of the binary `predict` layer:
    # with `index=segments`, `_stats` broadcasts each pixel's segment
    # count and sum back to the pixel grid, so a pixel is True when more
    # than half of its segment's pixels predicted 1
    count, _sum = _stats(predict, labels=segments, index=segments)
    mode = _sum > (count / 2)
    mode = xr.DataArray(mode,
                        coords=predict.coords,
                        dims=predict.dims,
                        attrs=predict.attrs)

    # remove the tmp folder and intermediate files
    shutil.rmtree(tmp)
    os.remove(kea_file)
    os.remove(segmented_kea_file)
    os.remove(tiff_to_segment)
    os.remove(segmented_kea_file[:-3] + 'tif')

    return mode.chunk({})
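# The mode trick above, isolated with public scipy.ndimage calls on toy
# arrays (the segment labels here are made up): for a binary layer, the
# per-segment mean exceeds 0.5 exactly when 1 is the majority value, and
# passing the label image itself as `index` broadcasts the result back
# to pixel space.
import numpy as np
from scipy import ndimage

predict = np.array([[1, 1, 0, 0],
                    [1, 0, 0, 0]])
segments = np.array([[1, 1, 2, 2],
                     [1, 1, 2, 2]])
mode = ndimage.mean(predict, labels=segments, index=segments) > 0.5
# segment 1 -> True (3 of 4 pixels are 1); segment 2 -> False (0 of 4)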
def sum(input, labels=None, index=None):
    # thin wrapper: `_stats` returns (count, sum) per label; keep the sums
    count, sum = _stats(input, labels, index)
    return sum
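# This is essentially scipy.ndimage.sum, which is built on `_stats` in
# the same way. A quick illustration with the public function:
import numpy as np
from scipy import ndimage

values = np.array([1.0, 2.0, 3.0, 4.0])
labels = np.array([1, 1, 2, 2])
print(ndimage.sum(values, labels=labels, index=[1, 2]))  # -> [3. 7.]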