import numpy as np
from dipy.segment.clustering import QuickBundles
from dipy.segment.metric import (AveragePointwiseEuclideanMetric,
                                 ResampleFeature)
from dipy.stats.analysis import afq_profile, gaussian_weights
from dipy.tracking.streamline import orient_by_streamline


def get_tract_profile(bundle, metric_img, metric_affine, use_weights=False,
                      flip=True, num_points=100):
    '''
    This function reorients the streamlines and extracts the diffusion
    metrics along the tract. It essentially performs step 1. The default
    number of points along a tract is 100, which can be thought of as
    %-along a tract.

    The flip variable signals whether you would like to flip the direction
    of the streamlines after reorientation. For example, if after
    reorientation all the streamlines run motor cortex -> brainstem and
    you actually wanted brainstem -> motor cortex, then you set flip to
    True. The default is True because we generally see reorientation
    result in motor cortex -> brainstem. For the honours project, we were
    looking for the opposite.
    '''
    # Reorient all the streamlines so that they follow the same direction
    feature = ResampleFeature(nb_points=num_points)
    d_metric = AveragePointwiseEuclideanMetric(feature)
    qb = QuickBundles(np.inf, metric=d_metric)
    centroid_bundle = qb.cluster(bundle).centroids[0]
    oriented_bundle = orient_by_streamline(bundle, centroid_bundle)

    # Calculate weights for each streamline/node in a bundle, based on the
    # Mahalanobis distance from the core of the bundle, at that node
    w_bundle = None
    if use_weights:
        w_bundle = gaussian_weights(oriented_bundle)

    # Sample the metric along the tract. The implementation of this
    # function is based on work by Yeatman et al. in 2012
    profile_bundle = afq_profile(metric_img, oriented_bundle, metric_affine,
                                 weights=w_bundle)

    # Reverse the profile if the direction is not the desired one
    if flip:
        profile_bundle = np.flip(profile_bundle)

    return profile_bundle
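# A minimal usage sketch for get_tract_profile, assuming DIPY and nibabel
# are installed and a bundle has already been segmented. The file names
# "fa.nii.gz" and "cst_l.trk" are placeholders, not names from the
# original project.
import nibabel as nib
from dipy.io.stateful_tractogram import Space
from dipy.io.streamline import load_tractogram

fa_img = nib.load("fa.nii.gz")
fa_data = fa_img.get_fdata()
sft = load_tractogram("cst_l.trk", fa_img, to_space=Space.RASMM)

# Weighted profile, flipped so it runs brainstem -> motor cortex
profile = get_tract_profile(sft.streamlines, fa_data, fa_img.affine,
                            use_weights=True, flip=True)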
import numpy as np
import numpy.testing as npt
from dipy.stats.analysis import gaussian_weights
from dipy.tracking.streamline import Streamlines


def test_gaussian_weights():
    # Some bogus x, y, z coordinates
    x = np.arange(10).astype(float)
    y = np.arange(10).astype(float)
    z = np.arange(10).astype(float)

    # Create a distribution for which we can predict the weights we would
    # expect to get:
    bundle = Streamlines([np.array([x, y, z]).T + 1,
                          np.array([x, y, z]).T - 1])
    # In this case, all nodes receive an equal weight of 0.5:
    w = gaussian_weights(bundle, n_points=10)
    npt.assert_almost_equal(w, np.ones((len(bundle), 10)) * 0.5)

    # Test when asked to return the Mahalanobis distance, instead of weights
    w = gaussian_weights(bundle, n_points=10, return_mahalnobis=True)
    npt.assert_almost_equal(w, np.ones((len(bundle), 10)))

    # Here, some nodes are twice as far from the mean as others
    bundle = Streamlines([np.array([x, y, z]).T + 2,
                          np.array([x, y, z]).T + 1,
                          np.array([x, y, z]).T - 1,
                          np.array([x, y, z]).T - 2])
    w = gaussian_weights(bundle, n_points=10)
    # And their weights should be halved:
    npt.assert_almost_equal(w[0], w[1] / 2)
    npt.assert_almost_equal(w[-1], w[2] / 2)

    # Test the situation where all the streamlines have an identical node:
    arr1 = np.array([x, y, z]).T + 2
    arr2 = np.array([x, y, z]).T + 1
    arr3 = np.array([x, y, z]).T - 1
    arr4 = np.array([x, y, z]).T - 2
    arr1[0] = np.array([1, 1, 1])
    arr2[0] = np.array([1, 1, 1])
    arr3[0] = np.array([1, 1, 1])
    arr4[0] = np.array([1, 1, 1])
    bundle_w_id_node = Streamlines([arr1, arr2, arr3, arr4])
    w = gaussian_weights(Streamlines(bundle_w_id_node), n_points=10)
    # For this case, the result should be a weight of 1/n_streamlines in
    # that node for all streamlines:
    npt.assert_equal(
        w[:, 0],
        np.ones(len(bundle_w_id_node)) * 1 / len(bundle_w_id_node))

    # Test the situation where all the streamlines are copies of each other:
    bundle_w_copies = Streamlines([bundle[0], bundle[0],
                                   bundle[0], bundle[0]])
    w = gaussian_weights(bundle_w_copies, n_points=10)
    # In this case, the entire array should be equal to 1/n_streamlines:
    npt.assert_equal(w, np.ones(w.shape) * 1 / len(bundle_w_id_node))

    # Test with a bundle of length 1:
    bundle_len_1 = Streamlines([bundle[0]])
    w = gaussian_weights(bundle_len_1, n_points=10)
    npt.assert_equal(w, np.ones(w.shape))

    bundle_len_1 = Streamlines([bundle[0]])
    w = gaussian_weights(bundle_len_1, n_points=10, return_mahalnobis=True)
    npt.assert_equal(w, np.ones(w.shape) * np.nan)
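# The assertions above pin down the weighting scheme: at each node, a
# streamline's weight is inversely proportional to its Mahalanobis
# distance from the bundle core, normalized so the weights sum to 1
# across streamlines. A small numeric sketch of that normalization, with
# hypothetical distances for four streamlines at a single node:
import numpy as np

d = np.array([2.0, 1.0, 1.0, 2.0])   # hypothetical Mahalanobis distances
w = (1 / d) / np.sum(1 / d)          # inverse distance, normalized per node
print(w)                             # [0.1667 0.3333 0.3333 0.1667]
print(np.isclose(w[0], w[1] / 2))    # True, matching the halved-weight test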
""" files, folder = dpd.fetch_bundle_fa_hcp() import nibabel as nib img = nib.load(op.join(folder, "hcp_bundle_fa.nii.gz")) fa = img.get_fdata() """ Calculate weights for each bundle: """ import dipy.stats.analysis as dsa w_cst_l = dsa.gaussian_weights(oriented_cst_l) w_af_l = dsa.gaussian_weights(oriented_af_l) """ And then use the weights to calculate the tract profiles for each bundle """ profile_cst_l = dsa.afq_profile(fa, oriented_cst_l, affine=img.affine, weights=w_cst_l) profile_af_l = dsa.afq_profile(fa, oriented_af_l, affine=img.affine, weights=w_af_l) fig, (ax1, ax2) = plt.subplots(1, 2) ax1.plot(profile_cst_l)
##########################################################################
# Bundle profiles
# ---------------
# Streamlines are represented in the original diffusion space
# (`Space.VOX`) and scalar properties along the length of each bundle are
# queried from this scalar data. Here, the contribution of each
# streamline is weighted according to how representative this streamline
# is of the bundle overall.

print("Extracting tract profiles...")
for bundle in bundles:
    sft = load_tractogram(op.join(working_dir, f'{bundle}_afq.trk'),
                          img, to_space=Space.VOX)
    fig, ax = plt.subplots(1)
    weights = gaussian_weights(sft.streamlines)
    profile = afq_profile(FA_data, sft.streamlines, np.eye(4),
                          weights=weights)
    ax.plot(profile)
    ax.set_title(bundle)

plt.show()

##########################################################################
# References:
# -------------------------
# .. [Yeatman2012] Jason D Yeatman, Robert F Dougherty, Nathaniel J Myall,
#                  Brian A Wandell, Heidi M Feldman, "Tract profiles of
#                  white matter properties: automating fiber-tract
#                  quantification", PloS One, 7: e49790
#
# .. [Yeatman2014] Jason D Yeatman, Brian A Wandell, Aviv A Mezer,
# scalar properties along the length of each bundle are queried from this
# scalar data. Here, the contribution of each streamline is weighted
# according to how representative this streamline is of the bundle
# overall.
#
# .. note::
#     As a sanity check: the tract profile of the anterior forceps should
#     be relatively symmetric.

print("Extracting tract profiles...")
for bundle in bundles:
    print(f"Extracting {bundle}...")
    tractogram = load_tractogram(op.join(working_dir, f'afq_{bundle}.trk'),
                                 img, to_space=Space.VOX)
    fig, ax = plt.subplots(1)
    weights = gaussian_weights(tractogram.streamlines)
    profile = afq_profile(FA_data, tractogram.streamlines, np.eye(4),
                          weights=weights)
    ax.plot(profile)
    ax.set_title(bundle)

# Save before showing, so the canvas is not empty in non-interactive
# backends:
plt.savefig(op.join(working_dir, 'AntFrontal_tractprofile.png'))
plt.show()

##########################################################################
# References:
# -------------------------
# .. [Yeatman2012] Jason D Yeatman, Robert F Dougherty, Nathaniel J Myall,
#                  Brian A Wandell, Heidi M Feldman, "Tract profiles of
def tract_profiles(self, data, subject_label, affine=np.eye(4),
                   method='afq', metric='FA', n_points=100, weight=True):
    """
    Calculate a summarized profile of data for each bundle along its
    length. Follows the approach outlined in [Yeatman2012]_.

    Parameters
    ----------
    data : 3D volume
        The statistic to sample with the streamlines.
    subject_label : string
        String which identifies these bundles in the pandas dataframe.
    affine : array_like (4, 4), optional.
        The mapping from voxel coordinates to 'data' coordinates.
        Default: np.eye(4)
    method : string
        Method used to segment streamlines. Default: 'afq'
    metric : string
        Metric of the statistic in data. Default: 'FA'
    n_points : int
        Number of points to resample to. Default: 100
    weight : boolean
        Whether to calculate Gaussian weights before profiling.
        Default: True
    """
    self.to_space(Space.VOX)
    profiles = []
    for bundle_name, bundle in self.bundles.items():
        if weight:
            weights = gaussian_weights(bundle.streamlines,
                                       n_points=n_points)
        else:
            weights = None
        profile = afq_profile(data, bundle.streamlines, affine,
                              weights=weights, n_points=n_points)
        for ii in range(len(profile)):
            # Subject, Bundle, node, method, metric (FA, MD), value
            profiles.append([subject_label, bundle_name, ii, method,
                             metric, profile[ii]])
    logging.disable(level=logging.WARNING)
    logging.disable(logging.NOTSET)
    profiles = pd.DataFrame(
        data=profiles,
        columns=["Subject", "Bundle", "Node", "Method", "Metric", "Value"])
    return profiles
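# A sketch of how this method might be invoked. `bundles_obj` stands in
# for an instance of the owning class, and `fa_data`/`fa_affine` for a
# loaded FA volume and its affine; all three names are hypothetical.
df = bundles_obj.tract_profiles(fa_data, subject_label="sub-01",
                                affine=fa_affine, metric="FA")
# Long-format result: one row per (Subject, Bundle, Node), with the
# sampled value in the "Value" column.
print(df.head())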
import numpy as np
from scipy.stats import zscore
from dipy.io.stateful_tractogram import StatefulTractogram, Space
from dipy.stats.analysis import gaussian_weights
# _resample_tg is a helper defined elsewhere in this module.


def clean_bundle(tg, n_points=100, clean_rounds=5, distance_threshold=5,
                 length_threshold=4, min_sl=20, stat='mean',
                 return_idx=False):
    """
    Clean a segmented fiber group based on the Mahalanobis distance of
    each streamline.

    Parameters
    ----------
    tg : StatefulTractogram class instance.
        The streamlines constituting a fiber group.
    n_points : int, optional.
        Number of points to resample each streamline to. Default: 100.
    clean_rounds : int, optional.
        Number of rounds of cleaning based on the Mahalanobis distance
        from the mean of extracted bundles. Default: 5
    distance_threshold : float, optional.
        Threshold of cleaning based on the Mahalanobis distance (the
        units are standard deviations). Default: 5.
    length_threshold : float, optional.
        Threshold for cleaning based on length (in standard deviations).
        The length of any streamline should not be *more* than this
        number of stdevs from the mean length.
    min_sl : int, optional.
        Number of streamlines in a bundle under which we will not bother
        with cleaning outliers. Default: 20.
    stat : callable or str, optional.
        The statistic of each node relative to which the Mahalanobis is
        calculated. Default: `np.mean` (but can also use median, etc.)
    return_idx : bool
        Whether to also return the indices of the retained streamlines in
        the original tractogram. Default: False.

    Returns
    -------
    A StatefulTractogram class instance containing only the streamlines
    that have a Mahalanobis distance smaller than `distance_threshold`
    from the mean of each one of the nodes.
    """
    # Convert string to callable, if that's what you got.
    if isinstance(stat, str):
        stat = getattr(np, stat)

    # We don't even bother if there aren't enough streamlines:
    if len(tg.streamlines) < min_sl:
        if return_idx:
            return tg, np.arange(len(tg.streamlines))
        else:
            return tg

    # Resample once up-front:
    fgarray = _resample_tg(tg, n_points)
    # Keep this around, so you can use it for indexing at the very end:
    idx = np.arange(len(fgarray))
    # This calculates the Mahalanobis for each streamline/node:
    w = gaussian_weights(fgarray, return_mahalnobis=True, stat=stat)
    lengths = np.array([sl.shape[0] for sl in tg.streamlines])
    # We'll only do this for clean_rounds rounds:
    rounds_elapsed = 0
    while ((np.any(w > distance_threshold)
            or np.any(zscore(lengths) > length_threshold))
           and rounds_elapsed < clean_rounds
           and len(tg.streamlines) > min_sl):
        # Select the fibers that have a Mahalanobis smaller than the
        # threshold for all their nodes:
        idx_dist = np.where(np.all(w < distance_threshold, axis=-1))[0]
        idx_len = np.where(zscore(lengths) < length_threshold)[0]
        idx_belong = np.intersect1d(idx_dist, idx_len)

        if len(idx_belong) < min_sl:
            # Need to sort and return exactly min_sl:
            idx_belong = np.argsort(np.sum(w, axis=-1))[:min_sl]

        idx = idx[idx_belong.astype(int)]
        # Update by selection:
        fgarray = fgarray[idx_belong.astype(int)]
        lengths = lengths[idx_belong.astype(int)]
        # Repeat:
        w = gaussian_weights(fgarray, return_mahalnobis=True, stat=stat)
        rounds_elapsed += 1

    # Select based on the variable that was keeping track of things:
    out = StatefulTractogram(tg.streamlines[idx], tg, Space.VOX)
    if return_idx:
        return out, idx
    else:
        return out
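# A usage sketch for clean_bundle, assuming a segmented bundle saved as a
# .trk file; "arc_l.trk" and "dwi.nii.gz" are placeholder file names.
from dipy.io.streamline import load_tractogram

sft = load_tractogram("arc_l.trk", "dwi.nii.gz", to_space=Space.VOX)
cleaned, kept_idx = clean_bundle(sft, distance_threshold=3,
                                 return_idx=True)
print(f"Kept {len(cleaned.streamlines)} of {len(sft.streamlines)} "
      "streamlines")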
def evaluate_along_streamlines(scalar_img, streamlines, beginnings,
                               nr_points, dilate=0, predicted_peaks=None,
                               affine=None):
    # Runtime:
    # - default: 2.7s (test), 56s (all), 10s (test 4 bundles, 100 points)
    # - map_coordinate order 1: 1.9s (test), 26s (all), 6s (test 4 bundles, 100 points)
    # - map_coordinate order 3: 2.2s (test), 33s (all)
    # - values_from_volume: 2.5s (test), 43s (all)
    # - AFQ: ?s (test), ?s (all), 85s (test 4 bundles, 100 points)
    #   => AFQ a lot slower than the others

    streamlines = list(
        transform_streamlines(streamlines, np.linalg.inv(affine)))

    for i in range(dilate):
        beginnings = binary_dilation(beginnings)
    beginnings = beginnings.astype(np.uint8)
    streamlines = _orient_to_same_start_region(streamlines, beginnings)

    if predicted_peaks is not None:
        # scalar img can also be the original peaks
        best_orig_peaks = fiber_utils.get_best_original_peaks(
            predicted_peaks, scalar_img, peak_len_thr=0.00001)
        scalar_img = np.linalg.norm(best_orig_peaks, axis=-1)

    algorithm = "distance_map"  # equal_dist | distance_map | cutting_plane | afq

    if algorithm == "equal_dist":
        ### Sampling ###
        streamlines = fiber_utils.resample_fibers(streamlines,
                                                  nb_points=nr_points)
        values = map_coordinates(scalar_img, np.array(streamlines).T,
                                 order=1)
        ### Aggregation ###
        values_mean = np.array(values).mean(axis=1)
        values_std = np.array(values).std(axis=1)
        return values_mean, values_std

    if algorithm == "distance_map":  # cKDTree
        ### Sampling ###
        streamlines = fiber_utils.resample_fibers(streamlines,
                                                  nb_points=nr_points)
        values = map_coordinates(scalar_img, np.array(streamlines).T,
                                 order=1)

        ### Aggregating by cKDTree approach ###
        metric = AveragePointwiseEuclideanMetric()
        qb = QuickBundles(threshold=100., metric=metric)
        clusters = qb.cluster(streamlines)
        centroids = Streamlines(clusters.centroids)
        if len(centroids) > 1:
            print("WARNING: number of clusters > 1 ({})".format(
                len(centroids)))
        _, segment_idxs = cKDTree(centroids.data, 1,
                                  copy_data=True).query(streamlines,
                                                        k=1)  # (2000, 100)

        values_t = np.array(values).T  # (2000, 100)

        # If we want to take a weighted mean like in AFQ:
        # weights = dsa.gaussian_weights(Streamlines(streamlines))
        # values_t = weights * values_t
        # return np.sum(values_t, 0), None

        results_dict = defaultdict(list)
        for idx, sl in enumerate(values_t):
            for jdx, seg in enumerate(sl):
                results_dict[segment_idxs[idx, jdx]].append(seg)

        if len(results_dict.keys()) < nr_points:
            print("WARNING: found less than required points. "
                  "Filling up with centroid values.")
            centroid_values = map_coordinates(scalar_img,
                                              np.array([centroids[0]]).T,
                                              order=1)
            for i in range(nr_points):
                if len(results_dict[i]) == 0:
                    results_dict[i].append(
                        np.array(centroid_values).T[0, i])

        results_mean = []
        results_std = []
        for key in sorted(results_dict.keys()):
            value = results_dict[key]
            if len(value) > 0:
                results_mean.append(np.array(value).mean())
                results_std.append(np.array(value).std())
            else:
                print("WARNING: empty segment")
                results_mean.append(0)
                results_std.append(0)

        return results_mean, results_std

    elif algorithm == "cutting_plane":
        # This will resample all streamlines to have equally distant
        # points (resulting in a different number of points in each
        # streamline). The "middle" of the tract is then estimated by
        # taking the middle element of the centroid (estimated with
        # QuickBundles). Then, for each streamline, the point closest to
        # the "middle" is determined and points are indexed for each
        # streamline starting from the middle. Averaging across all
        # streamlines is then done by taking the mean of points with the
        # same indices.
        ### Sampling ###
        streamlines = fiber_utils.resample_to_same_distance(
            streamlines, max_nr_points=nr_points)
        # map_coordinates does not allow streamlines with different
        # lengths -> use values_from_volume
        values = np.array(
            values_from_volume(scalar_img, streamlines,
                               affine=np.eye(4))).T

        ### Aggregating by Cutting Plane approach ###
        # Resample so that all fibers have the same number of points
        # -> needed for QuickBundles
        streamlines_resamp = fiber_utils.resample_fibers(
            streamlines, nb_points=nr_points)
        metric = AveragePointwiseEuclideanMetric()
        qb = QuickBundles(threshold=100., metric=metric)
        clusters = qb.cluster(streamlines_resamp)
        centroids = Streamlines(clusters.centroids)

        # index of the middle point of the centroid
        middle_idx = int(nr_points / 2)
        middle_point = centroids[0][middle_idx]
        # For each streamline, get the index of the point closest to the
        # middle
        segment_idxs = fiber_utils.get_idxs_of_closest_points(
            streamlines, middle_point)

        # Align along the middle and assign indices
        segment_idxs_eqlen = []
        base_idx = 1000  # use a higher index to avoid negative numbers for the area below the middle
        for idx, sl in enumerate(streamlines):
            sl_middle_pos = segment_idxs[idx]
            before_elems = sl_middle_pos
            after_elems = len(sl) - sl_middle_pos
            # indices for one streamline, e.g.
            # [998, 999, 1000, 1001, 1002, 1003]; 1000 is the middle
            r = range((base_idx - before_elems), (base_idx + after_elems))
            segment_idxs_eqlen.append(r)
        segment_idxs = segment_idxs_eqlen

        # Calculate the maximum number of indices so as not to end up with
        # more indices than nr_points. (This could be the case if one
        # streamline is very off-center and therefore has a lot of points
        # only on one side. In this case the values too far out on that
        # streamline will be cut off.)
        max_idx = base_idx + int(nr_points / 2)
        min_idx = base_idx - int(nr_points / 2)

        # Group by segment indices
        results_dict = defaultdict(list)
        for idx, sl in enumerate(values):
            for jdx, seg in enumerate(sl):
                current_idx = segment_idxs[idx][jdx]
                if current_idx >= min_idx and current_idx < max_idx:
                    results_dict[current_idx].append(seg)

        # If values are missing, fill up with centroid values
        if len(results_dict.keys()) < nr_points:
            print("WARNING: found less than required points. "
                  "Filling up with centroid values.")
            centroid_sl = [centroids[0]]
            centroid_sl = np.array(centroid_sl).T
            centroid_values = map_coordinates(scalar_img, centroid_sl,
                                              order=1)
            for idx, seg_idx in enumerate(range(min_idx, max_idx)):
                if len(results_dict[seg_idx]) == 0:
                    results_dict[seg_idx].append(
                        np.array(centroid_values).T[0, idx])

        # Aggregate by mean
        results_mean = []
        results_std = []
        for key in sorted(results_dict.keys()):
            value = results_dict[key]
            if len(value) > 0:
                results_mean.append(np.array(value).mean())
                results_std.append(np.array(value).std())
            else:
                print("WARNING: empty segment")
                results_mean.append(0)
                results_std.append(0)

        return results_mean, results_std

    elif algorithm == "afq":
        ### Sampling + aggregation ###
        streamlines = fiber_utils.resample_fibers(streamlines,
                                                  nb_points=nr_points)
        streamlines = Streamlines(streamlines)
        weights = dsa.gaussian_weights(streamlines)
        results_mean = dsa.afq_profile(scalar_img, streamlines,
                                       affine=np.eye(4), weights=weights)
        results_std = np.zeros(nr_points)
        return results_mean, results_std
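# Of the four branches above, only "afq" relies solely on public DIPY
# calls. A self-contained sketch of that branch on synthetic data, so it
# runs without TractSeg's fiber_utils helpers; all variable names here
# are illustrative:
import numpy as np
import dipy.stats.analysis as dsa
from dipy.tracking.streamline import Streamlines

scalar_img = np.random.rand(10, 10, 10)      # stand-in scalar volume
x = np.linspace(1, 8, 100)
streamlines = Streamlines([np.array([x, x, x]).T,
                           np.array([x, x, x]).T + 0.5])

weights = dsa.gaussian_weights(streamlines)  # (n_streamlines, 100)
profile = dsa.afq_profile(scalar_img, streamlines, affine=np.eye(4),
                          weights=weights)
print(profile.shape)                         # (100,)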
def tract_profiles(subses_dict, clean_bundles_file, bundle_dict,
                   scalar_dict, profile_weights, dwi_affine,
                   tracking_params, segmentation_params):
    keys = []
    vals = []
    for k in bundle_dict.keys():
        if k != "whole_brain":
            keys.append(bundle_dict[k]['uid'])
            vals.append(k)
    reverse_dict = dict(zip(keys, vals))

    bundle_names = []
    node_numbers = []
    profiles = np.empty((len(scalar_dict), 0)).tolist()
    this_profile = np.zeros((len(scalar_dict), 100))

    trk = nib.streamlines.load(clean_bundles_file)
    for b in np.unique(trk.tractogram.data_per_streamline['bundle']):
        idx = np.where(
            trk.tractogram.data_per_streamline['bundle'] == b)[0]
        this_sl = trk.streamlines[idx]
        bundle_name = reverse_dict[b]
        for ii, (scalar, scalar_file) in enumerate(scalar_dict.items()):
            scalar_data = nib.load(scalar_file).get_fdata()
            if isinstance(profile_weights, str):
                if profile_weights == "gauss":
                    this_prof_weights = gaussian_weights(this_sl)
                elif profile_weights == "median":
                    # weights the bundle to only return the median value
                    # at each node
                    def _median_weight(bundle):
                        fgarray = set_number_of_points(bundle, 100)
                        values = np.array(
                            values_from_volume(scalar_data, fgarray,
                                               dwi_affine))
                        weights = np.zeros(values.shape)
                        for ii, jj in enumerate(
                                np.argsort(values,
                                           axis=0)[len(values) // 2, :]):
                            weights[jj, ii] = 1
                        return weights
                    this_prof_weights = _median_weight
            else:
                this_prof_weights = profile_weights
            this_profile[ii] = afq_profile(scalar_data, this_sl,
                                           dwi_affine,
                                           weights=this_prof_weights)
            profiles[ii].extend(list(this_profile[ii]))
        nodes = list(np.arange(this_profile[0].shape[0]))
        bundle_names.extend([bundle_name] * len(nodes))
        node_numbers.extend(nodes)

    profile_dict = dict()
    profile_dict["tractID"] = bundle_names
    profile_dict["nodeID"] = node_numbers
    for ii, scalar in enumerate(scalar_dict.keys()):
        profile_dict[scalar] = profiles[ii]
    profile_dframe = pd.DataFrame(profile_dict)
    meta = dict(source=clean_bundles_file,
                parameters=get_default_args(afq_profile))

    return profile_dframe, meta
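# The returned profile_dframe is in long format: one row per bundle and
# node ("tractID", "nodeID"), with one column per scalar. A sketch of
# plotting it, assuming a result `df` from this function with an "FA"
# column:
import matplotlib.pyplot as plt

for tract_id, group in df.groupby("tractID"):
    plt.plot(group["nodeID"], group["FA"], label=tract_id)
plt.xlabel("Node along bundle")
plt.ylabel("FA")
plt.legend()
plt.show()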