def qb_metrics_features(streamlines, threshold=10.0, metric=None,
                        max_nb_clusters=np.iinfo('i4').max):
    """Cluster streamlines with QuickBundles under a configurable metric.

    Parameters
    ----------
    streamlines : sequence
        Streamlines to cluster, in a form accepted by DIPY's QuickBundles
        (must support fancy indexing by a list of indices, e.g.
        ``nibabel.streamlines.ArraySequence``).
    threshold : float, optional
        Distance threshold passed to QuickBundles.
    metric : str or None, optional
        Selects the feature/metric pair:
        'IF'   - IdentityFeature + average pointwise Euclidean (MDF)
        'RF'   - ResampleFeature (24 points) + average pointwise Euclidean
        'CoMF' - CenterOfMassFeature + Euclidean
        'MF'   - MidpointFeature + Euclidean
        'AF'   - ArcLengthFeature + Euclidean
        'VBEF' - VectorOfEndpointsFeature + Cosine
        Any other value (including None) falls back to "MDF_12points".
    max_nb_clusters : int, optional
        Upper bound on the number of clusters.

    Returns
    -------
    labels : numpy.ndarray
        Object array with one 1-based cluster label per streamline.
    data_clusters : list
        Streamlines grouped per cluster.
    N_list : list
        Size of each cluster.
    """
    if metric == 'IF':
        feature = IdentityFeature()
        metric = AveragePointwiseEuclideanMetric(feature=feature)
    elif metric == 'RF':
        # BUG FIX: DIPY's ResampleFeature keyword is `nb_points`, not
        # `nb_point` — the original raised TypeError on this branch.
        feature = ResampleFeature(nb_points=24)
        metric = AveragePointwiseEuclideanMetric(feature=feature)
    elif metric == 'CoMF':
        feature = CenterOfMassFeature()
        metric = EuclideanMetric(feature)
    elif metric == 'MF':
        feature = MidpointFeature()
        metric = EuclideanMetric(feature)
    elif metric == 'AF':
        feature = ArcLengthFeature()
        metric = EuclideanMetric(feature)
    elif metric == 'VBEF':
        feature = VectorOfEndpointsFeature()
        metric = CosineMetric(feature)
    else:
        metric = "MDF_12points"

    qb = QuickBundles(threshold=threshold, metric=metric,
                      max_nb_clusters=max_nb_clusters)
    clusters = qb.cluster(streamlines)

    # Object array so unassigned entries stay None (QuickBundles assigns
    # every streamline, so in practice all entries get a label).
    labels = np.array(len(streamlines) * [None])
    N_list = []
    data_clusters = []
    # NOTE(review): dict-style access (cluster['N'], cluster['indices'])
    # depends on the DIPY version's Cluster API — confirm against the
    # installed DIPY; newer versions expose len(cluster) / cluster.indices.
    for i, cluster in enumerate(clusters):
        N_list.append(cluster['N'])
        labels[cluster['indices']] = i + 1
        data_clusters.append(streamlines[cluster['indices']])
    return labels, data_clusters, N_list
streamline. """

import numpy as np

from dipy.viz import window, actor, colormap
from dipy.segment.clustering import QuickBundles
from dipy.segment.metric import CenterOfMassFeature
from dipy.segment.metric import EuclideanMetric

# Enables/disables interactive visualization
interactive = False

# Get some streamlines.
streamlines = get_streamlines()  # Previously defined.

# Cluster by center of mass: each streamline is reduced to a single 3D
# point, and clusters form around nearby centers.
feature = CenterOfMassFeature()
metric = EuclideanMetric(feature)

qb = QuickBundles(threshold=5., metric=metric)
clusters = qb.cluster(streamlines)

# Extract feature of every streamline.
centers = np.asarray(list(map(feature.extract, streamlines)))

# Color each center of mass according to the cluster they belong to.
# NOTE(review): this rebinds the name `colormap` from the imported module to
# an ndarray of colors — the dipy.viz.colormap module is no longer reachable
# under that name afterwards.
colormap = colormap.create_colormap(np.arange(len(clusters)))
colormap_full = np.ones((len(streamlines), 3))
for cluster, color in zip(clusters, colormap):
    colormap_full[cluster.indices] = color

# Visualization
def data_prep(directory, bundle, n, n_center, r_neighbor, datatype):
    """Build graph data and labels for bundle-of-interest (BOI) classification.

    Returns lists of coarsened tract graphs, corresponding labels (1 for the
    bundle of interest, 0 for others), and either the multi-level Laplacians
    (for training) or a list of each tract's distance to the BOI (for
    validation or testing).

    Parameters
    ----------
    directory : str
        Folder with the 72 bundles (.trk files) of a subject.
    bundle : str
        Name of the bundle of interest (without the ".trk" extension).
    n : int
        Number of points sampled on each tract.
    n_center : int
        Number of centers of mass which determine the neighborhood for
        training data.
    r_neighbor : float
        Maximum ratio of neighborhood tracts in the negative training data.
    datatype : str
        "training" or "test_or_val". "test_or_val" returns graph data and
        labels for all tracts of the subject, whereas "training" only returns
        the bundle of interest, neighbouring tracts (depending on
        r_neighbor), and randomly sampled tracts from other bundles to create
        a balanced training set.

    Returns
    -------
    tuple
        (X, y, L) for training — data, labels, multi-level Laplacians from
        the first coarsening — or (X, y, min_dist_l) for test/validation —
        data, labels, per-tract minimum distance to the BOI centers of mass.
    """
    data_all, points, center_list = [], [], []
    FirstIteration1, FirstIteration2 = True, True

    # --- Pass 1: the bundle of interest (positive examples) ---
    for filename in os.listdir(directory):  # Loop through bundles
        if filename == (bundle + ".trk"):  # Only look at BOI
            trk_path = os.path.join(directory, filename)
            streams, _ = trackvis.read(trk_path)
            streamlines = [s[0] for s in streams]  # List of tracts
            for tract in streamlines:
                # Resample the tract to n points; third arg presumably a
                # smoothing/interpolation degree — confirm in sample_tracts.
                x_fine, y_fine, z_fine = sample_tracts(tract, n, 2)
                vertices = np.hstack([
                    x_fine.reshape(x_fine.shape[0], 1),
                    y_fine.reshape(y_fine.shape[0], 1),
                    z_fine.reshape(z_fine.shape[0], 1)
                ])
                if FirstIteration1:
                    # The first tract fixes the coarsening permutation `perm`
                    # and Laplacians `L` reused for every subsequent tract.
                    data, L, perm = first_coarsen(vertices, n)
                    FirstIteration1 = False
                else:
                    data = coarsen_again(vertices, perm)
                data_all.append((data, 1))  # Positive label: tract is in the BOI
                # Keep n_center evenly spaced sample points per tract.
                points_loc = [int(j) for j in np.linspace(0, n - 1, n_center)]
                points.append([vertices[i, :] for i in points_loc])

            # Centroid(s) per tract
            feature = CenterOfMassFeature()
            if FirstIteration2:
                # First tract determines orientation for the rest of the bundle
                ref = points[0][int(
                    n_center / 4)]  # Look at point between the end and the median
                FirstIteration2 = False
            centroids_flipped = []
            for i in points:
                first_point = i[int(n_center / 4)]
                last_point = i[-int(n_center / 4) - 1]
                if calc_dist_3d(ref, last_point) < calc_dist_3d(
                        ref, first_point):
                    i = list(
                        reversed(i)
                    )  # Flip points if the tract is in the opposite direction
                centroids_flipped.append(i)
            # One center of mass per sampled position along the bundle.
            # NOTE(review): this rebinds `points` (previously the per-tract
            # sample lists) inside the loop.
            for j in range(n_center):
                points = [k[j] for k in centroids_flipped]
                center = list(map(feature.extract,
                                  [np.array(points)]))[0][0]  # Center of mass
                center_list.append(center)
            if datatype == "training":
                # Per center, distance to its farthest direct neighbor —
                # used below as the neighborhood radius for that center.
                max_distances = []
                for l in range(len(center_list)):
                    if l == 0:
                        dist = calc_dist_3d(center_list[l],
                                            center_list[l + 1])  # One end
                    elif l == n_center - 1:
                        dist = calc_dist_3d(center_list[l],
                                            center_list[l - 1])  # Other end
                    else:
                        dist = max([
                            calc_dist_3d(center_list[l], center_list[l + 1]),
                            calc_dist_3d(center_list[l], center_list[l - 1])
                        ])  # In between
                    max_distances.append(dist)
            break  # BOI found and processed; stop scanning files

    # No need for more negative data than positive, so limit number of
    # negative labels.
    n_tracts_neg = 0
    if datatype == "training":
        for filename in os.listdir(directory):
            if filename != (bundle + ".trk"):  # Every bundle but the BOI
                trk_path = os.path.join(directory, filename)
                streams, _ = trackvis.read(trk_path)
                n_tracts_neg += len(streams)
        neg_id_list = random.sample(
            range(n_tracts_neg), 2 * len(data_all)
        )  # Times 2 to be sure tracts in the neighboorhood will not be repeated and still have a balanced training set

    # --- Pass 2: all other bundles (negative examples / test data) ---
    neg_id = 0
    data_near, data_rest, min_dist_l = [], [], []
    for filename in os.listdir(directory):
        trk_path = os.path.join(directory, filename)
        streams, _ = trackvis.read(trk_path)
        streamlines = [s[0] for s in streams]
        if filename != (bundle + ".trk"):  # Every bundle but the BOI
            tract_id = 0  # Initialize counting
            tract_id_list = []
            for tract in streamlines:
                x_fine, y_fine, z_fine = sample_tracts(tract, n, 2)
                vertices = np.hstack([
                    x_fine.reshape(x_fine.shape[0], 1),
                    y_fine.reshape(y_fine.shape[0], 1),
                    z_fine.reshape(z_fine.shape[0], 1)
                ])
                if datatype == "training":
                    # Add neighbouring tracts
                    nrpoints = 0  # Initialize counting of number of points in the neighborhood
                    for point in vertices:
                        list_dist = []
                        for p in center_list:
                            list_dist.append(calc_dist_3d(point, p))
                        FirstIteration3 = True
                        for i in range(len(center_list)):
                            if list_dist[i] < max_distances[
                                    i] and FirstIteration3:  # In neighborhood
                                # NOTE(review): the flag makes each point count
                                # at most once even if it falls inside several
                                # centers' radii.
                                FirstIteration3 = False  # Do not count the same point twice
                                nrpoints += 1
                        # NOTE(review): exact equality with n / 2 (float
                        # division) — only ever true for even n; confirm
                        # intended.
                        if nrpoints == n / 2:  # If half of the points lay in the neighborhood
                            data = coarsen_again(vertices, perm)
                            data_near.append((data, 0))
                            if r_neighbor != 0:
                                tract_id_list.append(tract_id)
                            break
                elif datatype == "test_or_val":
                    # Add every tract for testing or validation dataset
                    min_dist = float("inf")  # Initialize minimum distance
                    data = coarsen_again(vertices, perm)
                    data_all.append((data, 0))
                    feature = CenterOfMassFeature()
                    c = list(map(feature.extract,
                                 [vertices]))[0][0]  # Center of mass of tract
                    for p in center_list:
                        d = calc_dist_3d(
                            c, p
                        )  # Distance to one of the centers of mass of the BOI
                        if d < min_dist:
                            min_dist = d
                    min_dist_l.append(min_dist)
                else:
                    sys.exit(
                        "Please enter a valid datatype ('training' or 'test_or_val')"
                    )
                tract_id += 1
            if datatype == "training":
                # Add additional random tracts
                tract_id2 = 0  # Initialize a second counting
                extra_tracts = [
                    x for x in range(len(streamlines)) if x not in tract_id_list
                ]  # Do not consider tracts that are already in the neighbourhood
                for tract in streamlines:
                    if tract_id2 in extra_tracts and neg_id in neg_id_list:
                        x_fine, y_fine, z_fine = sample_tracts(tract, n, 2)
                        vertices = np.hstack([
                            x_fine.reshape(x_fine.shape[0], 1),
                            y_fine.reshape(y_fine.shape[0], 1),
                            z_fine.reshape(z_fine.shape[0], 1)
                        ])
                        data = coarsen_again(vertices, perm)
                        data_rest.append((data, 0))
                    tract_id2 += 1  # Count extra tracts
                    neg_id += 1  # Count tracts (all bundles)

    if datatype == "training":
        # Sample for a balanced training set: cap neighborhood tracts at
        # r_neighbor of the positives, fill the remainder with random tracts.
        if len(data_near) > int(r_neighbor * len(data_all)):
            data_neg = random.sample(data_near, int(r_neighbor*len(data_all))) + \
                random.sample(data_rest,
                              int((1-r_neighbor)*len(data_all)))
        else:
            data_neg = data_near + random.sample(
                data_rest, len(data_all) - len(data_near)
            )  # Fill the rest with random tracts if necassary
        data_all.extend(data_neg)
    X, y = zip(*data_all)  # Seperate vertices and labels
    if datatype == "training":
        return list(X), list(y), L
    return list(X), list(y), min_dist_l