def load_tractography_dataset(subject_files, volume_manager, name="HCP", use_sh_coeffs=False, mean_centering=True): subjects = [] with Timer(" Loading subject(s)", newline=True): for subject_file in sorted(subject_files): print(" {}".format(subject_file)) tracto_data = TractographyData.load(subject_file) dwi = tracto_data.signal bvals = tracto_data.gradients.bvals bvecs = tracto_data.gradients.bvecs if use_sh_coeffs: # Use 45 spherical harmonic coefficients to represent the diffusion signal. volume = neurotools.get_spherical_harmonics_coefficients( dwi, bvals, bvecs, mean_centering=mean_centering).astype(np.float32) else: # Resample the diffusion signal to have 100 directions. volume = neurotools.resample_dwi( dwi, bvals, bvecs, mean_centering=mean_centering).astype(np.float32) tracto_data.signal.uncache( ) # Free some memory as we don't need the original signal. subject_id = volume_manager.register(volume) tracto_data.subject_id = subject_id subjects.append(tracto_data) return TractographyDataset(subjects, name, keep_on_cpu=True)
def main():
    parser = build_argparser()
    args = parser.parse_args()

    signal = nib.load(args.signal)
    signal.get_data()  # Forces loading volume in-memory.
    basename = re.sub(r'(\.gz|\.nii.gz)$', '', args.signal)
    bvals = basename + '.bvals' if args.bvals is None else args.bvals
    bvecs = basename + '.bvecs' if args.bvecs is None else args.bvecs

    gradients = gradient_table(bvals, bvecs)
    tracto_data = TractographyData(signal, gradients)

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(signal.affine)

    # Retrieve data.
    with Timer("Retrieving data", newline=args.verbose):
        for filename in sorted(args.bundles):
            if args.verbose:
                print("{}".format(filename))

            # Load streamlines
            tfile = nib.streamlines.load(filename)
            tfile.tractogram.apply_affine(rasmm2vox_affine)

            # Add streamlines to the TractogramData
            bundle_name = os.path.splitext(os.path.basename(filename))[0]
            tracto_data.add(tfile.streamlines, bundle_name)

    if args.verbose:
        diff = tracto_data.streamlines._data - tracto_data.streamlines._data.astype(args.dtype)
        precision_error = np.sum(np.sqrt(np.sum(diff**2, axis=1)))
        avg_precision_error = precision_error / len(tracto_data.streamlines._data)
        print("Precision error: {} (avg. {})".format(precision_error, avg_precision_error))

    # Save streamlines coordinates using either float16 or float32.
    tracto_data.streamlines._data = tracto_data.streamlines._data.astype(args.dtype)

    # Save dataset
    tracto_data.save(args.out)
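# Hedged illustration (added). The verbose block above reports how far the
# streamline coordinates move when they are cast to a smaller dtype (e.g.
# float16) before saving. The same computation, standalone, with NumPy and
# synthetic coordinates:
def _example_precision_error():
    import numpy as np

    points = np.random.rand(1000, 3).astype(np.float32) * 100  # fake voxel coordinates
    diff = points - points.astype(np.float16)
    precision_error = np.sum(np.sqrt(np.sum(diff**2, axis=1)))  # total displacement over all points
    avg_precision_error = precision_error / len(points)         # average displacement per point
    return precision_error, avg_precision_error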
def load_tractography_dataset(subject_files, volume_manager, name="HCP", use_sh_coeffs=False): subjects = [] with Timer(" Loading subject(s)", newline=True): for subject_file in sorted(subject_files): print(" {}".format(subject_file)) tracto_data = TractographyData.load(subject_file) dwi = tracto_data.signal bvals = tracto_data.gradients.bvals bvecs = tracto_data.gradients.bvecs if use_sh_coeffs: # Use 45 spherical harmonic coefficients to represent the diffusion signal. volume = neurotools.get_spherical_harmonics_coefficients(dwi, bvals, bvecs).astype(np.float32) else: # Resample the diffusion signal to have 100 directions. volume = neurotools.resample_dwi(dwi, bvals, bvecs).astype(np.float32) tracto_data.signal.uncache() # Free some memory as we don't need the original signal. subject_id = volume_manager.register(volume) tracto_data.subject_id = subject_id subjects.append(tracto_data) return TractographyDataset(subjects, name, keep_on_cpu=True)
def main():
    parser = build_argparser()
    args = parser.parse_args()

    tracto_data = None

    if args.signal_source == "raw_signal":
        signal = nib.load(args.signal)
        signal.get_data()  # Forces loading volume in-memory.
        basename = re.sub(r'(\.gz|\.nii.gz)$', '', args.signal)

        try:
            bvals = basename + '.bvals' if args.bvals is None else args.bvals
            bvecs = basename + '.bvecs' if args.bvecs is None else args.bvecs
            gradients = gradient_table(bvals, bvecs)
        except FileNotFoundError:
            try:
                bvals = basename + '.bval' if args.bvals is None else args.bvals
                bvecs = basename + '.bvec' if args.bvecs is None else args.bvecs
                gradients = gradient_table(bvals, bvecs)
            except FileNotFoundError as e:
                print("Could not find .bvals/.bvecs or .bval/.bvec files...")
                raise e

        tracto_data = TractographyData(signal, gradients)
    elif args.signal_source == "processed_signal":
        loaded_tracto_data = TractographyData.load(args.tracto_data)
        tracto_data = TractographyData(loaded_tracto_data.signal, loaded_tracto_data.gradients)

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(tracto_data.signal.affine)

    # Retrieve data.
    with Timer("Retrieving data", newline=args.verbose):
        for filename in sorted(args.bundles):
            if args.verbose:
                print("{}".format(filename))

            # Load streamlines
            tfile = nib.streamlines.load(filename)
            tractogram = tfile.tractogram

            # Keep only streamlines that are long enough.
            original_streamlines = tractogram.streamlines
            lengths = length(original_streamlines)
            streamlines = [s for (s, l) in zip(original_streamlines, lengths) if l >= args.min_length]

            # Make sure file is not empty
            if len(streamlines) > 0:
                if args.subsample_streamlines:
                    output_streamlines = subsample_streamlines(streamlines, args.clustering_threshold,
                                                               args.removal_distance)

                    print("Total difference: {} / {}".format(len(original_streamlines), len(output_streamlines)))
                    new_tractogram = nib.streamlines.Tractogram(output_streamlines,
                                                                affine_to_rasmm=tractogram.affine_to_rasmm)
                    tractogram = new_tractogram

                tractogram.apply_affine(rasmm2vox_affine)

                # Add streamlines to the TractogramData
                bundle_name = os.path.splitext(os.path.basename(filename))[0]
                tracto_data.add(tractogram.streamlines, bundle_name)

    if args.verbose:
        diff = tracto_data.streamlines._data - tracto_data.streamlines._data.astype(args.dtype)
        precision_error = np.sum(np.sqrt(np.sum(diff**2, axis=1)))
        avg_precision_error = precision_error / len(tracto_data.streamlines._data)
        print("Precision error: {} (avg. {})".format(precision_error, avg_precision_error))

    # Save streamlines coordinates using either float16 or float32.
    tracto_data.streamlines._data = tracto_data.streamlines._data.astype(args.dtype)

    # Save dataset
    tracto_data.save(args.out)
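# Hedged sketch (added). The loop above keeps only streamlines whose length, as
# computed by DIPY's `length`, reaches args.min_length. The same filter in
# isolation (the import path below is an assumption):
def _example_filter_by_length(streamlines, min_length=20.0):
    from dipy.tracking.streamline import length

    lengths = length(streamlines)  # one length per streamline, in the streamlines' units
    return [s for s, l in zip(streamlines, lengths) if l >= min_length]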
def horizon_flow(input_files, cluster=False, cluster_thr=15., random_colors=False, verbose=True,
                 length_lt=0, length_gt=1000, clusters_lt=0, clusters_gt=10**8, noisy_streamlines_sigma=0.):
    """ Horizon

    Parameters
    ----------
    input_files : variable string
    cluster : bool, optional
    cluster_thr : float, optional
    random_colors : bool, optional
    verbose : bool, optional
    length_lt : float, optional
    length_gt : float, optional
    clusters_lt : int, optional
    clusters_gt : int, optional
    noisy_streamlines_sigma : float, optional
    """
    filenames = input_files  # glob(input_files)
    tractograms = []

    data = None
    affine = None
    for i, f in enumerate(filenames):
        if verbose:
            print('Loading file ...')
            print(f)
            print('\n')

        if f.endswith('.trk') or f.endswith('.tck'):
            streamlines = nib.streamlines.load(f).streamlines
            idx = np.arange(len(streamlines))
            rng = np.random.RandomState(42)
            rng.shuffle(idx)
            streamlines = streamlines[idx[:100]]

            if noisy_streamlines_sigma > 0. and i > 0:
                streamlines = add_noise_to_streamlines(streamlines, noisy_streamlines_sigma)

            tractograms.append(streamlines)

        if f.endswith('.npz'):
            tractography_data = TractographyData.load(f)
            # idx = np.arange(len(tractography_data.streamlines))
            # rng = np.random.RandomState(42)
            # rng.shuffle(idx)
            # tractography_data.streamlines = tractography_data.streamlines[idx[:200]]
            # tractograms.append(tractography_data.streamlines)

            M = 2  # Take M streamlines per bundle
            for k in sorted(tractography_data.name2id.keys()):
                bundle_id = tractography_data.name2id[k]
                streamlines = tractography_data.streamlines[tractography_data.bundle_ids == bundle_id][:M].copy()
                streamlines._lengths = streamlines._lengths.astype("int64")
                streamlines = set_number_of_points(streamlines, nb_points=40)
                tractograms.append(streamlines)

            if hasattr(tractography_data, 'signal'):
                signal = tractography_data.signal.get_data()
                data = signal[:, :, :, 0]
                affine = np.eye(4)

        if f.endswith('.nii.gz') or f.endswith('.nii'):
            img = nib.load(f)
            data = img.get_data()
            affine = img.get_affine()
            if verbose:
                print(affine)

            # tmp save
            # tractogram = nib.streamlines.Tractogram(tractograms[0])
            # tractogram.apply_affine(img.affine)
            # nib.streamlines.save(tractogram, "tmp.tck")
            # exit()

    horizon(tractograms, data, affine, cluster, cluster_thr, random_colors,
            length_lt, length_gt, clusters_lt, clusters_gt)
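# Hedged sketch (added). The .trk/.tck branch above draws a reproducible random
# subset of 100 streamlines by shuffling indices with a fixed seed. The same
# pattern, standalone (it works with nibabel's ArraySequence, which supports
# indexing by an integer array):
def _example_random_subset(streamlines, n=100, seed=42):
    import numpy as np

    idx = np.arange(len(streamlines))
    np.random.RandomState(seed).shuffle(idx)
    return streamlines[idx[:n]]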
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    data = TractographyData.load(args.dataset)
    streamlines = data.streamlines
    print("{} has {:,} streamlines".format(args.dataset, len(streamlines)))

    if args.list_bundles_name:
        for bundle_name in data.bundle_names:
            bundle_id = data.name2id[bundle_name]
            print("{}: {}".format(bundle_id, bundle_name))
        return

    if args.leave_one_out is not None:
        with Timer("Splitting {} using a leave-one-out strategy".format(args.dataset), newline=True):
            for bundle in args.leave_one_out:
                rng = np.random.RandomState(args.seed)
                train_data = TractographyData(data.signal, data.gradients, data.name2id)
                valid_data = TractographyData(data.signal, data.gradients, data.name2id)
                test_data = TractographyData(data.signal, data.gradients, data.name2id)

                bundle_ids_to_exclude = list(map(int, bundle.split(',')))
                missing_bundles_name = [data.bundle_names[i] for i in bundle_ids_to_exclude]

                if args.verbose:
                    print("Leaving out {}...".format(", ".join(missing_bundles_name)))

                include = np.ones(len(data.bundle_ids), dtype=bool)
                exclude = np.zeros(len(data.bundle_ids), dtype=bool)
                for i in bundle_ids_to_exclude:
                    include = np.logical_and(include, data.bundle_ids != i)
                    exclude = np.logical_or(exclude, data.bundle_ids == i)

                include_idx = np.where(include)[0]
                exclude_idx = np.where(exclude)[0]
                rng.shuffle(include_idx)
                rng.shuffle(exclude_idx)

                trainset_indices = include_idx
                validset_indices = exclude_idx[:len(exclude_idx) // 2]
                testset_indices = exclude_idx[len(exclude_idx) // 2:]

                train_data.add(streamlines[trainset_indices], bundle_ids=data.bundle_ids[trainset_indices])
                valid_data.add(streamlines[validset_indices], bundle_ids=data.bundle_ids[validset_indices])
                test_data.add(streamlines[testset_indices], bundle_ids=data.bundle_ids[testset_indices])

                filename = "missing_{}.npz".format("_".join(missing_bundles_name))
                with Timer("Saving dataset: {}".format(filename[:-4])):
                    train_data.save(filename[:-4] + "_trainset.npz")
                    valid_data.save(filename[:-4] + "_validset.npz")
                    test_data.save(filename[:-4] + "_testset.npz")

    else:
        rng = np.random.RandomState(args.seed)
        train_data = TractographyData(data.signal, data.gradients, data.name2id)
        valid_data = TractographyData(data.signal, data.gradients, data.name2id)
        test_data = TractographyData(data.signal, data.gradients, data.name2id)

        with Timer("Splitting {} as follows {} using {}".format(args.dataset, args.split, args.split_type),
                   newline=args.verbose):
            for bundle_name in data.bundle_names:
                if args.verbose:
                    print("Splitting bundle {}...".format(bundle_name))

                bundle_id = data.name2id[bundle_name]
                indices = np.where(data.bundle_ids == bundle_id)[0]
                nb_examples = len(indices)
                rng.shuffle(indices)

                if args.split_type == "percentage":
                    trainset_size = int(np.round(args.split[0] * nb_examples))
                    validset_size = int(np.round(args.split[1] * nb_examples))
                    testset_size = int(np.round(args.split[2] * nb_examples))
                    # Make sure the splits sum to nb_examples
                    testset_size += nb_examples - (trainset_size + validset_size + testset_size)
                elif args.split_type == "count":
                    raise NotImplementedError("Split type `count` not implemented yet!")

                assert trainset_size + validset_size + testset_size == nb_examples

                trainset_indices = indices[:trainset_size]
                validset_indices = indices[trainset_size:-testset_size]
                testset_indices = indices[-testset_size:]

                train_data.add(streamlines[trainset_indices], bundle_name)
                valid_data.add(streamlines[validset_indices], bundle_name)
                test_data.add(streamlines[testset_indices], bundle_name)

        with Timer("Saving"):
            train_data.save(args.dataset[:-4] + "_trainset.npz")
            valid_data.save(args.dataset[:-4] + "_validset.npz")
            test_data.save(args.dataset[:-4] + "_testset.npz")

    if args.delete:
        os.remove(args.dataset)
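# Hedged sketch (added). The percentage split above rounds each fraction of a
# bundle and then folds any rounding leftover into the test set so the three
# sizes always sum to the number of examples. The same size computation in
# isolation:
def _example_split_sizes(nb_examples, split=(0.8, 0.1, 0.1)):
    import numpy as np

    trainset_size = int(np.round(split[0] * nb_examples))
    validset_size = int(np.round(split[1] * nb_examples))
    testset_size = int(np.round(split[2] * nb_examples))
    testset_size += nb_examples - (trainset_size + validset_size + testset_size)  # absorb rounding error
    return trainset_size, validset_size, testset_size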
def load_tractography_dataset_from_dwi_and_tractogram(dwi, tractogram, volume_manager, use_sh_coeffs=False,
                                                      bvals=None, bvecs=None, step_size=None, mean_centering=True):
    # Load signal
    signal = nib.load(dwi)
    signal.get_data()  # Forces loading volume in-memory.
    basename = re.sub(r'(\.gz|\.nii.gz)$', '', dwi)
    bvals = basename + '.bvals' if bvals is None else bvals
    bvecs = basename + '.bvecs' if bvecs is None else bvecs

    gradients = gradient_table(bvals, bvecs)
    tracto_data = TractographyData(signal, gradients)

    # Load streamlines
    tfile = nib.streamlines.load(tractogram)
    tractogram = tfile.tractogram

    # Resample streamlines to have a fixed step size, if needed.
    if step_size is not None:
        print("Resampling streamlines to have a step size of {}mm".format(step_size))
        streamlines = tractogram.streamlines
        streamlines._lengths = streamlines._lengths.astype(int)
        streamlines._offsets = streamlines._offsets.astype(int)
        lengths = length(streamlines)
        nb_points = np.ceil(lengths / step_size).astype(int)
        new_streamlines = (set_number_of_points(s, n) for s, n in zip(streamlines, nb_points))
        tractogram = nib.streamlines.Tractogram(new_streamlines, affine_to_rasmm=np.eye(4))

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(signal.affine)
    tractogram.apply_affine(rasmm2vox_affine)

    # Add streamlines to the TractogramData
    tracto_data.add(tractogram.streamlines, "tractogram")

    dwi = tracto_data.signal
    bvals = tracto_data.gradients.bvals
    bvecs = tracto_data.gradients.bvecs
    if use_sh_coeffs:
        # Use 45 spherical harmonic coefficients to represent the diffusion signal.
        volume = neurotools.get_spherical_harmonics_coefficients(
            dwi, bvals, bvecs, mean_centering=mean_centering).astype(np.float32)
    else:
        # Resample the diffusion signal to have 100 directions.
        volume = neurotools.resample_dwi(
            dwi, bvals, bvecs, mean_centering=mean_centering).astype(np.float32)

    tracto_data.signal.uncache()  # Free some memory as we don't need the original signal.
    subject_id = volume_manager.register(volume)
    tracto_data.subject_id = subject_id

    return TractographyDataset([tracto_data], "dataset", keep_on_cpu=True)
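# Hedged sketch (added). The step-size resampling above picks, per streamline, a
# number of points such that consecutive points end up roughly `step_size`
# apart, then resamples with DIPY's set_number_of_points. A standalone version
# (the maximum(..., 2) guard is an addition so degenerate streamlines keep at
# least two points; the import path is assumed):
def _example_resample_to_step_size(streamlines, step_size=0.5):
    import numpy as np
    from dipy.tracking.streamline import length, set_number_of_points

    lengths = length(streamlines)
    nb_points = np.maximum(np.ceil(lengths / step_size).astype(int), 2)
    return [set_number_of_points(s, n) for s, n in zip(streamlines, nb_points)]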