import os
import re

import nibabel as nib
import numpy as np
from dipy.core.gradients import gradient_table

# Project-local helpers (module paths may differ):
# build_argparser, Timer, TractographyData


def main():
    parser = build_argparser()
    args = parser.parse_args()

    signal = nib.load(args.signal)
    signal.get_data()  # Forces loading volume in-memory.

    # Derive default .bvals/.bvecs paths from the signal filename. The longer
    # suffix must come first in the alternation, otherwise '.nii.gz' only
    # loses its '.gz' part.
    basename = re.sub(r'(\.nii\.gz|\.gz)$', '', args.signal)
    bvals = basename + '.bvals' if args.bvals is None else args.bvals
    bvecs = basename + '.bvecs' if args.bvecs is None else args.bvecs
    gradients = gradient_table(bvals, bvecs)
    tracto_data = TractographyData(signal, gradients)

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(signal.affine)

    # Retrieve data.
    with Timer("Retrieving data", newline=args.verbose):
        for filename in sorted(args.bundles):
            if args.verbose:
                print("{}".format(filename))

            # Load streamlines and bring them to voxel space.
            tfile = nib.streamlines.load(filename)
            tfile.tractogram.apply_affine(rasmm2vox_affine)

            # Add streamlines to the TractographyData container, one bundle
            # per input file.
            bundle_name = os.path.splitext(os.path.basename(filename))[0]
            tracto_data.add(tfile.streamlines, bundle_name)

    if args.verbose:
        # Report the coordinate error introduced by casting to args.dtype.
        diff = tracto_data.streamlines._data - tracto_data.streamlines._data.astype(args.dtype)
        precision_error = np.sum(np.sqrt(np.sum(diff**2, axis=1)))
        avg_precision_error = precision_error / len(tracto_data.streamlines._data)
        print("Precision error: {} (avg. {})".format(precision_error,
                                                     avg_precision_error))

    # Save streamlines coordinates using either float16 or float32.
    tracto_data.streamlines._data = tracto_data.streamlines._data.astype(args.dtype)

    # Save dataset.
    tracto_data.save(args.out)
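# The script above relies on a build_argparser() helper that is not shown.
# Below is a minimal sketch of what it might look like, inferred solely from
# the args.* attributes the script accesses; option names, defaults, and help
# strings are assumptions, not the project's actual parser.
import argparse


def build_argparser():
    p = argparse.ArgumentParser(
        description="Pack a diffusion signal and streamline bundles into a "
                    "single TractographyData file.")
    p.add_argument('signal', help="Diffusion signal file (.nii | .nii.gz).")
    p.add_argument('bundles', nargs='+',
                   help="Streamline files, one bundle per file (e.g. .trk | .tck).")
    p.add_argument('--bvals', help="b-values file (default: <signal>.bvals).")
    p.add_argument('--bvecs', help="b-vectors file (default: <signal>.bvecs).")
    p.add_argument('--dtype', choices=['float16', 'float32'], default='float32',
                   help="dtype used to store streamline coordinates.")
    p.add_argument('--out', default='dataset.npz', help="Output dataset file.")
    p.add_argument('-v', '--verbose', action='store_true')
    return p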
import os
import re

import nibabel as nib
import numpy as np
from dipy.core.gradients import gradient_table
from dipy.tracking.streamline import length

# Project-local helpers (module paths may differ):
# build_argparser, Timer, TractographyData, subsample_streamlines


def main():
    parser = build_argparser()
    args = parser.parse_args()

    tracto_data = None
    if args.signal_source == "raw_signal":
        signal = nib.load(args.signal)
        signal.get_data()  # Forces loading volume in-memory.

        # Derive default gradient-file paths from the signal filename. The
        # longer suffix must come first in the alternation, otherwise
        # '.nii.gz' only loses its '.gz' part.
        basename = re.sub(r'(\.nii\.gz|\.gz)$', '', args.signal)

        # Try the .bvals/.bvecs naming first, then fall back to .bval/.bvec.
        try:
            bvals = basename + '.bvals' if args.bvals is None else args.bvals
            bvecs = basename + '.bvecs' if args.bvecs is None else args.bvecs
            gradients = gradient_table(bvals, bvecs)
        except FileNotFoundError:
            try:
                bvals = basename + '.bval' if args.bvals is None else args.bvals
                bvecs = basename + '.bvec' if args.bvecs is None else args.bvecs
                gradients = gradient_table(bvals, bvecs)
            except FileNotFoundError as e:
                print("Could not find .bvals/.bvecs or .bval/.bvec files...")
                raise e

        tracto_data = TractographyData(signal, gradients)
    elif args.signal_source == "processed_signal":
        loaded_tracto_data = TractographyData.load(args.tracto_data)
        tracto_data = TractographyData(loaded_tracto_data.signal,
                                       loaded_tracto_data.gradients)

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(tracto_data.signal.affine)

    # Retrieve data.
    with Timer("Retrieving data", newline=args.verbose):
        for filename in sorted(args.bundles):
            if args.verbose:
                print("{}".format(filename))

            # Load streamlines and drop those shorter than --min_length.
            tfile = nib.streamlines.load(filename)
            tractogram = tfile.tractogram
            original_streamlines = tractogram.streamlines
            lengths = length(original_streamlines)
            streamlines = [s for (s, l) in zip(original_streamlines, lengths)
                           if l >= args.min_length]

            # Make sure the file is not empty.
            if len(streamlines) > 0:
                if args.subsample_streamlines:
                    streamlines = subsample_streamlines(
                        streamlines, args.clustering_threshold,
                        args.removal_distance)
                    print("Total difference: {} / {}".format(
                        len(original_streamlines), len(streamlines)))

                # Rebuild the tractogram from the filtered (and possibly
                # subsampled) streamlines; reusing the loaded tractogram here
                # would silently discard the --min_length filter.
                tractogram = nib.streamlines.Tractogram(
                    streamlines, affine_to_rasmm=tractogram.affine_to_rasmm)
                tractogram.apply_affine(rasmm2vox_affine)

                # Add streamlines to the TractographyData container, one
                # bundle per input file.
                bundle_name = os.path.splitext(os.path.basename(filename))[0]
                tracto_data.add(tractogram.streamlines, bundle_name)

    if args.verbose:
        # Report the coordinate error introduced by casting to args.dtype.
        diff = tracto_data.streamlines._data - tracto_data.streamlines._data.astype(args.dtype)
        precision_error = np.sum(np.sqrt(np.sum(diff**2, axis=1)))
        avg_precision_error = precision_error / len(tracto_data.streamlines._data)
        print("Precision error: {} (avg. {})".format(precision_error,
                                                     avg_precision_error))

    # Save streamlines coordinates using either float16 or float32.
    tracto_data.streamlines._data = tracto_data.streamlines._data.astype(args.dtype)

    # Save dataset.
    tracto_data.save(args.out)
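# The subsample_streamlines() helper used above is not shown. The following is
# a possible implementation sketch, assuming the intended behaviour is:
# cluster with QuickBundles at `clustering_threshold`, then greedily drop
# streamlines whose MDF distance to an already-kept streamline in the same
# cluster falls below `removal_distance`. The real helper may work differently;
# this only illustrates the idea.
import numpy as np
from dipy.segment.clustering import QuickBundles
from dipy.tracking.distances import bundles_distances_mdf
from dipy.tracking.streamline import set_number_of_points


def subsample_streamlines(streamlines, clustering_threshold, removal_distance,
                          nb_points=20):
    # Resample so all streamlines share the same number of points, as the
    # MDF distance requires, and cast to float32 for the distance routine.
    resampled = [s.astype(np.float32)
                 for s in set_number_of_points(streamlines, nb_points)]
    clusters = QuickBundles(threshold=clustering_threshold).cluster(resampled)

    kept = []
    for cluster in clusters:
        kept_in_cluster = []
        for idx in cluster.indices:
            candidate = resampled[idx]
            if kept_in_cluster:
                dists = bundles_distances_mdf([candidate], kept_in_cluster)
                if dists.min() < removal_distance:
                    continue  # Too close to a streamline we already keep.
            kept_in_cluster.append(candidate)
            kept.append(streamlines[idx])  # Keep the original-resolution one.
    return kept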
import os

import numpy as np

# Project-local helpers (module paths may differ):
# buildArgsParser, Timer, TractographyData


def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    data = TractographyData.load(args.dataset)
    streamlines = data.streamlines
    print("{} has {:,} streamlines".format(args.dataset, len(streamlines)))

    if args.list_bundles_name:
        for bundle_name in data.bundle_names:
            bundle_id = data.name2id[bundle_name]
            print("{}: {}".format(bundle_id, bundle_name))
        return

    if args.leave_one_out is not None:
        with Timer("Splitting {} using a leave-one-out strategy".format(args.dataset),
                   newline=True):
            for bundle in args.leave_one_out:
                rng = np.random.RandomState(args.seed)
                train_data = TractographyData(data.signal, data.gradients, data.name2id)
                valid_data = TractographyData(data.signal, data.gradients, data.name2id)
                test_data = TractographyData(data.signal, data.gradients, data.name2id)

                # Each entry is a comma-separated list of bundle ids to hold out.
                bundle_ids_to_exclude = list(map(int, bundle.split(',')))
                missing_bundles_name = [data.bundle_names[i]
                                        for i in bundle_ids_to_exclude]

                if args.verbose:
                    print("Leaving out {}...".format(", ".join(missing_bundles_name)))

                include = np.ones(len(data.bundle_ids), dtype=bool)
                exclude = np.zeros(len(data.bundle_ids), dtype=bool)
                for i in bundle_ids_to_exclude:
                    include = np.logical_and(include, data.bundle_ids != i)
                    exclude = np.logical_or(exclude, data.bundle_ids == i)

                include_idx = np.where(include)[0]
                exclude_idx = np.where(exclude)[0]
                rng.shuffle(include_idx)
                rng.shuffle(exclude_idx)

                # Train on all remaining bundles; split the held-out bundles
                # evenly between the validation and test sets.
                trainset_indices = include_idx
                validset_indices = exclude_idx[:len(exclude_idx) // 2]
                testset_indices = exclude_idx[len(exclude_idx) // 2:]

                train_data.add(streamlines[trainset_indices],
                               bundle_ids=data.bundle_ids[trainset_indices])
                valid_data.add(streamlines[validset_indices],
                               bundle_ids=data.bundle_ids[validset_indices])
                test_data.add(streamlines[testset_indices],
                              bundle_ids=data.bundle_ids[testset_indices])

                filename = "missing_{}.npz".format("_".join(missing_bundles_name))
                with Timer("Saving dataset: {}".format(filename[:-4])):
                    # Strip ".npz" before appending the subset suffix.
                    train_data.save(filename[:-4] + "_trainset.npz")
                    valid_data.save(filename[:-4] + "_validset.npz")
                    test_data.save(filename[:-4] + "_testset.npz")

    else:
        rng = np.random.RandomState(args.seed)
        train_data = TractographyData(data.signal, data.gradients, data.name2id)
        valid_data = TractographyData(data.signal, data.gradients, data.name2id)
        test_data = TractographyData(data.signal, data.gradients, data.name2id)

        with Timer("Splitting {} as follows: {} using {}".format(args.dataset, args.split, args.split_type),
                   newline=args.verbose):
            for bundle_name in data.bundle_names:
                if args.verbose:
                    print("Splitting bundle {}...".format(bundle_name))

                bundle_id = data.name2id[bundle_name]
                indices = np.where(data.bundle_ids == bundle_id)[0]
                nb_examples = len(indices)
                rng.shuffle(indices)

                if args.split_type == "percentage":
                    trainset_size = int(np.round(args.split[0] * nb_examples))
                    validset_size = int(np.round(args.split[1] * nb_examples))
                    testset_size = int(np.round(args.split[2] * nb_examples))
                    # Make sure the splits sum to nb_examples.
                    testset_size += nb_examples - (trainset_size + validset_size + testset_size)
                elif args.split_type == "count":
                    raise NotImplementedError("Split type `count` not implemented yet!")

                assert trainset_size + validset_size + testset_size == nb_examples

                # Use explicit offsets rather than a negative stop index so an
                # empty test set (testset_size == 0) is handled correctly.
                trainset_indices = indices[:trainset_size]
                validset_indices = indices[trainset_size:trainset_size + validset_size]
                testset_indices = indices[trainset_size + validset_size:]

                train_data.add(streamlines[trainset_indices], bundle_name)
                valid_data.add(streamlines[validset_indices], bundle_name)
                test_data.add(streamlines[testset_indices], bundle_name)

        with Timer("Saving"):
            # args.dataset is expected to end in ".npz".
            train_data.save(args.dataset[:-4] + "_trainset.npz")
            valid_data.save(args.dataset[:-4] + "_validset.npz")
            test_data.save(args.dataset[:-4] + "_testset.npz")

    if args.delete:
        os.remove(args.dataset)
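# Quick sanity check of the rounding correction used in the percentage split
# above: any residual left by np.round is absorbed into the test set so the
# three sizes always sum to nb_examples. The numbers below are made up purely
# for illustration.
import numpy as np

split, nb_examples = (0.6, 0.2, 0.2), 102
trainset_size = int(np.round(split[0] * nb_examples))  # 61  (from 61.2)
validset_size = int(np.round(split[1] * nb_examples))  # 20  (from 20.4)
testset_size = int(np.round(split[2] * nb_examples))   # 20  (from 20.4)
testset_size += nb_examples - (trainset_size + validset_size + testset_size)  # -> 21
assert trainset_size + validset_size + testset_size == nb_examples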