def test_gru_mixture_fprop():
    hidden_sizes = 50

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = batch_schedulers.TractographyBatchScheduler(
            trainset, batch_size=16, noisy_streamlines_sigma=None, seed=1234)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        hyperparams = {'model': 'gru_mixture',
                       'n_gaussians': 2,
                       'SGD': "1e-2",
                       'hidden_sizes': hidden_sizes,
                       'learn_to_stop': False,
                       'normalize': False,
                       'feed_previous_direction': False}
        model = factories.model_factory(hyperparams,
                                        input_size=volume_manager.data_dimension,
                                        output_size=batch_scheduler.target_size,
                                        volume_manager=volume_manager)
        model.initialize(factories.weigths_initializer_factory("orthogonal", seed=1234))

    # Test fprop with missing streamlines from one subject in a batch
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs], output, updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function([trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
                               loss.loss,
                               updates=model.graph_updates)
    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function([trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
                                list(optimizer.directions.values()),
                                updates=model.graph_updates)
    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)
def load_mask_classifier_dataset(subject_files, volume_manager, name="HCP", use_sh_coeffs=False):
    subjects = []
    with Timer(" Loading subject(s)", newline=True):
        for subject_file in sorted(subject_files):
            print(" {}".format(subject_file))

            mask_data = MaskClassifierData.load(subject_file)

            dwi = mask_data.signal
            bvals = mask_data.gradients.bvals
            bvecs = mask_data.gradients.bvecs
            if use_sh_coeffs:
                # Use 45 spherical harmonic coefficients to represent the diffusion signal.
                volume = neurotools.get_spherical_harmonics_coefficients(dwi, bvals, bvecs).astype(np.float32)
            else:
                # Resample the diffusion signal to have 100 directions.
                volume = neurotools.resample_dwi(dwi, bvals, bvecs).astype(np.float32)

            mask_data.signal.uncache()  # Free some memory as we don't need the original signal.
            subject_id = volume_manager.register(volume)
            mask_data.subject_id = subject_id
            subjects.append(mask_data)

    return MaskClassifierDataset(subjects, name, keep_on_cpu=True)
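# Usage sketch (not part of the original script; file names below are hypothetical):
# the loader above only needs a VolumeManager and a list of saved MaskClassifierData files.
def _example_load_mask_classifier_dataset():
    volume_manager = neurotools.VolumeManager()
    trainset = load_mask_classifier_dataset(["subject01_mask_data.npz", "subject02_mask_data.npz"],
                                            volume_manager, name="trainset", use_sh_coeffs=False)
    print("Number of examples:", len(trainset))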
def main():
    parser = build_parser()
    args = parser.parse_args()

    for f in args.datasets:
        with Timer("Normalizing step size of dataset '{}'".format(f)):
            tractography_data = neurotools.TractographyData.load(f)
            t = nib.streamlines.Tractogram(tractography_data.streamlines)
            t.apply_affine(tractography_data.signal.affine)  # Bring streamlines to RAS+mm

            streamlines = t.streamlines
            streamlines._lengths = streamlines._lengths.astype(int)
            streamlines._offsets = streamlines._offsets.astype(int)
            lengths = length(streamlines)
            nb_points = np.ceil(lengths / args.step_size).astype(int)

            new_streamlines = (set_number_of_points(s, n) for s, n in zip(streamlines, nb_points))
            t = nib.streamlines.Tractogram(new_streamlines)
            t.apply_affine(np.linalg.inv(tractography_data.signal.affine))
            t.affine_to_rasmm = np.eye(4)
            tractography_data.streamlines = t.streamlines

            filename = f[:-4] + "_" + str(args.step_size) + "mm" + f[-4:]
            tractography_data.save(filename)
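# Worked example of the resampling arithmetic above (illustrative, made-up values):
# each streamline is re-sampled to ceil(length / step_size) points so that consecutive
# points end up roughly `step_size` mm apart.
def _example_step_size_resampling():
    lengths = np.array([120.0, 37.3])                      # streamline lengths in mm
    step_size = 0.75                                       # desired spacing in mm
    nb_points = np.ceil(lengths / step_size).astype(int)   # -> [160, 50]
    assert list(nb_points) == [160, 50]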
def main():
    parser = build_parser()
    args = parser.parse_args()

    for f in args.datasets:
        with Timer("Loading dataset '{}'".format(f)):
            tractography_data = neurotools.TractographyData.load(f)
            print(tractography_data)
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    with Timer("Loading streamlines"):
        trk = nib.streamlines.load(args.tractogram)
        losses = trk.tractogram.data_per_streamline['loss']
        del trk.tractogram.data_per_streamline['loss']  # Not supported by my version of MI-Brain.

    with Timer("Coloring streamlines"):
        viridis = plt.get_cmap('RdYlGn')
        losses = -losses[:, 0]
        losses -= losses.mean()
        vmin = losses.min()
        vmax = losses.max()

        if args.normalization == "norm":
            cNorm = colors.Normalize(vmin=vmin, vmax=vmax)
        elif args.normalization == "log":
            cNorm = colors.LogNorm(vmin=vmin, vmax=vmax)
        elif args.normalization == "symlog":
            cNorm = colors.SymLogNorm(linthresh=0.03, linscale=1, vmin=vmin, vmax=vmax)
        else:
            raise ValueError("Unknown normalization: {}".format(args.normalization))

        scalarMap = cm.ScalarMappable(norm=cNorm, cmap=viridis)
        print(scalarMap.get_clim())

        # losses -= losses.mean()
        # losses /= losses.std()
        streamlines_colors = scalarMap.to_rgba(losses, bytes=True)[:, :-1]

        # from dipy.viz import fvtk
        # streamlines_colors = fvtk.create_colormap(-losses[:, 0]) * 255

        colors_per_point = ArraySequence([np.tile(c, (len(s), 1))
                                          for s, c in zip(trk.tractogram.streamlines, streamlines_colors)])
        trk.tractogram.data_per_point['color'] = colors_per_point

    with Timer("Saving streamlines"):
        if args.out is None:
            args.out = args.tractogram[:-4] + "_color_" + args.normalization + args.tractogram[-4:]

        nib.streamlines.save(trk.tractogram, args.out)
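# Minimal sketch (illustration only) of the scalar-to-color mapping used above: a Normalize
# instance rescales the loss values to [0, 1] and ScalarMappable turns them into uint8 RGBA;
# the alpha channel is dropped before storing per-point colors.
def _example_loss_to_rgb():
    values = np.array([0.1, 0.5, 2.0])
    cnorm = colors.Normalize(vmin=values.min(), vmax=values.max())
    mappable = cm.ScalarMappable(norm=cnorm, cmap=plt.get_cmap('RdYlGn'))
    rgb = mappable.to_rgba(values, bytes=True)[:, :-1]  # shape (3, 3), dtype uint8
    print(rgb)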
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    data = TractographyData.load(args.dataset)
    streamlines = data.streamlines
    print("{} has {:,} streamlines".format(args.dataset, len(streamlines)))

    if args.list_bundles_name:
        for bundle_name in data.bundle_names:
            bundle_id = data.name2id[bundle_name]
            print("{}: {}".format(bundle_id, bundle_name))
        return

    if args.leave_one_out is not None:
        with Timer("Splitting {} using a leave-one-out strategy".format(args.dataset), newline=True):
            for bundle in args.leave_one_out:
                rng = np.random.RandomState(args.seed)
                train_data = TractographyData(data.signal, data.gradients, data.name2id)
                valid_data = TractographyData(data.signal, data.gradients, data.name2id)
                test_data = TractographyData(data.signal, data.gradients, data.name2id)

                bundle_ids_to_exclude = list(map(int, bundle.split(',')))
                missing_bundles_name = [data.bundle_names[i] for i in bundle_ids_to_exclude]

                if args.verbose:
                    print("Leaving out {}...".format(", ".join(missing_bundles_name)))

                include = np.ones(len(data.bundle_ids), dtype=bool)
                exclude = np.zeros(len(data.bundle_ids), dtype=bool)
                for i in bundle_ids_to_exclude:
                    include = np.logical_and(include, data.bundle_ids != i)
                    exclude = np.logical_or(exclude, data.bundle_ids == i)

                include_idx = np.where(include)[0]
                exclude_idx = np.where(exclude)[0]
                rng.shuffle(include_idx)
                rng.shuffle(exclude_idx)

                trainset_indices = include_idx
                validset_indices = exclude_idx[:len(exclude_idx) // 2]
                testset_indices = exclude_idx[len(exclude_idx) // 2:]

                train_data.add(streamlines[trainset_indices], bundle_ids=data.bundle_ids[trainset_indices])
                valid_data.add(streamlines[validset_indices], bundle_ids=data.bundle_ids[validset_indices])
                test_data.add(streamlines[testset_indices], bundle_ids=data.bundle_ids[testset_indices])

                filename = "missing_{}.npz".format("_".join(missing_bundles_name))
                with Timer("Saving dataset: {}".format(filename[:-4])):
                    train_data.save(filename[:-4] + "_trainset.npz")
                    valid_data.save(filename[:-4] + "_validset.npz")
                    test_data.save(filename[:-4] + "_testset.npz")

    else:
        rng = np.random.RandomState(args.seed)
        train_data = TractographyData(data.signal, data.gradients, data.name2id)
        valid_data = TractographyData(data.signal, data.gradients, data.name2id)
        test_data = TractographyData(data.signal, data.gradients, data.name2id)

        with Timer("Splitting {} as follows {} using {}".format(args.dataset, args.split, args.split_type),
                   newline=args.verbose):
            for bundle_name in data.bundle_names:
                if args.verbose:
                    print("Splitting bundle {}...".format(bundle_name))

                bundle_id = data.name2id[bundle_name]
                indices = np.where(data.bundle_ids == bundle_id)[0]
                nb_examples = len(indices)
                rng.shuffle(indices)

                if args.split_type == "percentage":
                    trainset_size = int(np.round(args.split[0] * nb_examples))
                    validset_size = int(np.round(args.split[1] * nb_examples))
                    testset_size = int(np.round(args.split[2] * nb_examples))
                    # Make sure the splits sum to nb_examples.
                    testset_size += nb_examples - (trainset_size + validset_size + testset_size)
                elif args.split_type == "count":
                    raise NotImplementedError("Split type `count` not implemented yet!")

                assert trainset_size + validset_size + testset_size == nb_examples

                trainset_indices = indices[:trainset_size]
                validset_indices = indices[trainset_size:-testset_size]
                testset_indices = indices[-testset_size:]

                train_data.add(streamlines[trainset_indices], bundle_name)
                valid_data.add(streamlines[validset_indices], bundle_name)
                test_data.add(streamlines[testset_indices], bundle_name)

        with Timer("Saving"):
            train_data.save(args.dataset[:-4] + "_trainset.npz")
            valid_data.save(args.dataset[:-4] + "_validset.npz")
            test_data.save(args.dataset[:-4] + "_testset.npz")

    if args.delete:
        os.remove(args.dataset)
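# Worked example (illustrative values) of the rounding fix-up used in the percentage split above:
# any drift introduced by np.round is absorbed into the test split so the three sizes always
# sum to the number of examples.
def _example_split_rounding():
    nb_examples = 9
    split = (0.5, 0.25, 0.25)
    trainset_size = int(np.round(split[0] * nb_examples))
    validset_size = int(np.round(split[1] * nb_examples))
    testset_size = int(np.round(split[2] * nb_examples))
    testset_size += nb_examples - (trainset_size + validset_size + testset_size)
    assert trainset_size + validset_size + testset_size == nb_examples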
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    if min(args.keep_top) < 0:
        parser.error("--keep-top must be in [0, 1].")

    # Get experiment folder.
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiment hyperparameters.
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    # Use this for hyperparams added in a new version but absent from older experiments.
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'predict_offset': False,
                                   'normalize': False,
                                   'keep_step_size': False,
                                   'sort_streamlines': False}
    for new_hyperparams, default_value in retrocompatibility_defaults.items():
        if new_hyperparams not in hyperparams:
            hyperparams[new_hyperparams] = default_value

    with Timer("Loading signal data and tractogram", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset_from_dwi_and_tractogram(args.signal, args.tractogram,
                                                                             volume_manager,
                                                                             use_sh_coeffs=hyperparams['use_sh_coeffs'],
                                                                             bvals=args.bvals, bvecs=args.bvecs,
                                                                             step_size=args.step_size)
        print("Dataset size:", len(dataset))

        if vizu_available and args.vizu:
            vizu.check_dataset_integrity(dataset, subset=0.2)

    with Timer("Loading model"):
        loss_type = args.loss_type
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path, volume_manager=volume_manager)
            if loss_type in ['l2_sum', 'l2_mean']:
                loss_type = "expected_value"
        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))

    with Timer("Building evaluation function"):
        # Override K for gru_multistep.
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset,
                                                  use_data_augment=False,  # Otherwise it doubles the number of losses :-/
                                                  train_mode=False,
                                                  batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams, model, dataset, loss_type=loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)

    with Timer("Scoring...", newline=True):
        dummy_status = Status()  # Forces recomputing results.
        losses = l2_error.losses.view(dummy_status)

        if hyperparams['model'] == 'ffnn_regression':
            _losses = dataset.streamlines.copy()
            _losses._data = losses.copy()
            _losses._lengths -= 1
            _losses._offsets -= np.arange(len(dataset.streamlines))

            if args.loss_type == 'l2_sum':
                losses = np.asarray([l.sum() for l in _losses])
            elif args.loss_type == 'l2_mean':
                losses = np.asarray([l.mean() for l in _losses])

        mean = float(l2_error.mean.view(dummy_status))
        stderror = float(l2_error.stderror.view(dummy_status))

        print("Loss: {:.4f} ± {:.4f}".format(mean, stderror))
        print("Min: {:.4f}".format(losses.min()))
        print("Max: {:.4f}".format(losses.max()))
        print("Percentiles: {}".format(np.percentile(losses, [0, 25, 50, 75, 100])))

    with Timer("Saving streamlines"):
        nii = dataset.subjects[0].signal
        tractogram = nib.streamlines.Tractogram(dataset.streamlines[batch_scheduler.indices],
                                                affine_to_rasmm=nii.affine)
        tractogram.data_per_streamline['loss'] = losses

        header = {}
        header[Field.VOXEL_TO_RASMM] = nii.affine.copy()
        header[Field.VOXEL_SIZES] = nii.header.get_zooms()[:3]
        header[Field.DIMENSIONS] = nii.shape[:3]
        header[Field.VOXEL_ORDER] = "".join(aff2axcodes(nii.affine))

        nib.streamlines.save(tractogram.copy(), args.out, header=header)

    if len(args.keep_top) > 0:
        for keep_top in args.keep_top:
            with Timer("Saving top {}% streamlines".format(keep_top)):
                idx = np.argsort(losses)
                idx = idx[:int(keep_top * len(losses))]
                print("Keeping {}/{} streamlines".format(len(idx), len(losses)))
                sub_tractogram = tractogram[idx]
                out_filename = args.out[:-4] + "_top{}".format(keep_top) + ".tck"
                nib.streamlines.save(sub_tractogram, out_filename)
def main():
    parser = build_argparser()
    args = parser.parse_args()

    # Get experiment folder.
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiment hyperparameters.
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading DWIs"):
        # Load gradient table.
        dwi_name = args.dwi
        if dwi_name.endswith(".gz"):
            dwi_name = dwi_name[:-3]
        if dwi_name.endswith(".nii"):
            dwi_name = dwi_name[:-4]

        try:
            bvals_filename = dwi_name + ".bvals"
            bvecs_filename = dwi_name + ".bvecs"
            bvals, bvecs = dipy.io.gradients.read_bvals_bvecs(bvals_filename, bvecs_filename)
        except FileNotFoundError:
            try:
                bvals_filename = dwi_name + ".bval"
                bvecs_filename = dwi_name + ".bvec"
                bvals, bvecs = dipy.io.gradients.read_bvals_bvecs(bvals_filename, bvecs_filename)
            except FileNotFoundError as e:
                print("Could not find .bvals/.bvecs or .bval/.bvec files...")
                raise e

        dwi = nib.load(args.dwi)
        if hyperparams["use_sh_coeffs"]:
            # Use 45 spherical harmonic coefficients to represent the diffusion signal.
            weights = neurotools.get_spherical_harmonics_coefficients(dwi, bvals, bvecs).astype(np.float32)
        else:
            # Resample the diffusion signal to have 100 directions.
            weights = neurotools.resample_dwi(dwi, bvals, bvecs).astype(np.float32)

        affine_rasmm2dwivox = np.linalg.inv(dwi.affine)

    with Timer("Loading model"):
        if hyperparams["model"] == "gru_regression":
            from learn2track.models import GRU_Regression
            model_class = GRU_Regression
        elif hyperparams['model'] == 'gru_gaussian':
            from learn2track.models import GRU_Gaussian
            model_class = GRU_Gaussian
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model_class = GRU_Mixture
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model_class = GRU_Multistep_Gaussian
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model_class = FFNN_Regression
        else:
            raise ValueError("Unknown model!")

        kwargs = {}
        volume_manager = neurotools.VolumeManager()
        volume_manager.register(weights)
        kwargs['volume_manager'] = volume_manager

        # Load the actual model.
        model = model_class.create(pjoin(experiment_path), **kwargs)  # Create new instance and restore model.
        model.drop_prob = 0.
        print(str(model))

    mask = None
    if args.mask is not None:
        with Timer("Loading mask"):
            mask_nii = nib.load(args.mask)
            mask = mask_nii.get_data()
            # Compute the affine allowing to evaluate the mask at some coordinates correctly.
            # affine_maskvox2dwivox: mask_vox => rasmm space => dwi_vox
            affine_maskvox2dwivox = np.dot(affine_rasmm2dwivox, mask_nii.affine)
            if args.dilate_mask:
                import scipy
                mask = scipy.ndimage.morphology.binary_dilation(mask).astype(mask.dtype)

    with Timer("Generating seeds"):
        seeds = []

        for filename in args.seeds:
            if filename.endswith('.trk') or filename.endswith('.tck'):
                tfile = nib.streamlines.load(filename)
                # Send the streamlines to voxel space since that's where we'll track.
                tfile.tractogram.apply_affine(affine_rasmm2dwivox)
                # Use extremities of the streamlines as seeding points.
                seeds += [s[0] for s in tfile.streamlines]
                seeds += [s[-1] for s in tfile.streamlines]
            else:
                # Assume it is a binary mask.
                rng = np.random.RandomState(args.seeding_rng_seed)
                nii_seeds = nib.load(filename)

                # affine_seedsvox2dwivox: mask_vox => rasmm space => dwi_vox
                affine_seedsvox2dwivox = np.dot(affine_rasmm2dwivox, nii_seeds.affine)

                nii_seeds_data = nii_seeds.get_data()

                if args.dilate_seeding_mask:
                    import scipy
                    nii_seeds_data = scipy.ndimage.morphology.binary_dilation(nii_seeds_data).astype(nii_seeds_data.dtype)

                indices = np.array(np.where(nii_seeds_data)).T
                for idx in indices:
                    seeds_in_voxel = idx + rng.uniform(-0.5, 0.5, size=(args.nb_seeds_per_voxel, 3))
                    seeds_in_voxel = nib.affines.apply_affine(affine_seedsvox2dwivox, seeds_in_voxel)
                    seeds.extend(seeds_in_voxel)

        seeds = np.array(seeds, dtype=theano.config.floatX)

    with Timer("Tracking in the diffusion voxel space"):
        voxel_sizes = np.asarray(dwi.header.get_zooms()[:3])
        if not np.all(voxel_sizes == dwi.header.get_zooms()[0]):
            print("* Careful, voxels are anisotropic {}!".format(tuple(voxel_sizes)))

        # Since we are tracking in diffusion voxel space, convert step_size (in mm) to voxel.
        if args.step_size is not None:
            step_size = np.float32(args.step_size / voxel_sizes.max())
            # Also convert max length (in mm) to a maximum number of points.
            max_nb_points = int(np.ceil(args.max_length / args.step_size))
        else:
            step_size = None
            max_nb_points = args.max_length

        if args.theta is not None:
            theta = np.deg2rad(args.theta)
        elif args.curvature is not None and args.curvature > 0:
            theta = get_max_angle_from_curvature(args.curvature, step_size)
        else:
            theta = np.deg2rad(45)

        print("Angle: {}".format(np.rad2deg(theta)))
        print("Step size (vox): {}".format(step_size))
        print("Max nb. points: {}".format(max_nb_points))

        is_outside_mask = make_is_outside_mask(mask, affine_maskvox2dwivox, threshold=args.mask_threshold)
        is_too_long = make_is_too_long(max_nb_points)
        is_too_curvy = make_is_too_curvy(np.rad2deg(theta))
        is_unlikely = make_is_unlikely(0.5)
        is_stopping = make_is_stopping({STOPPING_MASK: is_outside_mask,
                                        STOPPING_LENGTH: is_too_long,
                                        STOPPING_CURVATURE: is_too_curvy,
                                        STOPPING_LIKELIHOOD: is_unlikely})

        is_stopping.max_nb_points = max_nb_points  # Small hack.

        tractogram = batch_track(model, weights, seeds,
                                 step_size=step_size,
                                 is_stopping=is_stopping,
                                 batch_size=args.batch_size,
                                 args=args)

        # Streamlines have been generated in voxel space.
        # Transform them back to RAS+mm space using the dwi's affine.
        tractogram.affine_to_rasmm = dwi.affine
        tractogram.to_world()  # Performed in-place.

    nb_streamlines = len(tractogram)

    if args.save_rejected:
        rejected_tractogram = Tractogram()
        rejected_tractogram.affine_to_rasmm = tractogram._affine_to_rasmm

    print("Generated {:,} (compressed) streamlines".format(nb_streamlines))
    with Timer("Cleaning streamlines", newline=True):
        # Flush streamlines that have no points.
        if args.save_rejected:
            rejected_tractogram += tractogram[np.array(list(map(len, tractogram))) <= 0]

        tractogram = tractogram[np.array(list(map(len, tractogram))) > 0]
        print("Removed {:,} empty streamlines".format(nb_streamlines - len(tractogram)))

        # Remove small streamlines.
        nb_streamlines = len(tractogram)
        lengths = dipy.tracking.streamline.length(tractogram.streamlines)

        if args.save_rejected:
            rejected_tractogram += tractogram[lengths < args.min_length]

        tractogram = tractogram[lengths >= args.min_length]
        lengths = lengths[lengths >= args.min_length]

        if len(lengths) > 0:
            print("Average length: {:.2f} mm.".format(lengths.mean()))
            print("Minimum length: {:.2f} mm. Maximum length: {:.2f}".format(lengths.min(), lengths.max()))

        print("Removed {:,} streamlines smaller than {:.2f} mm".format(nb_streamlines - len(tractogram),
                                                                       args.min_length))
        if args.discard_stopped_by_curvature:
            nb_streamlines = len(tractogram)
            stopping_curvature_flag_is_set = is_flag_set(tractogram.data_per_streamline['stopping_flags'][:, 0],
                                                         STOPPING_CURVATURE)

            if args.save_rejected:
                rejected_tractogram += tractogram[stopping_curvature_flag_is_set]

            tractogram = tractogram[np.logical_not(stopping_curvature_flag_is_set)]
            print("Removed {:,} streamlines stopped for having a curvature higher than {:.2f} degrees".format(
                nb_streamlines - len(tractogram), np.rad2deg(theta)))

        if args.filter_threshold is not None:
            # Remove streamlines that produce a reconstruction error higher than a certain threshold.
            nb_streamlines = len(tractogram)
            losses = compute_loss_errors(tractogram.streamlines, model, hyperparams)
            print("Mean loss: {:.4f} ± {:.4f}".format(np.mean(losses),
                                                      np.std(losses, ddof=1) / np.sqrt(len(losses))))

            if args.save_rejected:
                rejected_tractogram += tractogram[losses > args.filter_threshold]

            tractogram = tractogram[losses <= args.filter_threshold]
            print("Removed {:,} streamlines producing a loss higher than {:.2f} mm".format(
                nb_streamlines - len(tractogram), args.filter_threshold))

    with Timer("Saving {:,} (compressed) streamlines".format(len(tractogram))):
        filename = args.out
        if args.out is None:
            prefix = args.prefix
            if prefix is None:
                dwi_name = os.path.basename(args.dwi)
                if dwi_name.endswith(".nii.gz"):
                    dwi_name = dwi_name[:-7]
                else:  # .nii
                    dwi_name = dwi_name[:-4]

                prefix = os.path.basename(os.path.dirname(args.dwi)) + dwi_name
                prefix = prefix.replace(".", "_")

            seed_mask_type = args.seeds[0].replace(".", "_").replace("_", "").replace("/", "-")
            if "int" in args.seeds[0]:
                seed_mask_type = "int"
            elif "wm" in args.seeds[0]:
                seed_mask_type = "wm"
            elif "rois" in args.seeds[0]:
                seed_mask_type = "rois"
            elif "bundles" in args.seeds[0]:
                seed_mask_type = "bundles"

            mask_type = ""
            if "fa" in args.mask:
                mask_type = "fa"
            elif "wm" in args.mask:
                mask_type = "wm"

            if args.dilate_seeding_mask:
                seed_mask_type += "D"

            if args.dilate_mask:
                mask_type += "D"

            filename_items = ["{}",
                              "useMaxComponent-{}",
                              # "seed-{}",
                              # "mask-{}",
                              "step-{:.2f}mm",
                              "nbSeeds-{}",
                              "maxAngleDeg-{:.1f}"
                              # "keepCurv-{}",
                              # "filtered-{}",
                              # "minLen-{}",
                              # "pftRetry-{}",
                              # "pftHist-{}",
                              # "trackLikePeter-{}",
                              ]
            filename = ('_'.join(filename_items) + ".tck").format(
                prefix,
                args.use_max_component,
                # seed_mask_type,
                # mask_type,
                args.step_size,
                args.nb_seeds_per_voxel,
                np.rad2deg(theta)
                # not args.discard_stopped_by_curvature,
                # args.filter_threshold,
                # args.min_length,
                # args.pft_nb_retry,
                # args.pft_nb_backtrack_steps,
                # args.track_like_peter
            )

        save_path = pjoin(experiment_path, filename)
        try:  # Create dirs, if needed.
            os.makedirs(os.path.dirname(save_path))
        except:
            pass

        print("Saving to {}".format(save_path))
        nib.streamlines.save(tractogram, save_path)

    if args.save_rejected:
        with Timer("Saving {:,} (compressed) rejected streamlines".format(len(rejected_tractogram))):
            rejected_filename_items = filename_items.copy()
            rejected_filename_items.insert(1, "rejected")
            rejected_filename = ('_'.join(rejected_filename_items) + ".tck").format(
                prefix,
                args.use_max_component,
                # seed_mask_type,
                # mask_type,
                args.step_size,
                args.nb_seeds_per_voxel,
                np.rad2deg(theta)
                # not args.discard_stopped_by_curvature,
                # args.filter_threshold,
                # args.min_length,
                # args.pft_nb_retry,
                # args.pft_nb_backtrack_steps,
                # args.track_like_peter
            )

            rejected_save_path = pjoin(experiment_path, rejected_filename)
            try:  # Create dirs, if needed.
                os.makedirs(os.path.dirname(rejected_save_path))
            except:
                pass

            print("Saving rejected streamlines to {}".format(rejected_save_path))
            nib.streamlines.save(rejected_tractogram, rejected_save_path)
def main():
    parser = build_args_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder.
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiment hyperparameters.
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset([args.streamlines], volume_manager, name="dataset",
                                                     use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path, volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))

        print(str(model))

    tractogram_file = pjoin(experiment_path, args.out)
    if not os.path.isfile(tractogram_file) or args.force:
        if args.method == 'prediction':
            tractogram = prediction_tractogram(hyperparams, model, dataset, args.batch_size, args.prediction)
        elif args.method == 'evaluation':
            tractogram = evaluation_tractogram(hyperparams, model, dataset, args.batch_size, args.metric)
        else:
            raise ValueError("Unrecognized method: {}".format(args.method))

        tractogram.affine_to_rasmm = dataset.subjects[0].signal.affine
        nib.streamlines.save(tractogram, tractogram_file)
    else:
        print("Tractogram already exists. (use --force to generate it again)")
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version but absent from older experiments.
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'predict_offset': False,
                                   'normalize': False,
                                   'sort_streamlines': False,
                                   'keep_step_size': False,
                                   'use_layer_normalization': False,
                                   'drop_prob': 0.,
                                   'use_zoneout': False,
                                   'skip_connections': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args, exclude=hyperparams_to_exclude, retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager,
                                                      name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager,
                                                      name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3

        model = model_factory(hyperparams, input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error.
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # if args.learn_to_stop:
        #     l2err_monitor = views.MonitorVariable(T.mean(loss.mean_sqr_error))
        #     avg_l2err = tasks.AveragePerEpoch(l2err_monitor)
        #     trainer.append_task(avg_l2err)
        #
        #     crossentropy_monitor = views.MonitorVariable(T.mean(loss.cross_entropy))
        #     avg_crossentropy = tasks.AveragePerEpoch(crossentropy_monitor)
        #     trainer.append_task(avg_crossentropy)
        #
        #     trainer.append_task(tasks.Print("Avg. training L2 err: : {}", avg_l2err))
        #     trainer.append_task(tasks.Print("Avg. training stopping: : {}", avg_crossentropy))
        #     trainer.append_task(tasks.Print("L2 err : {0:.4f}", l2err_monitor, each_k_update=100))
        #     trainer.append_task(tasks.Print("stopping : {0:.4f}", crossentropy_monitor, each_k_update=100))

        # Print NLL mean/stderror.
        # train_loss = L2DistanceForSequences(model, trainset)
        # train_batch_scheduler = StreamlinesBatchScheduler(trainset, batch_size=1000,
        #                                                   noisy_streamlines_sigma=None,
        #                                                   nb_updates_per_epoch=None,
        #                                                   seed=1234)
        # train_error = views.LossView(loss=train_loss, batch_scheduler=train_batch_scheduler)
        # trainer.append_task(tasks.Print("Trainset - Error : {0:.2f} | {1:.2f}", train_error.sum, train_error.mean))

        # HACK: To make sure all subjects in the volume_manager are used in a batch,
        # we have to split the trainset/validset in 2 volume managers.
        model.volume_manager = validset_volume_manager
        model.drop_prob = 0.  # Do not use dropout/zoneout for evaluation.
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams, dataset=validset, train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error : {0:.2f} | {1:.2f}",
                                        valid_error.sum, valid_error.mean))

        if hyperparams['model'] == 'ffnn_regression':
            valid_batch_scheduler2 = batch_scheduler_factory(hyperparams, dataset=validset, train_mode=False)
            valid_l2 = loss_factory(hyperparams, model, validset, loss_type="expected_value")
            valid_l2_error = views.LossView(loss=valid_l2, batch_scheduler=valid_batch_scheduler2)
            trainer.append_task(tasks.Print("Validset - {}".format(valid_l2.__class__.__name__) + "\t: {0:.2f} | {1:.2f}",
                                            valid_l2_error.sum, valid_l2_error.mean))

        # HACK: Restore trainset volume manager.
        model.volume_manager = trainset_volume_manager
        model.drop_prob = hyperparams['drop_prob']  # Restore dropout.

        lookahead_loss = valid_error.sum

        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        if args.view:
            import pylab as plt

            def _plot(*args, **kwargs):
                plt.figure(1)
                plt.clf()
                plt.show(False)
                plt.subplot(121)
                plt.plot(np.array(logger.get_variable_history(0)).flatten(), label="Train")
                plt.plot(np.array(logger.get_variable_history(1)).flatten(), label="Valid")
                plt.legend()

                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(3)).flatten(), label="||d'||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria.
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss, lookahead=args.lookahead,
                                                         eps=args.lookahead_eps,
                                                         callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def test_gru_mixture_track():
    hidden_sizes = 50

    with Timer("Creating dummy volume", newline=True):
        volume_manager = neurotools.VolumeManager()
        dwi, gradients = make_dummy_dwi(nb_gradients=30, volume_shape=(10, 10, 10), seed=1234)
        volume = neurotools.resample_dwi(dwi, gradients.bvals, gradients.bvecs).astype(np.float32)
        volume_manager.register(volume)

    with Timer("Creating model"):
        hyperparams = {'model': 'gru_mixture',
                       'SGD': "1e-2",
                       'hidden_sizes': hidden_sizes,
                       'learn_to_stop': False,
                       'normalize': False,
                       'activation': 'tanh',
                       'feed_previous_direction': False,
                       'predict_offset': False,
                       'use_layer_normalization': False,
                       'drop_prob': 0.,
                       'use_zoneout': False,
                       'skip_connections': False,
                       'neighborhood_radius': None,
                       'nb_seeds_per_voxel': 2,
                       'step_size': 0.5,
                       'batch_size': 200,
                       'n_gaussians': 2,
                       'seed': 1234}
        model = factories.model_factory(hyperparams,
                                        input_size=volume_manager.data_dimension,
                                        output_size=3,
                                        volume_manager=volume_manager)
        model.initialize(factories.weigths_initializer_factory("orthogonal", seed=1234))

    rng = np.random.RandomState(1234)
    mask = np.ones(volume.shape[:3])
    seeding_mask = np.random.randint(2, size=mask.shape)
    seeds = []
    indices = np.array(np.where(seeding_mask)).T
    for idx in indices:
        seeds_in_voxel = idx + rng.uniform(-0.5, 0.5, size=(hyperparams['nb_seeds_per_voxel'], 3))
        seeds.extend(seeds_in_voxel)

    seeds = np.array(seeds, dtype=theano.config.floatX)

    is_outside_mask = make_is_outside_mask(mask, np.eye(4), threshold=0.5)
    is_too_long = make_is_too_long(150)
    is_too_curvy = make_is_too_curvy(np.rad2deg(30))
    is_unlikely = make_is_unlikely(0.5)
    is_stopping = make_is_stopping({STOPPING_MASK: is_outside_mask,
                                    STOPPING_LENGTH: is_too_long,
                                    STOPPING_CURVATURE: is_too_curvy,
                                    STOPPING_LIKELIHOOD: is_unlikely})

    is_stopping.max_nb_points = 150

    args = SimpleNamespace()
    args.track_like_peter = False
    args.pft_nb_retry = 0
    args.pft_nb_backtrack_steps = 0
    args.use_max_component = False
    args.flip_x = False
    args.flip_y = False
    args.flip_z = False
    args.verbose = True

    tractogram = batch_track(model, volume, seeds,
                             step_size=hyperparams['step_size'],
                             is_stopping=is_stopping,
                             batch_size=hyperparams['batch_size'],
                             args=args)

    return True
def main():
    parser = build_argparser()
    args = parser.parse_args()

    tracto_data = None

    if args.signal_source == "raw_signal":
        signal = nib.load(args.signal)
        signal.get_data()  # Forces loading volume in-memory.
        basename = re.sub('(\.gz|\.nii.gz)$', '', args.signal)
        try:
            bvals = basename + '.bvals' if args.bvals is None else args.bvals
            bvecs = basename + '.bvecs' if args.bvecs is None else args.bvecs
            gradients = gradient_table(bvals, bvecs)
        except FileNotFoundError:
            try:
                bvals = basename + '.bval' if args.bvals is None else args.bvals
                bvecs = basename + '.bvec' if args.bvecs is None else args.bvecs
                gradients = gradient_table(bvals, bvecs)
            except FileNotFoundError as e:
                print("Could not find .bvals/.bvecs or .bval/.bvec files...")
                raise e

        tracto_data = TractographyData(signal, gradients)

    elif args.signal_source == "processed_signal":
        loaded_tracto_data = TractographyData.load(args.tracto_data)
        tracto_data = TractographyData(loaded_tracto_data.signal, loaded_tracto_data.gradients)

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(tracto_data.signal.affine)

    # Retrieve data.
    with Timer("Retrieving data", newline=args.verbose):
        for filename in sorted(args.bundles):
            if args.verbose:
                print("{}".format(filename))

            # Load streamlines.
            tfile = nib.streamlines.load(filename)
            tractogram = tfile.tractogram

            original_streamlines = tractogram.streamlines
            lengths = length(original_streamlines)
            streamlines = [s for (s, l) in zip(original_streamlines, lengths) if l >= args.min_length]

            # Make sure file is not empty.
            if len(streamlines) > 0:
                if args.subsample_streamlines:
                    output_streamlines = subsample_streamlines(streamlines, args.clustering_threshold,
                                                               args.removal_distance)

                    print("Total difference: {} / {}".format(len(original_streamlines),
                                                             len(output_streamlines)))
                    new_tractogram = nib.streamlines.Tractogram(output_streamlines,
                                                                affine_to_rasmm=tractogram.affine_to_rasmm)
                    tractogram = new_tractogram

                tractogram.apply_affine(rasmm2vox_affine)

                # Add streamlines to the TractogramData.
                bundle_name = os.path.splitext(os.path.basename(filename))[0]
                tracto_data.add(tractogram.streamlines, bundle_name)

    if args.verbose:
        diff = tracto_data.streamlines._data - tracto_data.streamlines._data.astype(args.dtype)
        precision_error = np.sum(np.sqrt(np.sum(diff ** 2, axis=1)))
        avg_precision_error = precision_error / len(tracto_data.streamlines._data)
        print("Precision error: {} (avg. {})".format(precision_error, avg_precision_error))

    # Save streamlines coordinates using either float16 or float32.
    tracto_data.streamlines._data = tracto_data.streamlines._data.astype(args.dtype)

    # Save dataset.
    tracto_data.save(args.out)
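# Small illustration (made-up data, not part of the original script) of the precision-error
# computation above: the error is the per-point Euclidean distance between float32 coordinates
# and their lower-precision cast.
def _example_dtype_precision_error():
    rng = np.random.RandomState(0)
    points = (rng.rand(1000, 3) * 100).astype(np.float32)  # coordinates in voxel space
    diff = points - points.astype(np.float16)
    precision_error = np.sum(np.sqrt(np.sum(diff ** 2, axis=1)))
    print("Precision error: {} (avg. {})".format(precision_error, precision_error / len(points)))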
def test_gru_mixture_fprop_neighborhood():
    hyperparams = {'model': 'gru_mixture',
                   'SGD': "1e-2",
                   'hidden_sizes': 50,
                   'batch_size': 16,
                   'learn_to_stop': False,
                   'normalize': True,
                   'activation': 'tanh',
                   'feed_previous_direction': False,
                   'predict_offset': False,
                   'use_layer_normalization': False,
                   'drop_prob': 0.,
                   'use_zoneout': False,
                   'skip_connections': False,
                   'seed': 1234,
                   'noisy_streamlines_sigma': None,
                   'keep_step_size': True,
                   'sort_streamlines': False,
                   'n_gaussians': 2,
                   'neighborhood_radius': 0.5}

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = factories.batch_scheduler_factory(hyperparams, dataset=trainset)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hyperparams['hidden_sizes'], batch_scheduler.target_size)

    with Timer("Creating model"):
        model = factories.model_factory(hyperparams,
                                        input_size=volume_manager.data_dimension,
                                        output_size=batch_scheduler.target_size,
                                        volume_manager=volume_manager)
        model.initialize(factories.weigths_initializer_factory("orthogonal", seed=1234))
        print("Input size: {}".format(model.model_input_size))

    # Test fprop with missing streamlines from one subject in a batch
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs], output, updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function([trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
                               loss.loss,
                               updates=model.graph_updates)
    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function([trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
                                list(optimizer.directions.values()),
                                updates=model.graph_updates)
    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)

    return True
def main():
    parser = build_argparser()
    args = parser.parse_args()

    signal = nib.load(args.signal)
    data = signal.get_data()

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(signal.affine)

    # Retrieve data.
    with Timer("Retrieving data"):
        print("Loading {}".format(args.filename))

        # Load streamlines (already in RASmm space).
        tfile = nib.streamlines.load(args.filename)
        tfile.tractogram.apply_affine(rasmm2vox_affine)
        tractogram = Tractogram(streamlines=tfile.streamlines, affine_to_rasmm=signal.affine)

    with Timer("Filtering streamlines"):
        # Get volume bounds.
        x_max = data.shape[0] - 0.5
        y_max = data.shape[1] - 0.5
        z_max = data.shape[2] - 0.5

        mask = np.ones((len(tractogram),)).astype(bool)
        for i, s in enumerate(tractogram.streamlines):
            # Identify streamlines out of bounds.
            oob_test = np.logical_or.reduce((s[:, 0] < -0.5, s[:, 0] >= x_max,   # Out of bounds on axis X.
                                             s[:, 1] < -0.5, s[:, 1] >= y_max,   # Out of bounds on axis Y.
                                             s[:, 2] < -0.5, s[:, 2] >= z_max))  # Out of bounds on axis Z.
            if np.any(oob_test):
                mask[i] = False

        tractogram_filtered = tractogram[mask]
        tractogram_removed = tractogram[np.logical_not(mask)]
        print("Kept {} streamlines and removed {} streamlines".format(len(tractogram_filtered),
                                                                      len(tractogram_removed)))

    with Timer("Saving filtered and removed streamlines"):
        base_filename = args.out_prefix
        if args.out_prefix is None:
            base_filename = args.filename[:-4]

        tractogram_filtered_filename = "{}_filtered.tck".format(base_filename)
        tractogram_removed_filename = "{}_removed.tck".format(base_filename)

        # Save streamlines.
        nib.streamlines.save(tractogram_filtered, tractogram_filtered_filename)
        nib.streamlines.save(tractogram_removed, tractogram_removed_filename)
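# Minimal sketch (illustration only) of the half-voxel bound convention used above:
# in voxel space, valid coordinates along an axis of size D lie in [-0.5, D - 0.5),
# so a single point outside that range flags the whole streamline as out of bounds.
def _example_out_of_bounds_test():
    data_shape = (4, 4, 4)
    s = np.array([[0.0, 0.0, 0.0],
                  [3.6, 1.0, 1.0]])  # second point exceeds x_max = 3.5
    x_max, y_max, z_max = (dim - 0.5 for dim in data_shape)
    oob_test = np.logical_or.reduce((s[:, 0] < -0.5, s[:, 0] >= x_max,
                                     s[:, 1] < -0.5, s[:, 1] >= y_max,
                                     s[:, 2] < -0.5, s[:, 2] >= z_max))
    assert np.any(oob_test)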
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view']
    # Use this for hyperparams added in a new version but absent from older experiments.
    retrocompatibility_defaults = {'use_layer_normalization': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args, exclude=hyperparams_to_exclude, retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_mask_classifier_dataset(args.train_subjects, trainset_volume_manager,
                                                         name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_mask_classifier_dataset(args.valid_subjects, validset_volume_manager,
                                                         name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = MaskClassifierBatchScheduler(trainset, hyperparams['batch_size'],
                                                       seed=hyperparams['seed'])
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        model = FFNN_Classification(trainset_volume_manager, input_size, hyperparams['hidden_sizes'])
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropy(model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error.
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # HACK: To make sure all subjects in the volume_manager are used in a batch,
        # we have to split the trainset/validset in 2 volume managers.
        model.volume_manager = validset_volume_manager
        valid_loss = BinaryCrossEntropy(model, validset)
        valid_batch_scheduler = MaskClassifierBatchScheduler(validset, hyperparams['batch_size'],
                                                             seed=hyperparams['seed'])

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error : {0:.2f} | {1:.2f}",
                                        valid_error.sum, valid_error.mean))

        # HACK: Restore trainset volume manager.
        model.volume_manager = trainset_volume_manager

        lookahead_loss = valid_error.sum

        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch.
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria.
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss, lookahead=args.lookahead,
                                                         eps=args.lookahead_eps,
                                                         callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    data = MaskClassifierData.load(args.dataset)

    positive_coords = data.positive_coords
    negative_coords = data.negative_coords

    rng = np.random.RandomState(args.seed)

    with Timer("Splitting {} using split: {}".format(args.dataset, args.split)):
        nb_positive_examples = positive_coords.shape[0]
        nb_negative_examples = negative_coords.shape[0]
        positive_indices = np.arange(nb_positive_examples)
        negative_indices = np.arange(nb_negative_examples)
        rng.shuffle(positive_indices)
        rng.shuffle(negative_indices)

        train_positive_size = int(np.round(args.split[0] * nb_positive_examples))
        train_negative_size = int(np.round(args.split[0] * nb_negative_examples))
        valid_positive_size = int(np.round(args.split[1] * nb_positive_examples))
        valid_negative_size = int(np.round(args.split[1] * nb_negative_examples))
        test_positive_size = int(np.round(args.split[2] * nb_positive_examples))
        test_negative_size = int(np.round(args.split[2] * nb_negative_examples))

        # Make sure the splits sum to nb_examples.
        test_positive_size += nb_positive_examples - (train_positive_size + valid_positive_size + test_positive_size)
        test_negative_size += nb_negative_examples - (train_negative_size + valid_negative_size + test_negative_size)

        assert train_positive_size + valid_positive_size + test_positive_size == nb_positive_examples
        assert train_negative_size + valid_negative_size + test_negative_size == nb_negative_examples

        train_positive_indices = positive_indices[:train_positive_size]
        valid_positive_indices = positive_indices[train_positive_size:train_positive_size + valid_positive_size]
        test_positive_indices = positive_indices[train_positive_size + valid_positive_size:]

        train_negative_indices = negative_indices[:train_negative_size]
        valid_negative_indices = negative_indices[train_negative_size:train_negative_size + valid_negative_size]
        test_negative_indices = negative_indices[train_negative_size + valid_negative_size:]

        train_data = MaskClassifierData(data.signal, data.gradients, data.mask,
                                        positive_coords[train_positive_indices],
                                        negative_coords[train_negative_indices])
        valid_data = MaskClassifierData(data.signal, data.gradients, data.mask,
                                        positive_coords[valid_positive_indices],
                                        negative_coords[valid_negative_indices])
        test_data = MaskClassifierData(data.signal, data.gradients, data.mask,
                                       positive_coords[test_positive_indices],
                                       negative_coords[test_negative_indices])

    with Timer("Saving"):
        train_data.save(args.dataset[:-4] + "_trainset.npz")
        valid_data.save(args.dataset[:-4] + "_validset.npz")
        test_data.save(args.dataset[:-4] + "_testset.npz")

    if args.delete:
        os.remove(args.dataset)
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(args.subjects, volume_manager, name="dataset",
                                                     use_sh_coeffs=args.use_sh_coeffs)
        print("Total streamlines: {}".format(len(dataset)))

    with Timer("Running T-SNE", newline=True):
        batch_scheduler = TractographyBatchScheduler(dataset,
                                                     batch_size=min(len(dataset), 100000),
                                                     noisy_streamlines_sigma=False,
                                                     seed=1234,
                                                     normalize_target=True)
        rng = np.random.RandomState(42)
        rng.shuffle(batch_scheduler.indices)

        # bundle_name_pattern = "CST_Left"
        # batch_inputs, batch_targets, batch_mask = batch_scheduler._prepare_batch(trainset.get_bundle(bundle_name_pattern, return_idx=True))
        inputs, _, mask = batch_scheduler._next_batch(0)

        # Keep the same number of streamlines per subject.
        new_inputs = []
        new_mask = []
        for i in range(len(args.subjects)):
            subset = inputs[:, 0, -1] == i
            if subset.sum() < args.nb_streamlines_per_subject:
                raise NameError("Not enough streamlines for subject #{}".format(i))

            new_inputs += [inputs[subset][:args.nb_streamlines_per_subject]]
            new_mask += [mask[subset][:args.nb_streamlines_per_subject]]

        inputs = np.concatenate([new_inputs], axis=0)
        mask = np.concatenate([new_mask], axis=0)
        mask = mask.astype(bool)

        idx = np.arange(mask.sum())
        rng.shuffle(idx)

        coords = T.matrix('coords')
        eval_at_coords = theano.function([coords], volume_manager.eval_at_coords(coords))

        M = 2000 * len(dataset.subjects)
        coords = inputs[mask][idx[:M]]
        X = eval_at_coords(coords)

        from sklearn.manifold import TSNE
        tsne = TSNE(n_components=2, verbose=2, random_state=42)
        Y = tsne.fit_transform(X)

        import matplotlib.pyplot as plt
        plt.figure()
        ids = range(len(dataset.subjects))
        markers = ['s', 'o', '^', 'v', '<', '>', 'h']
        colors = ['cyan', 'darkorange', 'darkgreen', 'magenta', 'pink', 'k']
        for i, marker, color in zip(ids, markers, colors):
            idx = coords[:, -1] == i
            print("Subject #{}: ".format(i), idx.sum())
            plt.scatter(Y[idx, 0], Y[idx, 1], 20, color=color, marker=marker, label="Subject #{}".format(i))

        plt.legend()
        plt.show()
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder.
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiment hyperparameters.
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    # Use this for hyperparams added in a new version but absent from older experiments.
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'predict_offset': False,
                                   'normalize': False,
                                   'keep_step_size': False,
                                   'sort_streamlines': False,
                                   'use_layer_normalization': False,
                                   'drop_prob': 0.,
                                   'use_zoneout': False}
    for new_hyperparams, default_value in retrocompatibility_defaults.items():
        if new_hyperparams not in hyperparams:
            hyperparams[new_hyperparams] = default_value

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(args.subjects, volume_manager, name="dataset",
                                                     use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_gaussian':
            from learn2track.models import GRU_Gaussian
            model = GRU_Gaussian.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path, volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))

        model.drop_prob = 0.  # Make sure dropout/zoneout is not used when testing.

    with Timer("Building evaluation function"):
        # Override K for gru_multistep.
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset,
                                                  train_mode=False,
                                                  batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams, model, dataset, loss_type=args.loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)

    with Timer("Evaluating...", newline=True):
        results_file = pjoin(experiment_path, "results.json")
        results = {}
        if os.path.isfile(results_file) and not args.force:
            print("Loading saved results... (use --force to re-run evaluation)")
            results = smartutils.load_dict_from_json_file(results_file)

        tag = ""
        if args.loss_type == 'expected_value' or hyperparams['model'] == 'gru_regression':
            tag = "_EV_L2_error"
        elif args.loss_type == 'maximum_component':
            tag = "_MC_L2_error"
        elif hyperparams['model'] in ['gru_gaussian', 'gru_mixture', 'gru_multistep']:
            tag = "_NLL"

        entry = args.dataset_name + tag

        if entry not in results or args.force:
            with Timer("Evaluating {}".format(entry)):
                dummy_status = Status()  # Forces recomputing results.
                results[entry] = {'mean': float(l2_error.mean.view(dummy_status)),
                                  'stderror': float(l2_error.stderror.view(dummy_status))}
                smartutils.save_dict_to_json_file(results_file, results)  # Update results file.

        print("{}: {:.4f} ± {:.4f}".format(entry, results[entry]['mean'], results[entry]['stderror']))
def test_gru_multistep_fprop_k3():
    hidden_sizes = 50

    hyperparams = {'model': 'gru_multistep',
                   'k': 3,
                   'm': 3,
                   'batch_size': 16,
                   'SGD': "1e-2",
                   'hidden_sizes': hidden_sizes,
                   'learn_to_stop': False,
                   'normalize': False,
                   'noisy_streamlines_sigma': None,
                   'shuffle_streamlines': True,
                   'seed': 1234}

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = factories.batch_scheduler_factory(hyperparams, trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        model = factories.model_factory(hyperparams,
                                        input_size=volume_manager.data_dimension,
                                        output_size=batch_scheduler.target_size,
                                        volume_manager=volume_manager)
        model.initialize(factories.weigths_initializer_factory("orthogonal", seed=1234))

    # Test fprop.
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs], output, updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function([trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
                               loss.loss,
                               updates=model.graph_updates)
    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function([trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
                                list(optimizer.directions.values()),
                                updates=model.graph_updates)
    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)
def main():
    parser = build_argparser()
    args = parser.parse_args()

    # Get experiment folder.
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiment hyperparameters.
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading DWIs"):
        # Load gradient table.
        dwi_name = args.dwi
        if dwi_name.endswith(".gz"):
            dwi_name = dwi_name[:-3]
        if dwi_name.endswith(".nii"):
            dwi_name = dwi_name[:-4]

        bvals_filename = dwi_name + ".bvals"
        bvecs_filename = dwi_name + ".bvecs"
        bvals, bvecs = dipy.io.gradients.read_bvals_bvecs(bvals_filename, bvecs_filename)

        dwi = nib.load(args.dwi)
        if hyperparams["use_sh_coeffs"]:
            # Use 45 spherical harmonic coefficients to represent the diffusion signal.
            weights = neurotools.get_spherical_harmonics_coefficients(dwi, bvals, bvecs).astype(np.float32)
        else:
            # Resample the diffusion signal to have 100 directions.
            weights = neurotools.resample_dwi(dwi, bvals, bvecs).astype(np.float32)

    with Timer("Loading model"):
        if hyperparams["model"] == "ffnn_classification":
            from learn2track.models import FFNN_Classification
            model_class = FFNN_Classification
        else:
            raise ValueError("Unknown model!")

        kwargs = {}
        volume_manager = neurotools.VolumeManager()
        volume_manager.register(weights)
        kwargs['volume_manager'] = volume_manager

        # Load the actual model.
        model = model_class.create(pjoin(experiment_path), **kwargs)  # Create new instance and restore model.
        print(str(model))

    with Timer("Generating mask"):
        symb_input = T.matrix(name="input")
        model_symb_pred = model.get_output(symb_input)
        f = theano.function(inputs=[symb_input], outputs=[model_symb_pred])

        generated_mask = np.zeros(dwi.shape[:3]).astype(np.float32)

        # all_coords.shape = (n_coords, 3)
        all_coords = np.argwhere(generated_mask == 0)

        volume_ids = np.zeros((all_coords.shape[0], 1))

        all_coords_and_volume_ids = np.concatenate((all_coords, volume_ids), axis=1).astype(np.float32)

        batch_size = args.batch_size if args.batch_size else len(all_coords_and_volume_ids)
        probs = []
        while batch_size > 1:
            print("Trying to process batches of size {} out of {}".format(batch_size,
                                                                          len(all_coords_and_volume_ids)))
            nb_batches = int(np.ceil(len(all_coords_and_volume_ids) / batch_size))
            try:
                for batch_count in range(nb_batches):
                    start = batch_count * batch_size
                    end = (batch_count + 1) * batch_size
                    probs.extend(f(all_coords_and_volume_ids[start:end])[-1])
                    print("Generated batch {} out of {}".format(batch_count + 1, nb_batches))
                break
            except MemoryError:
                print("{} coordinates at the same time is too much!".format(batch_size))
                batch_size //= 2
            except RuntimeError:
                print("{} coordinates at the same time is too much!".format(batch_size))
                batch_size //= 2

        if not probs:
            raise RuntimeError("Could not generate predictions...")

        generated_mask[np.where(generated_mask == 0)] = np.array(probs) > 0.5

    with Timer("Saving generated mask"):
        filename = args.out
        if args.out is None:
            prefix = args.prefix
            if prefix is None:
                dwi_name = os.path.basename(args.dwi)
                if dwi_name.endswith(".nii.gz"):
                    dwi_name = dwi_name[:-7]
                else:  # .nii
                    dwi_name = dwi_name[:-4]

                prefix = os.path.basename(os.path.dirname(args.dwi)) + dwi_name
                prefix = prefix.replace(".", "_")

            filename = "{}.nii.gz".format(prefix)

        save_path = pjoin(experiment_path, filename)
        try:  # Create dirs, if needed.
            os.makedirs(os.path.dirname(save_path))
        except:
            pass

        print("Saving to {}".format(save_path))
        mask = nib.Nifti1Image(generated_mask, dwi.affine)
        nib.save(mask, save_path)