def main():
    """Generate a tractogram with a trained model and save it in the experiment folder.

    The experiment is located from ``args.name`` (either a directory or the
    name of a folder under ``./experiments``), its hyperparameters are read
    from ``hyperparams.json`` (falling back to the parent folder), and the
    tractogram is built with either ``prediction_tractogram`` or
    ``evaluation_tractogram`` depending on ``args.method``.

    If the output file already exists and ``--force`` was not given, the
    function returns before loading the dataset or the model.

    Raises:
        NameError: if ``hyperparams['model']`` is not a supported model name.
        ValueError: if ``args.method`` is neither 'prediction' nor 'evaluation'.
    """
    parser = build_args_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    # Check for an existing output *before* the expensive dataset/model
    # loading, so that a run without --force returns immediately.
    tractogram_file = pjoin(experiment_path, args.out)
    if os.path.isfile(tractogram_file) and not args.force:
        print("Tractogram already exists. (use --force to generate it again)")
        return

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset([args.streamlines], volume_manager, name="dataset",
                                                     use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        # Maps the 'model' hyperparameter to the class name in learn2track.models.
        model_class_names = {
            'gru_regression': 'GRU_Regression',
            'gru_mixture': 'GRU_Mixture',
            'gru_multistep': 'GRU_Multistep_Gaussian',
            'ffnn_regression': 'FFNN_Regression',
        }
        model_name = hyperparams['model']
        if model_name not in model_class_names:
            raise NameError("Unknown model: {}".format(model_name))

        from learn2track import models as l2t_models
        model_class = getattr(l2t_models, model_class_names[model_name])
        model = model_class.create(experiment_path, volume_manager=volume_manager)

        if model_name == 'gru_multistep':
            # The multistep model is reset to k = m = 1 after loading.
            model.k = 1
            model.m = 1

        print(str(model))

    if args.method == 'prediction':
        tractogram = prediction_tractogram(hyperparams, model, dataset, args.batch_size, args.prediction)
    elif args.method == 'evaluation':
        tractogram = evaluation_tractogram(hyperparams, model, dataset, args.batch_size, args.metric)
    else:
        raise ValueError("Unrecognized method: {}".format(args.method))

    # Use the affine of the first subject's signal for the saved tractogram.
    tractogram.affine_to_rasmm = dataset.subjects[0].signal.affine
    nib.streamlines.save(tractogram, tractogram_file)
def main():
    """Compute the loss of every streamline of a subject and save it with the tractogram.

    The experiment folder is resolved from ``args.name``, the model is
    restored from it (only 'gru_regression' is supported here), the loss
    is evaluated through a ``LossView`` and the per-streamline values are
    stored under ``data_per_streamline["loss"]`` in the file ``args.out``.

    Raises:
        NameError: if ``hyperparams["model"]`` is not 'gru_regression'.
    """
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder: either a path or the name of an experiment.
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        experiment_path = pjoin(".", "experiments", args.name)
        if not os.path.isdir(experiment_path):
            parser.error("Cannot find experiment: {0}!".format(args.name))

    # Load experiments hyperparameters (fall back to the parent folder).
    local_hyperparams_file = pjoin(experiment_path, "hyperparams.json")
    try:
        hyperparams = smartutils.load_dict_from_json_file(local_hyperparams_file)
    except FileNotFoundError:
        parent_hyperparams_file = pjoin(experiment_path, "..", "hyperparams.json")
        hyperparams = smartutils.load_dict_from_json_file(parent_hyperparams_file)

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            [args.subject],
            volume_manager,
            name="dataset",
            use_sh_coeffs=hyperparams["use_sh_coeffs"],
        )
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model_name = hyperparams["model"]
        if model_name != "gru_regression":
            raise NameError("Unknown model: {}".format(model_name))

        from learn2track.models import GRU_Regression
        model = GRU_Regression.create(experiment_path, volume_manager=volume_manager)

    with Timer("Building evaluation function"):
        loss = loss_factory(hyperparams, model, dataset)
        scheduler_options = dict(
            batch_size=1000,
            noisy_streamlines_sigma=None,
            use_data_augment=False,  # Otherwise it doubles the number of losses :-/
            seed=1234,
            shuffle_streamlines=False,
            normalize_target=hyperparams["normalize"],
        )
        batch_scheduler = batch_schedulers.TractographyBatchScheduler(dataset, **scheduler_options)

        loss_view = views.LossView(loss=loss, batch_scheduler=batch_scheduler)
        losses = loss_view.losses.view()

    with Timer("Saving streamlines"):
        affine = dataset.subjects[0].signal.affine
        tractogram = Tractogram(dataset.streamlines, affine_to_rasmm=affine)
        tractogram.data_per_streamline["loss"] = losses
        nib.streamlines.save(tractogram, args.out)
def main():
    """Build a tractogram from a trained model and write it next to the experiment.

    Steps: resolve the experiment folder from ``args.name``, read its
    ``hyperparams.json`` (or the one in the parent folder), then, unless the
    output already exists and ``--force`` is not set, load the streamlines
    dataset and the model and call ``prediction_tractogram`` or
    ``evaluation_tractogram`` according to ``args.method``.

    Raises:
        NameError: if ``hyperparams['model']`` is not a supported model name.
        ValueError: if ``args.method`` is neither 'prediction' nor 'evaluation'.
    """
    parser = build_args_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    # Bail out early: there is no point in loading the dataset and the model
    # when the tractogram is already on disk and regeneration is not forced.
    tractogram_file = pjoin(experiment_path, args.out)
    if os.path.isfile(tractogram_file) and not args.force:
        print("Tractogram already exists. (use --force to generate it again)")
        return

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            [args.streamlines],
            volume_manager,
            name="dataset",
            use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model_name = hyperparams['model']
        if model_name == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path,
                                          volume_manager=volume_manager)
        elif model_name == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path,
                                       volume_manager=volume_manager)
        elif model_name == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(
                experiment_path, volume_manager=volume_manager)
            # The multistep model is reset to k = m = 1 after loading.
            model.k = 1
            model.m = 1
        elif model_name == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path,
                                           volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(model_name))

        print(str(model))

    if args.method == 'prediction':
        tractogram = prediction_tractogram(hyperparams, model, dataset,
                                           args.batch_size, args.prediction)
    elif args.method == 'evaluation':
        tractogram = evaluation_tractogram(hyperparams, model, dataset,
                                           args.batch_size, args.metric)
    else:
        raise ValueError("Unrecognized method: {}".format(args.method))

    # Use the affine of the first subject's signal for the saved tractogram.
    tractogram.affine_to_rasmm = dataset.subjects[0].signal.affine
    nib.streamlines.save(tractogram, tractogram_file)
def main():
    """Train a model on tractography data, with validation-based early stopping.

    The function creates (or resumes) an experiment folder, loads the
    training and validation sets in separate volume managers, builds the
    model, loss, optimizer and trainer, registers monitoring / saving /
    stopping tasks on the trainer, compiles the Theano graph and trains.

    The process exits through ``sys.exit`` when ``args.view`` is set (after
    calling ``tsne_view``) or when a NaN is found in the first model parameter.
    """
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version, but nonexistent from older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'normalize': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args, exclude=hyperparams_to_exclude, retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager,
                                                      name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager,
                                                      name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        if args.view:
            tsne_view(trainset, trainset_volume_manager)
            sys.exit(0)

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3  # Three extra inputs when the previous direction is fed back.

        model = model_factory(hyperparams,
                              input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # HACK: To make sure all subjects in the volume_manager are used in a batch,
        #       we have to split the trainset/validset in 2 volume managers
        model.volume_manager = validset_volume_manager
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams, dataset=validset, train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error : {0:.2f} | {1:.2f}",
                                        valid_error.sum, valid_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager

        lookahead_loss = valid_error.sum

        # Norm of the whole gradient (all parameters together).
        direction_norm = views.MonitorVariable(
            T.sqrt(sum(T.sqr(d).sum() for d in loss.gradients.values())))

        # Positions of the views inside the logger; `_plot` below relies on them.
        logged_views = (valid_error.mean, valid_error.sum, direction_norm)
        valid_mean_idx = 0
        direction_norm_idx = 2
        logger = tasks.Logger(*logged_views)
        trainer.append_task(logger)

        # NOTE(review): in this version the `args.view` branch above already
        # exits the process, so the live-plot branch below is never reached.
        if args.view:
            import pylab as plt

            def _plot(*_args, **_kwargs):
                # Only three views are logged (indices 0..2). The previous code
                # read index 3 and labelled validation curves as "Train".
                plt.figure(1)
                plt.clf()
                plt.show(block=False)
                plt.subplot(121)
                plt.plot(np.array(logger.get_variable_history(valid_mean_idx)).flatten(), label="Valid")
                plt.legend()
                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(direction_norm_idx)).flatten(), label="||d'||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        def _first_parameter_has_nan():
            # Only the first model parameter is inspected.
            return np.isnan(model.parameters[0].get_value().sum())

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if _first_parameter_has_nan():
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if _first_parameter_has_nan():
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss,
                                                         lookahead=args.lookahead,
                                                         eps=args.lookahead_eps,
                                                         callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def main():
    """Train a model on tractography data, with validation-based early stopping.

    The function creates (or resumes) an experiment folder, loads the
    training and validation sets in separate volume managers, builds the
    model, loss, optimizer and trainer, registers monitoring / saving /
    stopping tasks on the trainer, compiles the Theano graph and trains.

    Dropout is disabled (``model.drop_prob = 0.``) while the validation
    losses are built and restored afterwards. The process exits through
    ``sys.exit`` when a NaN is found in the first model parameter.
    """
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version, but nonexistent from older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'predict_offset': False,
                                   'normalize': False,
                                   'sort_streamlines': False,
                                   'keep_step_size': False,
                                   'use_layer_normalization': False,
                                   'drop_prob': 0.,
                                   'use_zoneout': False,
                                   'skip_connections': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args, exclude=hyperparams_to_exclude, retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager,
                                                      name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager,
                                                      name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3  # Three extra inputs when the previous direction is fed back.

        model = model_factory(hyperparams,
                              input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss: : {}", avg_loss))

        # HACK: To make sure all subjects in the volume_manager are used in a batch,
        #       we have to split the trainset/validset in 2 volume managers
        model.volume_manager = validset_volume_manager
        model.drop_prob = 0.  # Do not use dropout/zoneout for evaluation
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams, dataset=validset, train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error : {0:.2f} | {1:.2f}",
                                        valid_error.sum, valid_error.mean))

        if hyperparams['model'] == 'ffnn_regression':
            # Additionally report the "expected_value" loss on the validation set.
            valid_batch_scheduler2 = batch_scheduler_factory(hyperparams, dataset=validset, train_mode=False)
            valid_l2 = loss_factory(hyperparams, model, validset, loss_type="expected_value")
            valid_l2_error = views.LossView(loss=valid_l2, batch_scheduler=valid_batch_scheduler2)
            trainer.append_task(tasks.Print("Validset - {}".format(valid_l2.__class__.__name__) + "\t: {0:.2f} | {1:.2f}",
                                            valid_l2_error.sum, valid_l2_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager
        model.drop_prob = hyperparams['drop_prob']  # Restore dropout

        lookahead_loss = valid_error.sum

        # Norm of the whole gradient (all parameters together).
        direction_norm = views.MonitorVariable(
            T.sqrt(sum(T.sqr(d).sum() for d in loss.gradients.values())))

        # Positions of the views inside the logger; `_plot` below relies on them.
        logged_views = (valid_error.mean, valid_error.sum, direction_norm)
        valid_mean_idx = 0
        direction_norm_idx = 2
        logger = tasks.Logger(*logged_views)
        trainer.append_task(logger)

        if args.view:
            import pylab as plt

            def _plot(*_args, **_kwargs):
                # Only three views are logged (indices 0..2). The previous code
                # read index 3 and labelled validation curves as "Train".
                plt.figure(1)
                plt.clf()
                plt.show(block=False)
                plt.subplot(121)
                plt.plot(np.array(logger.get_variable_history(valid_mean_idx)).flatten(), label="Valid")
                plt.legend()
                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(direction_norm_idx)).flatten(), label="||d'||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        def _first_parameter_has_nan():
            # Only the first model parameter is inspected.
            return np.isnan(model.parameters[0].get_value().sum())

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if _first_parameter_has_nan():
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if _first_parameter_has_nan():
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss,
                                                         lookahead=args.lookahead,
                                                         eps=args.lookahead_eps,
                                                         callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def main():
    """Evaluate a trained model on a dataset and record the result in ``results.json``.

    The experiment folder is resolved from ``args.name``; the model named in
    its hyperparameters is restored and its loss (``args.loss_type``) is
    evaluated over ``args.subjects``. The mean and standard error are stored
    under the key ``args.dataset_name + tag`` in
    ``<experiment>/results.json`` and printed.

    Entries already present in ``results.json`` are always preserved:
    ``--force`` only recomputes the entry of the current run.

    Raises:
        NameError: if ``hyperparams['model']`` is not a supported model name.
    """
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(args.subjects, volume_manager, name="dataset",
                                                     use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model_name = hyperparams['model']
        if model_name == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path, volume_manager=volume_manager)
        elif model_name == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path, volume_manager=volume_manager)
        elif model_name == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(experiment_path, volume_manager=volume_manager)
            # The multistep model is reset to k = m = 1 after loading.
            model.k = 1
            model.m = 1
        elif model_name == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path, volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(model_name))

    with Timer("Building evaluation function"):
        # Override K for gru_multistep
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset, train_mode=False,
                                                  batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams, model, dataset, loss_type=args.loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)

    with Timer("Evaluating...", newline=True):
        results_file = pjoin(experiment_path, "results.json")
        results = {}
        if os.path.isfile(results_file):
            if args.force:
                # Keep the entries of other datasets / loss types: forcing a
                # re-run must only replace the entry computed below.
                try:
                    results = smartutils.load_dict_from_json_file(results_file)
                except ValueError:
                    # Assumes the loader raises ValueError (e.g. a JSON decode
                    # error) on malformed content; start from scratch then.
                    results = {}
            else:
                print("Loading saved results... (use --force to re-run evaluation)")
                results = smartutils.load_dict_from_json_file(results_file)

        # The tag identifies which quantity is stored for this dataset.
        tag = ""
        if args.loss_type == 'expected_value' or hyperparams['model'] == 'gru_regression':
            tag = "_EV_L2_error"
        elif args.loss_type == 'maximum_component':
            tag = "_MC_L2_error"
        elif hyperparams['model'] in ('gru_mixture', 'gru_multistep'):
            tag = "_NLL"

        entry = args.dataset_name + tag

        if entry not in results or args.force:
            with Timer("Evaluating {}".format(entry)):
                dummy_status = Status()  # Forces recomputing results
                results[entry] = {'mean': float(l2_error.mean.view(dummy_status)),
                                  'stderror': float(l2_error.stderror.view(dummy_status))}
                smartutils.save_dict_to_json_file(results_file, results)  # Update results file.

        print("{}: {:.4f} ± {:.4f}".format(entry, results[entry]['mean'], results[entry]['stderror']))
def main():
    """Show a 2D t-SNE embedding of the data sampled along streamlines, per subject.

    A single batch of streamlines is drawn, the same number of streamlines
    (``args.nb_streamlines_per_subject``) is kept for every subject, the
    volume data is evaluated at up to ``2000 * nb_subjects`` randomly chosen
    valid coordinates and embedded with t-SNE. One scatter series is drawn
    per subject.

    Raises:
        NameError: if a subject has fewer streamlines in the batch than
            ``args.nb_streamlines_per_subject``.
    """
    import itertools

    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            args.subjects, volume_manager, name="dataset", use_sh_coeffs=args.use_sh_coeffs)
        print("Total streamlines: {}".format(len(dataset)))

    with Timer("Running T-SNE", newline=True):
        batch_scheduler = TractographyBatchScheduler(
            dataset,
            batch_size=min(len(dataset), 100000),
            noisy_streamlines_sigma=False,
            seed=1234,
            normalize_target=True)
        rng = np.random.RandomState(42)
        rng.shuffle(batch_scheduler.indices)

        inputs, _, mask = batch_scheduler._next_batch(0)

        # Keep the same number of streamlines per subject.
        # The last input feature is compared against the subject index.
        nb_kept = args.nb_streamlines_per_subject
        inputs_per_subject = []
        mask_per_subject = []
        for subject_id in range(len(args.subjects)):
            subset = inputs[:, 0, -1] == subject_id
            if subset.sum() < nb_kept:
                raise NameError(
                    "Not enough streamlines for subject #{}".format(subject_id))

            inputs_per_subject.append(inputs[subset][:nb_kept])
            mask_per_subject.append(mask[subset][:nb_kept])

        # Every chunk has exactly `nb_kept` streamlines, so stacking them along
        # the first axis selects the same points, in the same order, as before.
        inputs = np.concatenate(inputs_per_subject, axis=0)
        mask = np.concatenate(mask_per_subject, axis=0).astype(bool)

        # Random order over all the valid (unmasked) points.
        point_order = np.arange(mask.sum())
        rng.shuffle(point_order)

        symbolic_coords = T.matrix('coords')
        eval_at_coords = theano.function([symbolic_coords],
                                         volume_manager.eval_at_coords(symbolic_coords))

        M = 2000 * len(dataset.subjects)
        coords = inputs[mask][point_order[:M]]
        X = eval_at_coords(coords)

        # Import from the public package rather than the private `t_sne` module.
        from sklearn.manifold import TSNE
        tsne = TSNE(n_components=2, verbose=2, random_state=42)
        Y = tsne.fit_transform(X)

        import matplotlib.pyplot as plt
        plt.figure()
        ids = range(len(dataset.subjects))
        markers = ['s', 'o', '^', 'v', '<', '>', 'h']
        colors = ['cyan', 'darkorange', 'darkgreen', 'magenta', 'pink', 'k']
        # Cycle over the styles so that subjects beyond the sixth are still
        # drawn instead of being silently dropped by `zip`.
        for i, marker, color in zip(ids, itertools.cycle(markers), itertools.cycle(colors)):
            in_subject = coords[:, -1] == i
            print("Subject #{}: ".format(i), in_subject.sum())
            plt.scatter(Y[in_subject, 0], Y[in_subject, 1], 20, color=color, marker=marker,
                        label="Subject #{}".format(i))

        plt.legend()
        plt.show()
def main():
    """Evaluate a trained model on a dataset and record the result in ``results.json``.

    The experiment folder is resolved from ``args.name``; hyperparameters
    missing from older experiments are filled with defaults, the model is
    restored with dropout disabled, and its loss (``args.loss_type``) is
    evaluated over ``args.subjects``. The mean and standard error are stored
    under the key ``args.dataset_name + tag`` in
    ``<experiment>/results.json`` and printed.

    Entries already present in ``results.json`` are always preserved:
    ``--force`` only recomputes the entry of the current run.

    Raises:
        NameError: if ``hyperparams['model']`` is not a supported model name.
    """
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    # Use this for hyperparams added in a new version, but nonexistent from older versions
    retrocompatibility_defaults = {
        'feed_previous_direction': False,
        'predict_offset': False,
        'normalize': False,
        'keep_step_size': False,
        'sort_streamlines': False,
        'use_layer_normalization': False,
        'drop_prob': 0.,
        'use_zoneout': False
    }
    for hyperparam_name, default_value in retrocompatibility_defaults.items():
        hyperparams.setdefault(hyperparam_name, default_value)

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            args.subjects,
            volume_manager,
            name="dataset",
            use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        # Maps the 'model' hyperparameter to the class name in learn2track.models.
        model_class_names = {
            'gru_regression': 'GRU_Regression',
            'gru_gaussian': 'GRU_Gaussian',
            'gru_mixture': 'GRU_Mixture',
            'gru_multistep': 'GRU_Multistep_Gaussian',
            'ffnn_regression': 'FFNN_Regression',
        }
        model_name = hyperparams['model']
        if model_name not in model_class_names:
            raise NameError("Unknown model: {}".format(model_name))

        from learn2track import models as l2t_models
        model_class = getattr(l2t_models, model_class_names[model_name])
        model = model_class.create(experiment_path, volume_manager=volume_manager)

        if model_name == 'gru_multistep':
            # The multistep model is reset to k = m = 1 after loading.
            model.k = 1
            model.m = 1

        model.drop_prob = 0.  # Make sure dropout/zoneout is not used when testing

    with Timer("Building evaluation function"):
        # Override K for gru_multistep
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(
            hyperparams,
            dataset,
            train_mode=False,
            batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams, model, dataset, loss_type=args.loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)

    with Timer("Evaluating...", newline=True):
        results_file = pjoin(experiment_path, "results.json")
        results = {}
        if os.path.isfile(results_file):
            if args.force:
                # Keep the entries of other datasets / loss types: forcing a
                # re-run must only replace the entry computed below.
                try:
                    results = smartutils.load_dict_from_json_file(results_file)
                except ValueError:
                    # Assumes the loader raises ValueError (e.g. a JSON decode
                    # error) on malformed content; start from scratch then.
                    results = {}
            else:
                print("Loading saved results... (use --force to re-run evaluation)")
                results = smartutils.load_dict_from_json_file(results_file)

        # The tag identifies which quantity is stored for this dataset.
        tag = ""
        if args.loss_type == 'expected_value' or hyperparams['model'] == 'gru_regression':
            tag = "_EV_L2_error"
        elif args.loss_type == 'maximum_component':
            tag = "_MC_L2_error"
        elif hyperparams['model'] in ['gru_gaussian', 'gru_mixture', 'gru_multistep']:
            tag = "_NLL"

        entry = args.dataset_name + tag

        if entry not in results or args.force:
            with Timer("Evaluating {}".format(entry)):
                dummy_status = Status()  # Forces recomputing results
                results[entry] = {
                    'mean': float(l2_error.mean.view(dummy_status)),
                    'stderror': float(l2_error.stderror.view(dummy_status))
                }
                smartutils.save_dict_to_json_file(results_file, results)  # Update results file.

        print("{}: {:.4f} ± {:.4f}".format(entry, results[entry]['mean'],
                                           results[entry]['stderror']))