def test_gru_mixture_fprop():
    hidden_sizes = 50

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = batch_schedulers.TractographyBatchScheduler(
            trainset, batch_size=16, noisy_streamlines_sigma=None, seed=1234)
        print("An epoch will be composed of {} updates.".format(
            batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hidden_sizes,
              batch_scheduler.target_size)

    with Timer("Creating model"):
        hyperparams = {
            'model': 'gru_mixture',
            'n_gaussians': 2,
            'SGD': "1e-2",
            'hidden_sizes': hidden_sizes,
            'learn_to_stop': False,
            'normalize': False,
            'feed_previous_direction': False
        }
        model = factories.model_factory(
            hyperparams,
            input_size=volume_manager.data_dimension,
            output_size=batch_scheduler.target_size,
            volume_manager=volume_manager)
        model.initialize(
            factories.weigths_initializer_factory("orthogonal", seed=1234))

    # Test fprop with missing streamlines from one subject in a batch
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs],
                          output,
                          updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        loss.loss,
        updates=model.graph_updates)

    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        list(optimizer.directions.values()),
        updates=model.graph_updates)

    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error("Cannot find experiment: {0}!".format(args.name))

    # Load the experiment's hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            [args.subject], volume_manager, name="dataset", use_sh_coeffs=hyperparams["use_sh_coeffs"]
        )
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        if hyperparams["model"] == "gru_regression":
            from learn2track.models import GRU_Regression

            model = GRU_Regression.create(experiment_path, volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(hyperparams["model"]))

    with Timer("Building evaluation function"):
        loss = loss_factory(hyperparams, model, dataset)
        batch_scheduler = batch_schedulers.TractographyBatchScheduler(
            dataset,
            batch_size=1000,
            noisy_streamlines_sigma=None,
            use_data_augment=False,  # Otherwise it doubles the number of losses :-/
            seed=1234,
            shuffle_streamlines=False,
            normalize_target=hyperparams["normalize"],
        )

        loss_view = views.LossView(loss=loss, batch_scheduler=batch_scheduler)
        losses = loss_view.losses.view()

    with Timer("Saving streamlines"):
        tractogram = Tractogram(dataset.streamlines, affine_to_rasmm=dataset.subjects[0].signal.affine)
        tractogram.data_per_streamline["loss"] = losses
        nib.streamlines.save(tractogram, args.out)
def test_gru_regression_fprop():
    hidden_sizes = 50

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = batch_schedulers.TractographyBatchScheduler(trainset,
                                                                      batch_size=16,
                                                                      noisy_streamlines_sigma=None,
                                                                      seed=1234)
        print ("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print (volume_manager.data_dimension, hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        hyperparams = {'model': 'gru_regression',
                       'SGD': "1e-2",
                       'hidden_sizes': hidden_sizes,
                       'learn_to_stop': False,
                       'normalize': False}
        model = factories.model_factory(hyperparams,
                                        input_size=volume_manager.data_dimension,
                                        output_size=batch_scheduler.target_size,
                                        volume_manager=volume_manager)
        model.initialize(factories.weigths_initializer_factory("orthogonal", seed=1234))


    # Test fprop with missing streamlines from one subject in a batch
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs], output, updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)


    fct_loss = theano.function([trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
                                loss.loss,
                                updates=model.graph_updates)

    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)


    fct_optim = theano.function([trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
                                list(optimizer.directions.values()),
                                updates=model.graph_updates)

    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)
def compute_loss_errors(streamlines, model, hyperparams):
    # Create dummy dataset for these new streamlines.
    tracto_data = neurotools.TractographyData(None, None, None)
    tracto_data.add(streamlines, bundle_name="Generated")
    tracto_data.subject_id = 0
    dataset = datasets.TractographyDataset([tracto_data], "Generated", keep_on_cpu=True)

    # Override K for gru_multistep
    if 'k' in hyperparams:
        hyperparams['k'] = 1
    batch_scheduler = batch_scheduler_factory(hyperparams, dataset, train_mode=False, batch_size_override=1000, use_data_augment=False)
    loss = loss_factory(hyperparams, model, dataset)
    loss_view = views.LossView(loss=loss, batch_scheduler=batch_scheduler)
    return loss_view.losses.view()
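# Hedged usage sketch (not part of the original code base): given a trained `model` and its
# `hyperparams`, loaded as in the main() functions elsewhere in this file, compute_loss_errors
# can be used to prune generated streamlines. The helper below and its `keep_ratio` argument
# are hypothetical; numpy is assumed to be imported as `np`, as in the rest of this file.
def keep_best_streamlines(streamlines, model, hyperparams, keep_ratio=0.9):
    losses = compute_loss_errors(streamlines, model, hyperparams)
    # Keep the fraction of streamlines with the lowest per-streamline loss,
    # mirroring the --keep-top logic used further below.
    idx = np.argsort(losses)[:int(keep_ratio * len(losses))]
    return [streamlines[i] for i in idx], losses[idx]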
def test_gru_multistep_fprop_k3():
    hidden_sizes = 50

    hyperparams = {
        'model': 'gru_multistep',
        'k': 3,
        'm': 3,
        'batch_size': 16,
        'SGD': "1e-2",
        'hidden_sizes': hidden_sizes,
        'learn_to_stop': False,
        'normalize': False,
        'noisy_streamlines_sigma': None,
        'shuffle_streamlines': True,
        'seed': 1234
    }

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = factories.batch_scheduler_factory(hyperparams,
                                                            trainset,
                                                            train_mode=True)
        print("An epoch will be composed of {} updates.".format(
            batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hidden_sizes,
              batch_scheduler.target_size)

    with Timer("Creating model"):
        model = factories.model_factory(
            hyperparams,
            input_size=volume_manager.data_dimension,
            output_size=batch_scheduler.target_size,
            volume_manager=volume_manager)
        model.initialize(
            factories.weigths_initializer_factory("orthogonal", seed=1234))

    # Test fprop
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs],
                          output,
                          updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        loss.loss,
        updates=model.graph_updates)

    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        list(optimizer.directions.values()),
        updates=model.graph_updates)

    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    if min(args.keep_top) < 0 or max(args.keep_top) > 1:
        parser.error("--keep-top values must be in [0, 1].")

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load the experiment's hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    # Use this for hyperparams added in a new version, but nonexistent in older versions
    retrocompatibility_defaults = {
        'feed_previous_direction': False,
        'predict_offset': False,
        'normalize': False,
        'keep_step_size': False,
        'sort_streamlines': False
    }
    for new_hyperparams, default_value in retrocompatibility_defaults.items():
        if new_hyperparams not in hyperparams:
            hyperparams[new_hyperparams] = default_value

    with Timer("Loading signal data and tractogram", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset_from_dwi_and_tractogram(
            args.signal,
            args.tractogram,
            volume_manager,
            use_sh_coeffs=hyperparams['use_sh_coeffs'],
            bvals=args.bvals,
            bvecs=args.bvecs,
            step_size=args.step_size)
        print("Dataset size:", len(dataset))

        if vizu_available and args.vizu:
            vizu.check_dataset_integrity(dataset, subset=0.2)

    with Timer("Loading model"):
        loss_type = args.loss_type
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path,
                                          volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path,
                                       volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(
                experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path,
                                           volume_manager=volume_manager)

            if loss_type in ['l2_sum', 'l2_mean']:
                loss_type = "expected_value"

        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))

    with Timer("Building evaluation function"):
        # Override K for gru_multistep
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(
            hyperparams,
            dataset,
            use_data_augment=False,  # Otherwise it doubles the number of losses :-/
            train_mode=False,
            batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams, model, dataset, loss_type=loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)

    with Timer("Scoring...", newline=True):
        dummy_status = Status()  # Forces recomputing results
        losses = l2_error.losses.view(dummy_status)

        if hyperparams['model'] == 'ffnn_regression':
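            # The FFNN loss comes back as one flat array with one value per step
            # (i.e. nb_points - 1 values per streamline). Reuse the streamlines'
            # ArraySequence structure to regroup it per streamline: shorten each
            # length by one and shift the offsets accordingly.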
            _losses = dataset.streamlines.copy()
            _losses._data = losses.copy()
            _losses._lengths -= 1
            _losses._offsets -= np.arange(len(dataset.streamlines))

            if args.loss_type == 'l2_sum':
                losses = np.asarray([l.sum() for l in _losses])
            elif args.loss_type == 'l2_mean':
                losses = np.asarray([l.mean() for l in _losses])

        mean = float(l2_error.mean.view(dummy_status))
        stderror = float(l2_error.stderror.view(dummy_status))

        print("Loss: {:.4f} ± {:.4f}".format(mean, stderror))
        print("Min: {:.4f}".format(losses.min()))
        print("Max: {:.4f}".format(losses.max()))
        print("Percentiles: {}".format(
            np.percentile(losses, [0, 25, 50, 75, 100])))

    with Timer("Saving streamlines"):
        nii = dataset.subjects[0].signal
        tractogram = nib.streamlines.Tractogram(
            dataset.streamlines[batch_scheduler.indices],
            affine_to_rasmm=nii.affine)
        tractogram.data_per_streamline['loss'] = losses

        header = {}
        header[Field.VOXEL_TO_RASMM] = nii.affine.copy()
        header[Field.VOXEL_SIZES] = nii.header.get_zooms()[:3]
        header[Field.DIMENSIONS] = nii.shape[:3]
        header[Field.VOXEL_ORDER] = "".join(aff2axcodes(nii.affine))

        nib.streamlines.save(tractogram.copy(), args.out, header=header)

    if len(args.keep_top) > 0:
        for keep_top in args.keep_top:
            with Timer("Saving top {}% streamlines".format(keep_top)):
                idx = np.argsort(losses)
                idx = idx[:int(keep_top * len(losses))]
                print("Keeping {}/{} streamlines".format(
                    len(idx), len(losses)))
                sub_tractogram = tractogram[idx]
                out_filename = args.out[:-4] + "_top{}".format(
                    keep_top) + ".tck"
                nib.streamlines.save(sub_tractogram, out_filename)
def prediction_tractogram(hyperparams, model, dataset, batch_size_override,
                          prediction_method):
    loss = loss_factory(hyperparams,
                        model,
                        dataset,
                        loss_type=prediction_method)
    batch_scheduler = batch_scheduler_factory(
        hyperparams,
        dataset,
        train_mode=False,
        batch_size_override=batch_size_override,
        use_data_augment=False)

    _ = loss.losses  # Hack to generate update dict in loss :(
    predictions = loss.samples

    predict, timestep_losses, inputs, targets, masks = log_variables(
        batch_scheduler, model, predictions, loss.loss_per_time_step,
        dataset.symb_inputs * 1, dataset.symb_targets * 1,
        dataset.symb_mask * 1)
    if hyperparams['model'] == 'ffnn_regression':
        # Regrouping data into streamlines will only work if the original streamlines were NOT shuffled, resampled or augmented
        timesteps_prediction = ArraySequence()
        timesteps_loss = ArraySequence()
        timesteps_inputs = ArraySequence()
        timesteps_targets = ArraySequence()
        idx = 0
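        # Walk the flat per-timestep arrays streamline by streamline, using the
        # stored streamline lengths to slice out each streamline's steps.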
        for length in dataset.streamlines._lengths:
            start = idx
            idx = end = idx + length
            timesteps_prediction.extend(predict[start:end])
            timesteps_loss.extend(timestep_losses[start:end])
            timesteps_inputs.extend(inputs[start:end])
            timesteps_targets.extend(targets[start:end])
    else:
        timesteps_prediction = ArraySequence(
            [p[:int(m.sum())] for p, m in zip(chain(*predict), chain(*masks))])
        timesteps_loss = ArraySequence([
            l[:int(m.sum())]
            for l, m in zip(chain(*timestep_losses), chain(*masks))
        ])
        timesteps_inputs = ArraySequence(
            [i[:int(m.sum())] for i, m in zip(chain(*inputs), chain(*masks))])
        # Use np.squeeze in case gru_multistep is used to remove the empty k=1 dimension
        timesteps_targets = ArraySequence([
            np.squeeze(t[:int(m.sum())])
            for t, m in zip(chain(*targets), chain(*masks))
        ])

    # Debug : Print norm stats
    # print("Dataset: {}; # of streamlines: {}".format(dataset.name, len(dataset)))
    # all_predictions = np.array(list(chain(*timesteps_prediction)))
    # prediction_norms = np.linalg.norm(all_predictions, axis=1)
    # print("Prediction norm --- Mean:{}; Max:{}; Min:{}".format(np.mean(prediction_norms), np.max(prediction_norms), np.min(prediction_norms)))
    # all_targets = np.array(list(chain(*timesteps_targets)))
    # target_norms = np.linalg.norm(all_targets, axis=1)
    # print("Target norm --- Mean:{}; Max:{}; Min:{}".format(np.mean(target_norms), np.max(target_norms), np.min(target_norms)))

    # Color is based on timestep loss
    cmap = cm.get_cmap('bwr')
    values = np.concatenate(timesteps_loss)
    vmin = np.percentile(values, 5)
    vmax = np.percentile(values, 95)
    scalar_map = cm.ScalarMappable(norm=mplcolors.Normalize(vmin=vmin,
                                                            vmax=vmax),
                                   cmap=cmap)

    streamlines = []
    colors = []

    for i, t, p, l in zip(timesteps_inputs, timesteps_targets,
                          timesteps_prediction, timesteps_loss):
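        # Rebuild the streamline's point sequence: the input coordinates plus one
        # final point obtained by adding the last target direction to the last point.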
        pts = np.r_[i[:, :3], [i[-1, :3] + t[-1]]]

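        # Build an interleaved polyline: for each segment, visit the original point,
        # the predicted next point, then the original point again, so every prediction
        # shows up as a small spike attached to the input streamline.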
        streamline = np.zeros(((len(pts) - 1) * 3 + 1, 3))
        streamline[::3] = pts
        streamline[1:-1:3] = pts[:-1] + p
        streamline[2:-1:3] = pts[:-1]
        streamlines.append(streamline)

        # Color input streamlines in a uniform color, then color predictions based on L2 error
        color = np.zeros_like(streamline)

        # Base color of streamlines is minimum value (best score)
        color[:] = scalar_map.to_rgba(vmin, bytes=True)[:3]
        color[1:-1:3, :] = scalar_map.to_rgba(l, bytes=True)[:, :3]
        colors.append(color)

    tractogram = nib.streamlines.Tractogram(streamlines,
                                            data_per_point={"colors": colors})
    return tractogram
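# Hedged usage sketch (not in the original script): prediction_tractogram returns a Tractogram
# with per-point colors; saving it follows the same pattern as the "Saving streamlines" blocks
# above. `model`, `dataset` and `hyperparams` are assumed to have been prepared as in the
# main() functions in this file; the helper below is purely illustrative.
def save_prediction_tractogram(hyperparams, model, dataset, out_filename,
                               batch_size_override=1000,
                               prediction_method="expected_value"):
    tractogram = prediction_tractogram(hyperparams, model, dataset,
                                       batch_size_override, prediction_method)
    # Anchor the streamlines in world space before saving, as done elsewhere in this file.
    tractogram.affine_to_rasmm = dataset.subjects[0].signal.affine
    nib.streamlines.save(tractogram, out_filename)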
def evaluation_tractogram(hyperparams, model, dataset, batch_size_override,
                          metric):
    loss = loss_factory(hyperparams, model, dataset, loss_type=None)
    batch_scheduler = batch_scheduler_factory(
        hyperparams,
        dataset,
        train_mode=False,
        batch_size_override=batch_size_override,
        use_data_augment=False)

    _ = loss.losses  # Hack to generate update dict in loss :(

    if hyperparams['model'] == 'ffnn_regression':
        timestep_losses, inputs, targets = log_variables(
            batch_scheduler, model, loss.loss_per_time_step,
            dataset.symb_inputs * 1, dataset.symb_targets * 1)
        # Regrouping data into streamlines will only work if the original streamlines were NOT shuffled, resampled or augmented
        timesteps_loss = ArraySequence()
        seq_loss = []
        timesteps_inputs = ArraySequence()
        timesteps_targets = ArraySequence()
        idx = 0
        for length in dataset.streamlines._lengths:
            start = idx
            idx = end = idx + length
            timesteps_loss.extend(timestep_losses[start:end])
            seq_loss.append(np.mean(timestep_losses[start:end]))
            timesteps_inputs.extend(inputs[start:end])
            timesteps_targets.extend(targets[start:end])
    else:

        timestep_losses, seq_losses, inputs, targets, masks = log_variables(
            batch_scheduler, model, loss.loss_per_time_step, loss.loss_per_seq,
            dataset.symb_inputs * 1, dataset.symb_targets * 1,
            dataset.symb_mask * 1)
        timesteps_loss = ArraySequence([
            l[:int(m.sum())]
            for l, m in zip(chain(*timestep_losses), chain(*masks))
        ])
        seq_loss = np.array(list(chain(*seq_losses)))
        timesteps_inputs = ArraySequence(
            [i[:int(m.sum())] for i, m in zip(chain(*inputs), chain(*masks))])
        # Use np.squeeze in case gru_multistep is used to remove the empty k=1 dimension
        timesteps_targets = ArraySequence([
            np.squeeze(t[:int(m.sum())])
            for t, m in zip(chain(*targets), chain(*masks))
        ])

    if metric == 'sequence':
        # Color is based on sequence loss
        values = seq_loss
    elif metric == 'timestep' or metric == 'cumul_avg':
        # Color is based on timestep loss
        values = np.concatenate(timesteps_loss)
    else:
        raise ValueError("Unrecognized metric: {}".format(metric))

    cmap = cm.get_cmap('bwr')
    vmin = np.percentile(values, 5)
    vmax = np.percentile(values, 95)
    scalar_map = cm.ScalarMappable(norm=mplcolors.Normalize(vmin=vmin,
                                                            vmax=vmax),
                                   cmap=cmap)

    streamlines = []
    colors = []

    for i, t, l, seq_l in zip(timesteps_inputs, timesteps_targets,
                              timesteps_loss, seq_loss):
        pts = np.r_[i[:, :3], [i[-1, :3] + t[-1]]]

        color = np.zeros_like(pts)
        if metric == 'sequence':
            # Streamline color is based on sequence loss
            color[:, :] = scalar_map.to_rgba(seq_l, bytes=True)[:3]
        elif metric == 'timestep':
            # Streamline color is based on timestep loss
            # Identify first point with green
            color[0, :] = [0, 255, 0]
            color[1:, :] = scalar_map.to_rgba(l, bytes=True)[:, :3]
        elif metric == 'cumul_avg':
            # Streamline color is based on timestep loss

            # Compute cumulative average
            cumul_avg = np.cumsum(l) / np.arange(1, len(l) + 1)

            # Identify first point with green
            color[0, :] = [0, 255, 0]
            color[1:, :] = scalar_map.to_rgba(cumul_avg, bytes=True)[:, :3]
        else:
            raise ValueError("Unrecognized metric: {}".format(metric))

        streamlines.append(pts)
        colors.append(color)

    tractogram = nib.streamlines.Tractogram(streamlines,
                                            data_per_point={"colors": colors})
    return tractogram
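# evaluation_tractogram is used the same way as prediction_tractogram above, with `metric`
# chosen among 'sequence', 'timestep' and 'cumul_avg' to decide how per-point colors are
# computed; the returned Tractogram can be saved with the same nib.streamlines.save pattern.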
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version, but nonexistent in older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'normalize': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(args, exclude=hyperparams_to_exclude,
                                                                                  retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager, name="trainset", use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager, name="validset", use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        if args.view:
            tsne_view(trainset, trainset_volume_manager)
            sys.exit(0)

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3

        model = model_factory(hyperparams,
                              input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss:         : {}", avg_loss))

        # if args.learn_to_stop:
        #     l2err_monitor = views.MonitorVariable(T.mean(loss.mean_sqr_error))
        #     avg_l2err = tasks.AveragePerEpoch(l2err_monitor)
        #     trainer.append_task(avg_l2err)
        #
        #     crossentropy_monitor = views.MonitorVariable(T.mean(loss.cross_entropy))
        #     avg_crossentropy = tasks.AveragePerEpoch(crossentropy_monitor)
        #     trainer.append_task(avg_crossentropy)
        #
        #     trainer.append_task(tasks.Print("Avg. training L2 err:       : {}", avg_l2err))
        #     trainer.append_task(tasks.Print("Avg. training stopping:     : {}", avg_crossentropy))
        #     trainer.append_task(tasks.Print("L2 err : {0:.4f}", l2err_monitor, each_k_update=100))
        #     trainer.append_task(tasks.Print("stopping : {0:.4f}", crossentropy_monitor, each_k_update=100))

        # Print NLL mean/stderror.
        # train_loss = L2DistanceForSequences(model, trainset)
        # train_batch_scheduler = StreamlinesBatchScheduler(trainset, batch_size=1000,
        #                                                   noisy_streamlines_sigma=None,
        #                                                   nb_updates_per_epoch=None,
        #                                                   seed=1234)

        # train_error = views.LossView(loss=train_loss, batch_scheduler=train_batch_scheduler)
        # trainer.append_task(tasks.Print("Trainset - Error        : {0:.2f} | {1:.2f}", train_error.sum, train_error.mean))

        # HACK: To make sure all subjects in the volume_manager are used in a batch, we have to split the trainset/validset in 2 volume managers
        model.volume_manager = validset_volume_manager
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams,
                                                        dataset=validset,
                                                        train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error        : {0:.2f} | {1:.2f}", valid_error.sum, valid_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager

        lookahead_loss = valid_error.sum

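        # Monitor the global gradient norm: the square root of the sum of squared
        # entries over all parameter gradients.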
        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        if args.view:
            import pylab as plt

            def _plot(*args, **kwargs):
                plt.figure(1)
                plt.clf()
                plt.show(False)
                plt.subplot(121)
                plt.plot(np.array(logger.get_variable_history(0)).flatten(), label="Valid (mean)")
                plt.plot(np.array(logger.get_variable_history(1)).flatten(), label="Valid (sum)")
                plt.legend()

                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(2)).flatten(), label="||d||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss, lookahead=args.lookahead, eps=args.lookahead_eps, callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version, but nonexistent in older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'predict_offset': False,
                                   'normalize': False,
                                   'sort_streamlines': False,
                                   'keep_step_size': False,
                                   'use_layer_normalization': False,
                                   'drop_prob': 0.,
                                   'use_zoneout': False,
                                   'skip_connections': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(args, exclude=hyperparams_to_exclude,
                                                                                  retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager, name="trainset",
                                                      use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager, name="validset",
                                                      use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3

        model = model_factory(hyperparams,
                              input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss:         : {}", avg_loss))

        # if args.learn_to_stop:
        #     l2err_monitor = views.MonitorVariable(T.mean(loss.mean_sqr_error))
        #     avg_l2err = tasks.AveragePerEpoch(l2err_monitor)
        #     trainer.append_task(avg_l2err)
        #
        #     crossentropy_monitor = views.MonitorVariable(T.mean(loss.cross_entropy))
        #     avg_crossentropy = tasks.AveragePerEpoch(crossentropy_monitor)
        #     trainer.append_task(avg_crossentropy)
        #
        #     trainer.append_task(tasks.Print("Avg. training L2 err:       : {}", avg_l2err))
        #     trainer.append_task(tasks.Print("Avg. training stopping:     : {}", avg_crossentropy))
        #     trainer.append_task(tasks.Print("L2 err : {0:.4f}", l2err_monitor, each_k_update=100))
        #     trainer.append_task(tasks.Print("stopping : {0:.4f}", crossentropy_monitor, each_k_update=100))

        # Print NLL mean/stderror.
        # train_loss = L2DistanceForSequences(model, trainset)
        # train_batch_scheduler = StreamlinesBatchScheduler(trainset, batch_size=1000,
        #                                                   noisy_streamlines_sigma=None,
        #                                                   nb_updates_per_epoch=None,
        #                                                   seed=1234)

        # train_error = views.LossView(loss=train_loss, batch_scheduler=train_batch_scheduler)
        # trainer.append_task(tasks.Print("Trainset - Error        : {0:.2f} | {1:.2f}", train_error.sum, train_error.mean))

        # HACK: To make sure all subjects in the volume_manager are used in a batch, we have to split the trainset/validset in 2 volume managers
        model.volume_manager = validset_volume_manager
        model.drop_prob = 0.  # Do not use dropout/zoneout for evaluation
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams,
                                                        dataset=validset,
                                                        train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error        : {0:.2f} | {1:.2f}", valid_error.sum, valid_error.mean))

        if hyperparams['model'] == 'ffnn_regression':
            valid_batch_scheduler2 = batch_scheduler_factory(hyperparams,
                                                             dataset=validset,
                                                             train_mode=False)

            valid_l2 = loss_factory(hyperparams, model, validset, loss_type="expected_value")
            valid_l2_error = views.LossView(loss=valid_l2, batch_scheduler=valid_batch_scheduler2)
            trainer.append_task(tasks.Print("Validset - {}".format(valid_l2.__class__.__name__) + "\t: {0:.2f} | {1:.2f}", valid_l2_error.sum, valid_l2_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager
        model.drop_prob = hyperparams['drop_prob']  # Restore dropout

        lookahead_loss = valid_error.sum

        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        if args.view:
            import pylab as plt

            def _plot(*args, **kwargs):
                plt.figure(1)
                plt.clf()
                plt.show(False)
                plt.subplot(121)
                plt.plot(np.array(logger.get_variable_history(0)).flatten(), label="Valid (mean)")
                plt.plot(np.array(logger.get_variable_history(1)).flatten(), label="Valid (sum)")
                plt.legend()

                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(2)).flatten(), label="||d||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss, lookahead=args.lookahead, eps=args.lookahead_eps, callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load the experiment's hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(args.subjects, volume_manager, name="dataset", use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path, volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path, volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))

    with Timer("Building evaluation function"):
        # Override K for gru_multistep
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset, train_mode=False, batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams, model, dataset, loss_type=args.loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)

    with Timer("Evaluating...", newline=True):
        results_file = pjoin(experiment_path, "results.json")
        results = {}
        if os.path.isfile(results_file) and not args.force:
            print("Loading saved results... (use --force to re-run evaluation)")
            results = smartutils.load_dict_from_json_file(results_file)

        tag = ""
        if args.loss_type == 'expected_value' or hyperparams['model'] == 'gru_regression':
            tag = "_EV_L2_error"
        elif args.loss_type == 'maximum_component':
            tag = "_MC_L2_error"
        elif hyperparams['model'] == 'gru_mixture' or hyperparams['model'] == 'gru_multistep':
            tag = "_NLL"

        entry = args.dataset_name + tag

        if entry not in results or args.force:
            with Timer("Evaluating {}".format(entry)):
                dummy_status = Status()  # Forces recomputing results
                results[entry] = {'mean': float(l2_error.mean.view(dummy_status)), 'stderror': float(l2_error.stderror.view(dummy_status))}
                smartutils.save_dict_to_json_file(results_file, results)  # Update results file.

        print("{}: {:.4f} ± {:.4f}".format(entry, results[entry]['mean'], results[entry]['stderror']))
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load the experiment's hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    # Use this for hyperparams added in a new version, but nonexistent in older versions
    retrocompatibility_defaults = {
        'feed_previous_direction': False,
        'predict_offset': False,
        'normalize': False,
        'keep_step_size': False,
        'sort_streamlines': False,
        'use_layer_normalization': False,
        'drop_prob': 0.,
        'use_zoneout': False
    }
    for new_hyperparams, default_value in retrocompatibility_defaults.items():
        if new_hyperparams not in hyperparams:
            hyperparams[new_hyperparams] = default_value

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            args.subjects,
            volume_manager,
            name="dataset",
            use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path,
                                          volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_gaussian':
            from learn2track.models import GRU_Gaussian
            model = GRU_Gaussian.create(experiment_path,
                                        volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path,
                                       volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(
                experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path,
                                           volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))
        model.drop_prob = 0.  # Make sure dropout/zoneout is not used when testing

    with Timer("Building evaluation function"):
        # Override K for gru_multistep
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(
            hyperparams,
            dataset,
            train_mode=False,
            batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams,
                            model,
                            dataset,
                            loss_type=args.loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)

    with Timer("Evaluating...", newline=True):
        results_file = pjoin(experiment_path, "results.json")
        results = {}
        if os.path.isfile(results_file) and not args.force:
            print(
                "Loading saved results... (use --force to re-run evaluation)")
            results = smartutils.load_dict_from_json_file(results_file)

        tag = ""
        if args.loss_type == 'expected_value' or hyperparams['model'] == 'gru_regression':
            tag = "_EV_L2_error"
        elif args.loss_type == 'maximum_component':
            tag = "_MC_L2_error"
        elif hyperparams['model'] in [
                'gru_gaussian', 'gru_mixture', 'gru_multistep'
        ]:
            tag = "_NLL"

        entry = args.dataset_name + tag

        if entry not in results or args.force:
            with Timer("Evaluating {}".format(entry)):
                dummy_status = Status()  # Forces recomputing results
                results[entry] = {
                    'mean': float(l2_error.mean.view(dummy_status)),
                    'stderror': float(l2_error.stderror.view(dummy_status))
                }
                smartutils.save_dict_to_json_file(
                    results_file, results)  # Update results file.

        print("{}: {:.4f} ± {:.4f}".format(entry, results[entry]['mean'],
                                           results[entry]['stderror']))
def test_gru_mixture_fprop_neighborhood():
    hyperparams = {
        'model': 'gru_mixture',
        'SGD': "1e-2",
        'hidden_sizes': 50,
        'batch_size': 16,
        'learn_to_stop': False,
        'normalize': True,
        'activation': 'tanh',
        'feed_previous_direction': False,
        'predict_offset': False,
        'use_layer_normalization': False,
        'drop_prob': 0.,
        'use_zoneout': False,
        'skip_connections': False,
        'seed': 1234,
        'noisy_streamlines_sigma': None,
        'keep_step_size': True,
        'sort_streamlines': False,
        'n_gaussians': 2,
        'neighborhood_radius': 0.5
    }

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = factories.batch_scheduler_factory(hyperparams,
                                                            dataset=trainset)
        print("An epoch will be composed of {} updates.".format(
            batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hyperparams['hidden_sizes'],
              batch_scheduler.target_size)

    with Timer("Creating model"):
        model = factories.model_factory(
            hyperparams,
            input_size=volume_manager.data_dimension,
            output_size=batch_scheduler.target_size,
            volume_manager=volume_manager)
        model.initialize(
            factories.weigths_initializer_factory("orthogonal", seed=1234))

        print("Input size: {}".format(model.model_input_size))

    # Test fprop with missing streamlines from one subject in a batch
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs],
                          output,
                          updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        loss.loss,
        updates=model.graph_updates)

    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        list(optimizer.directions.values()),
        updates=model.graph_updates)

    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)

    return True