def test_gru_mixture_fprop():
    hidden_sizes = 50

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = batch_schedulers.TractographyBatchScheduler(
            trainset, batch_size=16, noisy_streamlines_sigma=None, seed=1234)
        print("An epoch will be composed of {} updates.".format(
            batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hidden_sizes,
              batch_scheduler.target_size)

    with Timer("Creating model"):
        hyperparams = {
            'model': 'gru_mixture',
            'n_gaussians': 2,
            'SGD': "1e-2",
            'hidden_sizes': hidden_sizes,
            'learn_to_stop': False,
            'normalize': False,
            'feed_previous_direction': False
        }
        model = factories.model_factory(
            hyperparams,
            input_size=volume_manager.data_dimension,
            output_size=batch_scheduler.target_size,
            volume_manager=volume_manager)
        model.initialize(
            factories.weigths_initializer_factory("orthogonal", seed=1234))

    # Test fprop with missing streamlines from one subject in a batch
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs],
                          output,
                          updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        loss.loss,
        updates=model.graph_updates)

    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        list(optimizer.directions.values()),
        updates=model.graph_updates)

    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)
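
The test above builds symbolic expressions from the model and compiles them with theano.function before calling them on numpy batches. As a point of reference, here is a minimal, self-contained sketch of that compile-then-call pattern (plain Theano, independent of learn2track):

import numpy as np
import theano
import theano.tensor as T

x = T.matrix("x")                     # symbolic batch of inputs
y = T.nnet.sigmoid(x.sum(axis=1))     # any differentiable expression
fct = theano.function([x], y)         # compile once...
print(fct(np.random.rand(4, 3).astype(theano.config.floatX)))  # ...then evaluate on concrete arrays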
Example #2
def load_mask_classifier_dataset(subject_files,
                                 volume_manager,
                                 name="HCP",
                                 use_sh_coeffs=False):
    subjects = []
    with Timer("  Loading subject(s)", newline=True):
        for subject_file in sorted(subject_files):
            print("    {}".format(subject_file))
            mask_data = MaskClassifierData.load(subject_file)

            dwi = mask_data.signal
            bvals = mask_data.gradients.bvals
            bvecs = mask_data.gradients.bvecs
            if use_sh_coeffs:
                # Use 45 spherical harmonic coefficients to represent the diffusion signal.
                volume = neurotools.get_spherical_harmonics_coefficients(
                    dwi, bvals, bvecs).astype(np.float32)
            else:
                # Resample the diffusion signal to have 100 directions.
                volume = neurotools.resample_dwi(dwi, bvals,
                                                 bvecs).astype(np.float32)

            mask_data.signal.uncache()  # Free some memory as we don't need the original signal.
            subject_id = volume_manager.register(volume)
            mask_data.subject_id = subject_id
            subjects.append(mask_data)

    return MaskClassifierDataset(subjects, name, keep_on_cpu=True)
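
A toy sketch of the register-and-tag pattern above; TinyVolumeManager is a hypothetical stand-in for neurotools.VolumeManager, assuming only that register() returns an integer subject id:

import numpy as np

class TinyVolumeManager:
    """Hypothetical stand-in: stores volumes and hands back an integer id."""
    def __init__(self):
        self.volumes = []

    def register(self, volume):
        self.volumes.append(volume)
        return len(self.volumes) - 1  # used as the subject id

manager = TinyVolumeManager()
subject_ids = []
for _ in range(3):
    volume = np.random.rand(10, 10, 10, 45).astype(np.float32)  # e.g. 45 SH coefficients per voxel
    subject_ids.append(manager.register(volume))
print(subject_ids)  # [0, 1, 2]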
Example #3
def main():
    parser = build_parser()
    args = parser.parse_args()

    for f in args.datasets:
        with Timer("Normalizing step size of dataset '{}'".format(f)):
            tractography_data = neurotools.TractographyData.load(f)

            t = nib.streamlines.Tractogram(tractography_data.streamlines)
            t.apply_affine(
                tractography_data.signal.affine)  # Bring streamlines to RAS+mm

            streamlines = t.streamlines
            streamlines._lengths = streamlines._lengths.astype(int)
            streamlines._offsets = streamlines._offsets.astype(int)
            lengths = length(streamlines)
            nb_points = np.ceil(lengths / args.step_size).astype(int)

            new_streamlines = (set_number_of_points(s, n)
                               for s, n in zip(streamlines, nb_points))

            t = nib.streamlines.Tractogram(new_streamlines)
            t.apply_affine(np.linalg.inv(tractography_data.signal.affine))
            t.affine_to_rasmm = np.eye(4)
            tractography_data.streamlines = t.streamlines

        filename = f[:-4] + "_" + str(args.step_size) + "mm" + f[-4:]
        tractography_data.save(filename)
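
A self-contained sketch of the resampling arithmetic above on a synthetic streamline (dipy and numpy only); the number of points is chosen so consecutive points end up roughly step_size apart:

import numpy as np
from dipy.tracking.streamline import length, set_number_of_points

step_size = 0.5  # mm
streamline = np.array([[0, 0, 0], [0, 0, 1], [0, 0, 2], [0, 0, 4]], dtype=np.float32)

nb_points = int(np.ceil(length(streamline) / step_size))      # 4 mm / 0.5 mm -> 8 points
resampled = set_number_of_points(streamline, nb_points)
print(len(resampled), np.linalg.norm(np.diff(resampled, axis=0), axis=1))  # ~0.57 mm spacing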
Example #4
def main():
    parser = build_parser()
    args = parser.parse_args()

    for f in args.datasets:
        with Timer("Loading dataset '{}'".format(f)):
            tractography_data = neurotools.TractographyData.load(f)

        print(tractography_data)
Example #5
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    with Timer("Loading streamlines"):
        trk = nib.streamlines.load(args.tractogram)
        losses = trk.tractogram.data_per_streamline['loss']
        del trk.tractogram.data_per_streamline['loss']  # Not supported by my version of MI-Brain.

    with Timer("Coloring streamlines"):
        viridis = plt.get_cmap('RdYlGn')

        losses = -losses[:, 0]
        losses -= losses.mean()

        vmin = losses.min()
        vmax = losses.max()

        if args.normalization == "norm":
            cNorm = colors.Normalize(vmin=vmin, vmax=vmax)
        elif args.normalization == "log":
            cNorm = colors.LogNorm(vmin=vmin, vmax=vmax)
        elif args.normalization == "symlog":
            cNorm = colors.SymLogNorm(linthresh=0.03, linscale=1, vmin=vmin, vmax=vmax)
        else:
            raise ValueError("Unkown normalization: {}".format(args.normalization))

        scalarMap = cm.ScalarMappable(norm=cNorm, cmap=viridis)
        print(scalarMap.get_clim())
        # losses -= losses.mean()
        # losses /= losses.std()
        streamlines_colors = scalarMap.to_rgba(losses, bytes=True)[:, :-1]

        # from dipy.viz import fvtk
        # streamlines_colors = fvtk.create_colormap(-losses[:, 0]) * 255
        colors_per_point = ArraySequence([np.tile(c, (len(s), 1)) for s, c in zip(trk.tractogram.streamlines, streamlines_colors)])
        trk.tractogram.data_per_point['color'] = colors_per_point

    with Timer("Saving streamlines"):
        if args.out is None:
            args.out = args.tractogram[:-4] + "_color_" + args.normalization + args.tractogram[-4:]

        nib.streamlines.save(trk.tractogram, args.out)
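
A minimal sketch, using only matplotlib and numpy, of the scalar-to-RGB mapping applied to the per-streamline losses above:

import numpy as np
from matplotlib import cm, colors, pyplot as plt

losses = np.random.rand(100)                               # one scalar per streamline
norm = colors.Normalize(vmin=losses.min(), vmax=losses.max())
mapper = cm.ScalarMappable(norm=norm, cmap=plt.get_cmap('RdYlGn'))
rgb = mapper.to_rgba(losses, bytes=True)[:, :-1]           # uint8 RGB, alpha channel dropped
print(rgb.shape, rgb.dtype)                                # (100, 3) uint8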
Example #6
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    data = TractographyData.load(args.dataset)
    streamlines = data.streamlines
    print("{} has {:,} streamlines".format(args.dataset, len(streamlines)))

    if args.list_bundles_name:
        for bundle_name in data.bundle_names:
            bundle_id = data.name2id[bundle_name]
            print("{}: {}".format(bundle_id, bundle_name))

        return

    if args.leave_one_out is not None:
        with Timer("Splitting {} using a leave-one-out strategy".format(
                args.dataset),
                   newline=True):
            for bundle in args.leave_one_out:
                rng = np.random.RandomState(args.seed)
                train_data = TractographyData(data.signal, data.gradients,
                                              data.name2id)
                valid_data = TractographyData(data.signal, data.gradients,
                                              data.name2id)
                test_data = TractographyData(data.signal, data.gradients,
                                             data.name2id)

                bundle_ids_to_exclude = list(map(int, bundle.split(',')))
                missing_bundles_name = [
                    data.bundle_names[i] for i in bundle_ids_to_exclude
                ]

                if args.verbose:
                    print("Leaving out {}...".format(
                        ", ".join(missing_bundles_name)))

                include = np.ones(len(data.bundle_ids), dtype=bool)
                exclude = np.zeros(len(data.bundle_ids), dtype=bool)
                for i in bundle_ids_to_exclude:
                    include = np.logical_and(include, data.bundle_ids != i)
                    exclude = np.logical_or(exclude, data.bundle_ids == i)

                include_idx = np.where(include)[0]
                exclude_idx = np.where(exclude)[0]
                rng.shuffle(include_idx)
                rng.shuffle(exclude_idx)

                trainset_indices = include_idx
                validset_indices = exclude_idx[:len(exclude_idx) // 2]
                testset_indices = exclude_idx[len(exclude_idx) // 2:]

                train_data.add(streamlines[trainset_indices],
                               bundle_ids=data.bundle_ids[trainset_indices])
                valid_data.add(streamlines[validset_indices],
                               bundle_ids=data.bundle_ids[validset_indices])
                test_data.add(streamlines[testset_indices],
                              bundle_ids=data.bundle_ids[testset_indices])

                filename = "missing_{}.npz".format(
                    "_".join(missing_bundles_name))
                with Timer("Saving dataset: {}".format(filename[:-4])):
                    train_data.save(filename[:-4] + "_trainset.npz")
                    valid_data.save(filename[:-4] + "_validset.npz")
                    test_data.save(filename[:-4] + "_testset.npz")

    else:
        rng = np.random.RandomState(args.seed)
        train_data = TractographyData(data.signal, data.gradients,
                                      data.name2id)
        valid_data = TractographyData(data.signal, data.gradients,
                                      data.name2id)
        test_data = TractographyData(data.signal, data.gradients, data.name2id)

        with Timer("Splitting {} as follow {} using {}".format(
                args.dataset, args.split, args.split_type),
                   newline=args.verbose):
            for bundle_name in data.bundle_names:
                if args.verbose:
                    print("Splitting bundle {}...".format(bundle_name))

                bundle_id = data.name2id[bundle_name]
                indices = np.where(data.bundle_ids == bundle_id)[0]
                nb_examples = len(indices)
                rng.shuffle(indices)

                if args.split_type == "percentage":
                    trainset_size = int(np.round(args.split[0] * nb_examples))
                    validset_size = int(np.round(args.split[1] * nb_examples))
                    testset_size = int(np.round(args.split[2] * nb_examples))
                    # Make sure the splits sum to nb_examples
                    testset_size += nb_examples - (
                        trainset_size + validset_size + testset_size)
                elif args.split_type == "count":
                    raise NotImplementedError(
                        "Split type `count` not implemented yet!")

                assert trainset_size + validset_size + testset_size == nb_examples

                trainset_indices = indices[:trainset_size]
                validset_indices = indices[trainset_size:-testset_size]
                testset_indices = indices[-testset_size:]

                train_data.add(streamlines[trainset_indices], bundle_name)
                valid_data.add(streamlines[validset_indices], bundle_name)
                test_data.add(streamlines[testset_indices], bundle_name)

        with Timer("Saving"):
            train_data.save(args.dataset[:-4] + "_trainset.npz")
            valid_data.save(args.dataset[:-4] + "_validset.npz")
            test_data.save(args.dataset[:-4] + "_testset.npz")

        if args.delete:
            os.remove(args.dataset)
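
A numpy-only sketch of the percentage-split bookkeeping above, with the test set absorbing the rounding error so the three splits always sum to the bundle size:

import numpy as np

rng = np.random.RandomState(1234)
indices = np.arange(103)                     # e.g. 103 streamlines in one bundle
rng.shuffle(indices)

split = (0.8, 0.1, 0.1)
trainset_size = int(np.round(split[0] * len(indices)))
validset_size = int(np.round(split[1] * len(indices)))
testset_size = len(indices) - trainset_size - validset_size   # absorb the rounding error

trainset_indices = indices[:trainset_size]
validset_indices = indices[trainset_size:trainset_size + validset_size]
testset_indices = indices[trainset_size + validset_size:]
assert len(trainset_indices) + len(validset_indices) + len(testset_indices) == len(indices)
print(len(trainset_indices), len(validset_indices), len(testset_indices))  # 82 10 11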
Example #7
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    if min(args.keep_top) < 0 or max(args.keep_top) > 1:
        parser.error("--keep-top must be in [0, 1].")

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    # Use this for hyperparams added in a new version but nonexistent in older versions
    retrocompatibility_defaults = {
        'feed_previous_direction': False,
        'predict_offset': False,
        'normalize': False,
        'keep_step_size': False,
        'sort_streamlines': False
    }
    for new_hyperparams, default_value in retrocompatibility_defaults.items():
        if new_hyperparams not in hyperparams:
            hyperparams[new_hyperparams] = default_value

    with Timer("Loading signal data and tractogram", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset_from_dwi_and_tractogram(
            args.signal,
            args.tractogram,
            volume_manager,
            use_sh_coeffs=hyperparams['use_sh_coeffs'],
            bvals=args.bvals,
            bvecs=args.bvecs,
            step_size=args.step_size)
        print("Dataset size:", len(dataset))

        if vizu_available and args.vizu:
            vizu.check_dataset_integrity(dataset, subset=0.2)

    with Timer("Loading model"):
        loss_type = args.loss_type
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path,
                                          volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path,
                                       volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(
                experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path,
                                           volume_manager=volume_manager)

            if loss_type in ['l2_sum', 'l2_mean']:
                loss_type = "expected_value"

        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))

    with Timer("Building evaluation function"):
        # Override K for gru_multistep
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(
            hyperparams,
            dataset,
            use_data_augment=False,  # Otherwise it doubles the number of losses :-/
            train_mode=False,
            batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams, model, dataset, loss_type=loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)

    with Timer("Scoring...", newline=True):
        dummy_status = Status()  # Forces recomputing results
        losses = l2_error.losses.view(dummy_status)

        if hyperparams['model'] == 'ffnn_regression':
            _losses = dataset.streamlines.copy()
            _losses._data = losses.copy()
            _losses._lengths -= 1
            _losses._offsets -= np.arange(len(dataset.streamlines))

            if args.loss_type == 'l2_sum':
                losses = np.asarray([l.sum() for l in _losses])
            elif args.loss_type == 'l2_mean':
                losses = np.asarray([l.mean() for l in _losses])

        mean = float(l2_error.mean.view(dummy_status))
        stderror = float(l2_error.stderror.view(dummy_status))

        print("Loss: {:.4f} ± {:.4f}".format(mean, stderror))
        print("Min: {:.4f}".format(losses.min()))
        print("Max: {:.4f}".format(losses.max()))
        print("Percentiles: {}".format(
            np.percentile(losses, [0, 25, 50, 75, 100])))

    with Timer("Saving streamlines"):
        nii = dataset.subjects[0].signal
        tractogram = nib.streamlines.Tractogram(
            dataset.streamlines[batch_scheduler.indices],
            affine_to_rasmm=nii.affine)
        tractogram.data_per_streamline['loss'] = losses

        header = {}
        header[Field.VOXEL_TO_RASMM] = nii.affine.copy()
        header[Field.VOXEL_SIZES] = nii.header.get_zooms()[:3]
        header[Field.DIMENSIONS] = nii.shape[:3]
        header[Field.VOXEL_ORDER] = "".join(aff2axcodes(nii.affine))

        nib.streamlines.save(tractogram.copy(), args.out, header=header)

    if len(args.keep_top) > 0:
        for keep_top in args.keep_top:
            with Timer("Saving top {}% streamlines".format(keep_top)):
                idx = np.argsort(losses)
                idx = idx[:int(keep_top * len(losses))]
                print("Keeping {}/{} streamlines".format(
                    len(idx), len(losses)))
                sub_tractogram = tractogram[idx]
                out_filename = args.out[:-4] + "_top{}".format(
                    keep_top) + ".tck"
                nib.streamlines.save(sub_tractogram, out_filename)
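
A numpy-only sketch of the keep-top filtering above: streamlines are sorted by loss and only the fraction with the lowest values is kept:

import numpy as np

losses = np.random.rand(1000)                 # one loss per streamline
keep_top = 0.25                               # keep the 25% with the lowest loss
idx = np.argsort(losses)[:int(keep_top * len(losses))]
print("Keeping {}/{} streamlines".format(len(idx), len(losses)))   # Keeping 250/1000 streamlines
print(losses[idx].max() <= np.median(losses))                      # True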
Example #8
def main():
    parser = build_argparser()
    args = parser.parse_args()

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading DWIs"):
        # Load gradients table
        dwi_name = args.dwi
        if dwi_name.endswith(".gz"):
            dwi_name = dwi_name[:-3]
        if dwi_name.endswith(".nii"):
            dwi_name = dwi_name[:-4]

        try:
            bvals_filename = dwi_name + ".bvals"
            bvecs_filename = dwi_name + ".bvecs"
            bvals, bvecs = dipy.io.gradients.read_bvals_bvecs(
                bvals_filename, bvecs_filename)
        except FileNotFoundError:
            try:
                bvals_filename = dwi_name + ".bval"
                bvecs_filename = dwi_name + ".bvec"
                bvals, bvecs = dipy.io.gradients.read_bvals_bvecs(
                    bvals_filename, bvecs_filename)
            except FileNotFoundError as e:
                print("Could not find .bvals/.bvecs or .bval/.bvec files...")
                raise e

        dwi = nib.load(args.dwi)
        if hyperparams["use_sh_coeffs"]:
            # Use 45 spherical harmonic coefficients to represent the diffusion signal.
            weights = neurotools.get_spherical_harmonics_coefficients(
                dwi, bvals, bvecs).astype(np.float32)
        else:
            # Resample the diffusion signal to have 100 directions.
            weights = neurotools.resample_dwi(dwi, bvals,
                                              bvecs).astype(np.float32)

        affine_rasmm2dwivox = np.linalg.inv(dwi.affine)

    with Timer("Loading model"):
        if hyperparams["model"] == "gru_regression":
            from learn2track.models import GRU_Regression
            model_class = GRU_Regression
        elif hyperparams['model'] == 'gru_gaussian':
            from learn2track.models import GRU_Gaussian
            model_class = GRU_Gaussian
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model_class = GRU_Mixture
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model_class = GRU_Multistep_Gaussian
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model_class = FFNN_Regression
        else:
            raise ValueError("Unknown model!")

        kwargs = {}
        volume_manager = neurotools.VolumeManager()
        volume_manager.register(weights)
        kwargs['volume_manager'] = volume_manager

        # Load the actual model.
        model = model_class.create(
            pjoin(experiment_path),
            **kwargs)  # Create new instance and restore model.
        model.drop_prob = 0.
        print(str(model))

    mask = None
    if args.mask is not None:
        with Timer("Loading mask"):
            mask_nii = nib.load(args.mask)
            mask = mask_nii.get_data()
            # Compute the affine that allows evaluating the mask at given coordinates correctly.

            # affine_maskvox2dwivox = mask_vox => rasmm space => dwi_vox
            affine_maskvox2dwivox = np.dot(affine_rasmm2dwivox,
                                           mask_nii.affine)
            if args.dilate_mask:
                import scipy
                mask = scipy.ndimage.morphology.binary_dilation(mask).astype(
                    mask.dtype)

    with Timer("Generating seeds"):
        seeds = []

        for filename in args.seeds:
            if filename.endswith('.trk') or filename.endswith('.tck'):
                tfile = nib.streamlines.load(filename)
                # Send the streamlines to voxel since that's where we'll track.
                tfile.tractogram.apply_affine(affine_rasmm2dwivox)

                # Use extremities of the streamlines as seeding points.
                seeds += [s[0] for s in tfile.streamlines]
                seeds += [s[-1] for s in tfile.streamlines]

            else:
                # Assume it is a binary mask.
                rng = np.random.RandomState(args.seeding_rng_seed)
                nii_seeds = nib.load(filename)

                # affine_seedsvox2dwivox = mask_vox => rasmm space => dwi_vox
                affine_seedsvox2dwivox = np.dot(affine_rasmm2dwivox,
                                                nii_seeds.affine)

                nii_seeds_data = nii_seeds.get_data()

                if args.dilate_seeding_mask:
                    import scipy
                    nii_seeds_data = scipy.ndimage.morphology.binary_dilation(
                        nii_seeds_data).astype(nii_seeds_data.dtype)

                indices = np.array(np.where(nii_seeds_data)).T
                for idx in indices:
                    seeds_in_voxel = idx + rng.uniform(
                        -0.5, 0.5, size=(args.nb_seeds_per_voxel, 3))
                    seeds_in_voxel = nib.affines.apply_affine(
                        affine_seedsvox2dwivox, seeds_in_voxel)
                    seeds.extend(seeds_in_voxel)

        seeds = np.array(seeds, dtype=theano.config.floatX)

    with Timer("Tracking in the diffusion voxel space"):
        voxel_sizes = np.asarray(dwi.header.get_zooms()[:3])
        if not np.all(voxel_sizes == dwi.header.get_zooms()[0]):
            print("* Careful voxel are anisotropic {}!".format(
                tuple(voxel_sizes)))
        # Since we are tracking in diffusion voxel space, convert step_size (in mm) to voxel.

        if args.step_size is not None:
            step_size = np.float32(args.step_size / voxel_sizes.max())
            # Also convert max length (in mm) to voxel.
            max_nb_points = int(np.ceil(args.max_length / args.step_size))
        else:
            step_size = None
            max_nb_points = args.max_length

        if args.theta is not None:
            theta = np.deg2rad(args.theta)
        elif args.curvature is not None and args.curvature > 0:
            theta = get_max_angle_from_curvature(args.curvature, step_size)
        else:
            theta = np.deg2rad(45)

        print("Angle: {}".format(np.rad2deg(theta)))
        print("Step size (vox): {}".format(step_size))
        print("Max nb. points: {}".format(max_nb_points))

        is_outside_mask = make_is_outside_mask(mask,
                                               affine_maskvox2dwivox,
                                               threshold=args.mask_threshold)
        is_too_long = make_is_too_long(max_nb_points)
        is_too_curvy = make_is_too_curvy(np.rad2deg(theta))
        is_unlikely = make_is_unlikely(0.5)
        is_stopping = make_is_stopping({
            STOPPING_MASK: is_outside_mask,
            STOPPING_LENGTH: is_too_long,
            STOPPING_CURVATURE: is_too_curvy,
            STOPPING_LIKELIHOOD: is_unlikely
        })

        is_stopping.max_nb_points = max_nb_points  # Small hack

        tractogram = batch_track(model,
                                 weights,
                                 seeds,
                                 step_size=step_size,
                                 is_stopping=is_stopping,
                                 batch_size=args.batch_size,
                                 args=args)

        # Streamlines have been generated in voxel space.
        # Transform them back to RAS+mm space using the dwi's affine.
        tractogram.affine_to_rasmm = dwi.affine
        tractogram.to_world()  # Performed in-place.

    nb_streamlines = len(tractogram)

    if args.save_rejected:
        rejected_tractogram = Tractogram()
        rejected_tractogram.affine_to_rasmm = tractogram._affine_to_rasmm

    print("Generated {:,} (compressed) streamlines".format(nb_streamlines))
    with Timer("Cleaning streamlines", newline=True):
        # Flush streamlines that have no points.
        if args.save_rejected:
            rejected_tractogram += tractogram[
                np.array(list(map(len, tractogram))) <= 0]

        tractogram = tractogram[np.array(list(map(len, tractogram))) > 0]
        print("Removed {:,} empty streamlines".format(nb_streamlines -
                                                      len(tractogram)))

        # Remove small streamlines
        nb_streamlines = len(tractogram)
        lengths = dipy.tracking.streamline.length(tractogram.streamlines)

        if args.save_rejected:
            rejected_tractogram += tractogram[lengths < args.min_length]

        tractogram = tractogram[lengths >= args.min_length]
        lengths = lengths[lengths >= args.min_length]
        if len(lengths) > 0:
            print("Average length: {:.2f} mm.".format(lengths.mean()))
            print("Minimum length: {:.2f} mm. Maximum length: {:.2f}".format(
                lengths.min(), lengths.max()))
        print("Removed {:,} streamlines smaller than {:.2f} mm".format(
            nb_streamlines - len(tractogram), args.min_length))
        if args.discard_stopped_by_curvature:
            nb_streamlines = len(tractogram)
            stopping_curvature_flag_is_set = is_flag_set(
                tractogram.data_per_streamline['stopping_flags'][:, 0],
                STOPPING_CURVATURE)

            if args.save_rejected:
                rejected_tractogram += tractogram[
                    stopping_curvature_flag_is_set]

            tractogram = tractogram[np.logical_not(
                stopping_curvature_flag_is_set)]
            print(
                "Removed {:,} streamlines stopped for having a curvature higher than {:.2f} degrees"
                .format(nb_streamlines - len(tractogram), np.rad2deg(theta)))

        if args.filter_threshold is not None:
            # Remove streamlines that produces a reconstruction error higher than a certain threshold.
            nb_streamlines = len(tractogram)
            losses = compute_loss_errors(tractogram.streamlines, model,
                                         hyperparams)
            print("Mean loss: {:.4f} ± {:.4f}".format(
                np.mean(losses),
                np.std(losses, ddof=1) / np.sqrt(len(losses))))

            if args.save_rejected:
                rejected_tractogram += tractogram[
                    losses > args.filter_threshold]

            tractogram = tractogram[losses <= args.filter_threshold]
            print(
                "Removed {:,} streamlines producing a loss higher than {:.2f} mm"
                .format(nb_streamlines - len(tractogram),
                        args.filter_threshold))

    with Timer("Saving {:,} (compressed) streamlines".format(len(tractogram))):
        filename = args.out
        if args.out is None:
            prefix = args.prefix
            if prefix is None:
                dwi_name = os.path.basename(args.dwi)
                if dwi_name.endswith(".nii.gz"):
                    dwi_name = dwi_name[:-7]
                else:  # .nii
                    dwi_name = dwi_name[:-4]

                prefix = os.path.basename(os.path.dirname(args.dwi)) + dwi_name
                prefix = prefix.replace(".", "_")

            seed_mask_type = args.seeds[0].replace(".", "_").replace(
                "_", "").replace("/", "-")
            if "int" in args.seeds[0]:
                seed_mask_type = "int"
            elif "wm" in args.seeds[0]:
                seed_mask_type = "wm"
            elif "rois" in args.seeds[0]:
                seed_mask_type = "rois"
            elif "bundles" in args.seeds[0]:
                seed_mask_type = "bundles"

            mask_type = ""
            if "fa" in args.mask:
                mask_type = "fa"
            elif "wm" in args.mask:
                mask_type = "wm"

            if args.dilate_seeding_mask:
                seed_mask_type += "D"

            if args.dilate_mask:
                mask_type += "D"

            filename_items = [
                "{}",
                "useMaxComponent-{}",
                # "seed-{}",
                # "mask-{}",
                "step-{:.2f}mm",
                "nbSeeds-{}",
                "maxAngleDeg-{:.1f}"
                # "keepCurv-{}",
                # "filtered-{}",
                # "minLen-{}",
                # "pftRetry-{}",
                # "pftHist-{}",
                # "trackLikePeter-{}",
            ]
            filename = ('_'.join(filename_items) + ".tck").format(
                prefix,
                args.use_max_component,
                # seed_mask_type,
                # mask_type,
                args.step_size,
                args.nb_seeds_per_voxel,
                np.rad2deg(theta)
                # not args.discard_stopped_by_curvature,
                # args.filter_threshold,
                # args.min_length,
                # args.pft_nb_retry,
                # args.pft_nb_backtrack_steps,
                # args.track_like_peter
            )

        save_path = pjoin(experiment_path, filename)
        try:  # Create dirs, if needed.
            os.makedirs(os.path.dirname(save_path))
        except OSError:
            pass

        print("Saving to {}".format(save_path))
        nib.streamlines.save(tractogram, save_path)

    if args.save_rejected:
        with Timer("Saving {:,} (compressed) rejected streamlines".format(
                len(rejected_tractogram))):
            rejected_filename_items = filename_items.copy()
            rejected_filename_items.insert(1, "rejected")
            rejected_filename = (
                '_'.join(rejected_filename_items) + ".tck"
            ).format(
                prefix,
                args.use_max_component,
                # seed_mask_type,
                # mask_type,
                args.step_size,
                args.nb_seeds_per_voxel,
                np.rad2deg(theta)
                # not args.discard_stopped_by_curvature,
                # args.filter_threshold,
                # args.min_length,
                # args.pft_nb_retry,
                # args.pft_nb_backtrack_steps,
                # args.track_like_peter
            )

            rejected_save_path = pjoin(experiment_path, rejected_filename)
            try:  # Create dirs, if needed.
                os.makedirs(os.path.dirname(rejected_save_path))
            except OSError:
                pass

            print("Saving rejected streamlines to {}".format(rejected_save_path))
            nib.streamlines.save(rejected_tractogram, rejected_save_path)
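
For reference, a small numpy-only sketch of the unit conversions performed above before tracking in diffusion voxel space; the voxel size, step size, and maximum length are made-up values for illustration:

import numpy as np

voxel_sizes = np.asarray([2.0, 2.0, 2.0])        # mm, e.g. dwi.header.get_zooms()[:3]
step_size_mm = 0.75
max_length_mm = 200.0

step_size_vox = np.float32(step_size_mm / voxel_sizes.max())
max_nb_points = int(np.ceil(max_length_mm / step_size_mm))
theta = np.deg2rad(45)                           # default aperture when neither --theta nor --curvature is given
print(step_size_vox, max_nb_points, np.rad2deg(theta))   # 0.375 267 45.0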
Example #9
def main():
    parser = build_args_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            [args.streamlines],
            volume_manager,
            name="dataset",
            use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path,
                                          volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path,
                                       volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(
                experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path,
                                           volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))
        print(str(model))

    tractogram_file = pjoin(experiment_path, args.out)
    if not os.path.isfile(tractogram_file) or args.force:
        if args.method == 'prediction':
            tractogram = prediction_tractogram(hyperparams, model, dataset,
                                               args.batch_size,
                                               args.prediction)
        elif args.method == 'evaluation':
            tractogram = evaluation_tractogram(hyperparams, model, dataset,
                                               args.batch_size, args.metric)
        else:
            raise ValueError("Unrecognized method: {}".format(args.method))

        tractogram.affine_to_rasmm = dataset.subjects[0].signal.affine
        nib.streamlines.save(tractogram, tractogram_file)
    else:
        print("Tractogram already exists. (use --force to generate it again)")
Example #10
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view', 'shuffle_streamlines']
    # Use this for hyperparams added in a new version but nonexistent in older versions
    retrocompatibility_defaults = {'feed_previous_direction': False,
                                   'predict_offset': False,
                                   'normalize': False,
                                   'sort_streamlines': False,
                                   'keep_step_size': False,
                                   'use_layer_normalization': False,
                                   'drop_prob': 0.,
                                   'use_zoneout': False,
                                   'skip_connections': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(args, exclude=hyperparams_to_exclude,
                                                                                  retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_tractography_dataset(args.train_subjects, trainset_volume_manager, name="trainset",
                                                      use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_tractography_dataset(args.valid_subjects, validset_volume_manager, name="validset",
                                                      use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = batch_scheduler_factory(hyperparams, dataset=trainset, train_mode=True)
        print("An epoch will be composed of {} updates.".format(batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes, batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension
        if hyperparams['feed_previous_direction']:
            input_size += 3

        model = model_factory(hyperparams,
                              input_size=input_size,
                              output_size=batch_scheduler.target_size,
                              volume_manager=trainset_volume_manager)
        model.initialize(weigths_initializer_factory(args.weights_initialization,
                                                     seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = loss_factory(hyperparams, model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(tasks.Print("Avg. training loss:         : {}", avg_loss))

        # if args.learn_to_stop:
        #     l2err_monitor = views.MonitorVariable(T.mean(loss.mean_sqr_error))
        #     avg_l2err = tasks.AveragePerEpoch(l2err_monitor)
        #     trainer.append_task(avg_l2err)
        #
        #     crossentropy_monitor = views.MonitorVariable(T.mean(loss.cross_entropy))
        #     avg_crossentropy = tasks.AveragePerEpoch(crossentropy_monitor)
        #     trainer.append_task(avg_crossentropy)
        #
        #     trainer.append_task(tasks.Print("Avg. training L2 err:       : {}", avg_l2err))
        #     trainer.append_task(tasks.Print("Avg. training stopping:     : {}", avg_crossentropy))
        #     trainer.append_task(tasks.Print("L2 err : {0:.4f}", l2err_monitor, each_k_update=100))
        #     trainer.append_task(tasks.Print("stopping : {0:.4f}", crossentropy_monitor, each_k_update=100))

        # Print NLL mean/stderror.
        # train_loss = L2DistanceForSequences(model, trainset)
        # train_batch_scheduler = StreamlinesBatchScheduler(trainset, batch_size=1000,
        #                                                   noisy_streamlines_sigma=None,
        #                                                   nb_updates_per_epoch=None,
        #                                                   seed=1234)

        # train_error = views.LossView(loss=train_loss, batch_scheduler=train_batch_scheduler)
        # trainer.append_task(tasks.Print("Trainset - Error        : {0:.2f} | {1:.2f}", train_error.sum, train_error.mean))

        # HACK: To make sure all subjects in the volume_manager are used in a batch, we have to split the trainset/validset in 2 volume managers
        model.volume_manager = validset_volume_manager
        model.drop_prob = 0.  # Do not use dropout/zoneout for evaluation
        valid_loss = loss_factory(hyperparams, model, validset)
        valid_batch_scheduler = batch_scheduler_factory(hyperparams,
                                                        dataset=validset,
                                                        train_mode=False)

        valid_error = views.LossView(loss=valid_loss, batch_scheduler=valid_batch_scheduler)
        trainer.append_task(tasks.Print("Validset - Error        : {0:.2f} | {1:.2f}", valid_error.sum, valid_error.mean))

        if hyperparams['model'] == 'ffnn_regression':
            valid_batch_scheduler2 = batch_scheduler_factory(hyperparams,
                                                             dataset=validset,
                                                             train_mode=False)

            valid_l2 = loss_factory(hyperparams, model, validset, loss_type="expected_value")
            valid_l2_error = views.LossView(loss=valid_l2, batch_scheduler=valid_batch_scheduler2)
            trainer.append_task(tasks.Print("Validset - {}".format(valid_l2.__class__.__name__) + "\t: {0:.2f} | {1:.2f}", valid_l2_error.sum, valid_l2_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager
        model.drop_prob = hyperparams['drop_prob']  # Restore dropout

        lookahead_loss = valid_error.sum

        direction_norm = views.MonitorVariable(T.sqrt(sum(map(lambda d: T.sqr(d).sum(), loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum, direction_norm)
        trainer.append_task(logger)

        if args.view:
            import pylab as plt

            def _plot(*args, **kwargs):
                plt.figure(1)
                plt.clf()
                plt.show(False)
                plt.subplot(121)
                plt.plot(np.array(logger.get_variable_history(0)).flatten(), label="Valid (mean)")
                plt.plot(np.array(logger.get_variable_history(1)).flatten(), label="Valid (sum)")
                plt.legend()

                plt.subplot(122)
                plt.plot(np.array(logger.get_variable_history(2)).flatten(), label="||d||")
                plt.draw()

            trainer.append_task(tasks.Callback(_plot))

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(lookahead_loss, lookahead=args.lookahead, eps=args.lookahead_eps, callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()
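
The early-stopping task appended above stops training once the validation loss has not improved for `lookahead` epochs and saves the model on every improvement. A toy, library-free sketch of that logic (not smartlearner's actual API):

import numpy as np

def train_with_early_stopping(valid_losses, lookahead=10, eps=0.0):
    best_loss, best_epoch, epochs_without_improvement = np.inf, -1, 0
    for epoch, loss in enumerate(valid_losses):
        if loss < best_loss - eps:
            best_loss, best_epoch = loss, epoch
            epochs_without_improvement = 0
            print("*** Best epoch: {} ***".format(epoch))   # the real callback saves the model here
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= lookahead:
                break
    return best_epoch

print(train_with_early_stopping([1.0, 0.8, 0.7, 0.71, 0.72, 0.73], lookahead=3))   # 2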
Example #11
def test_gru_mixture_track():
    hidden_sizes = 50

    with Timer("Creating dummy volume", newline=True):
        volume_manager = neurotools.VolumeManager()
        dwi, gradients = make_dummy_dwi(nb_gradients=30,
                                        volume_shape=(10, 10, 10),
                                        seed=1234)
        volume = neurotools.resample_dwi(dwi, gradients.bvals,
                                         gradients.bvecs).astype(np.float32)

        volume_manager.register(volume)

    with Timer("Creating model"):
        hyperparams = {
            'model': 'gru_mixture',
            'SGD': "1e-2",
            'hidden_sizes': hidden_sizes,
            'learn_to_stop': False,
            'normalize': False,
            'activation': 'tanh',
            'feed_previous_direction': False,
            'predict_offset': False,
            'use_layer_normalization': False,
            'drop_prob': 0.,
            'use_zoneout': False,
            'skip_connections': False,
            'neighborhood_radius': None,
            'nb_seeds_per_voxel': 2,
            'step_size': 0.5,
            'batch_size': 200,
            'n_gaussians': 2,
            'seed': 1234
        }
        model = factories.model_factory(
            hyperparams,
            input_size=volume_manager.data_dimension,
            output_size=3,
            volume_manager=volume_manager)
        model.initialize(
            factories.weigths_initializer_factory("orthogonal", seed=1234))

    rng = np.random.RandomState(1234)
    mask = np.ones(volume.shape[:3])
    seeding_mask = np.random.randint(2, size=mask.shape)
    seeds = []
    indices = np.array(np.where(seeding_mask)).T
    for idx in indices:
        seeds_in_voxel = idx + rng.uniform(
            -0.5, 0.5, size=(hyperparams['nb_seeds_per_voxel'], 3))
        seeds.extend(seeds_in_voxel)
    seeds = np.array(seeds, dtype=theano.config.floatX)

    is_outside_mask = make_is_outside_mask(mask, np.eye(4), threshold=0.5)
    is_too_long = make_is_too_long(150)
    is_too_curvy = make_is_too_curvy(np.rad2deg(30))
    is_unlikely = make_is_unlikely(0.5)
    is_stopping = make_is_stopping({
        STOPPING_MASK: is_outside_mask,
        STOPPING_LENGTH: is_too_long,
        STOPPING_CURVATURE: is_too_curvy,
        STOPPING_LIKELIHOOD: is_unlikely
    })
    is_stopping.max_nb_points = 150

    args = SimpleNamespace()
    args.track_like_peter = False
    args.pft_nb_retry = 0
    args.pft_nb_backtrack_steps = 0
    args.use_max_component = False
    args.flip_x = False
    args.flip_y = False
    args.flip_z = False
    args.verbose = True

    tractogram = batch_track(model,
                             volume,
                             seeds,
                             step_size=hyperparams['step_size'],
                             is_stopping=is_stopping,
                             batch_size=hyperparams['batch_size'],
                             args=args)

    return True
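
A numpy-only sketch of the seeding step above: every nonzero voxel of the seeding mask contributes nb_seeds_per_voxel seeds, jittered uniformly within the voxel:

import numpy as np

rng = np.random.RandomState(1234)
seeding_mask = rng.randint(2, size=(10, 10, 10))   # toy binary mask
nb_seeds_per_voxel = 2

seeds = []
for idx in np.array(np.where(seeding_mask)).T:     # voxel indices of the nonzero entries
    # Voxel centers sit at integer coordinates, so jitter by at most half a voxel.
    seeds.extend(idx + rng.uniform(-0.5, 0.5, size=(nb_seeds_per_voxel, 3)))
seeds = np.array(seeds, dtype=np.float32)
print(seeds.shape)                                 # (nb_nonzero_voxels * nb_seeds_per_voxel, 3)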
Example #12
def main():
    parser = build_argparser()
    args = parser.parse_args()

    tracto_data = None

    if args.signal_source == "raw_signal":
        signal = nib.load(args.signal)
        signal.get_data()  # Forces loading volume in-memory.
        basename = re.sub(r'(\.gz|\.nii\.gz)$', '', args.signal)

        try:
            bvals = basename + '.bvals' if args.bvals is None else args.bvals
            bvecs = basename + '.bvecs' if args.bvecs is None else args.bvecs
            gradients = gradient_table(bvals, bvecs)
        except FileNotFoundError:
            try:
                bvals = basename + '.bval' if args.bvals is None else args.bvals
                bvecs = basename + '.bvec' if args.bvecs is None else args.bvecs
                gradients = gradient_table(bvals, bvecs)
            except FileNotFoundError as e:
                print("Could not find .bvals/.bvecs or .bval/.bvec files...")
                raise e

        tracto_data = TractographyData(signal, gradients)
    elif args.signal_source == "processed_signal":
        loaded_tracto_data = TractographyData.load(args.tracto_data)
        tracto_data = TractographyData(loaded_tracto_data.signal,
                                       loaded_tracto_data.gradients)

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(tracto_data.signal.affine)

    # Retrieve data.
    with Timer("Retrieving data", newline=args.verbose):
        for filename in sorted(args.bundles):
            if args.verbose:
                print("{}".format(filename))

            # Load streamlines
            tfile = nib.streamlines.load(filename)
            tractogram = tfile.tractogram

            original_streamlines = tractogram.streamlines
            lengths = length(original_streamlines)
            streamlines = [
                s for (s, l) in zip(original_streamlines, lengths)
                if l >= args.min_length
            ]

            # Make sure file is not empty
            if len(streamlines) > 0:
                if args.subsample_streamlines:
                    output_streamlines = subsample_streamlines(
                        streamlines, args.clustering_threshold,
                        args.removal_distance)

                    print("Total difference: {} / {}".format(
                        len(original_streamlines), len(output_streamlines)))
                    new_tractogram = nib.streamlines.Tractogram(
                        output_streamlines,
                        affine_to_rasmm=tractogram.affine_to_rasmm)
                    tractogram = new_tractogram

                tractogram.apply_affine(rasmm2vox_affine)

                # Add streamlines to the TractogramData
                bundle_name = os.path.splitext(os.path.basename(filename))[0]
                tracto_data.add(tractogram.streamlines, bundle_name)

    if args.verbose:
        diff = tracto_data.streamlines._data - tracto_data.streamlines._data.astype(
            args.dtype)
        precision_error = np.sum(np.sqrt(np.sum(diff**2, axis=1)))
        avg_precision_error = precision_error / len(
            tracto_data.streamlines._data)
        print("Precision error: {} (avg. {})".format(precision_error,
                                                     avg_precision_error))

    # Save streamlines coordinates using either float16 or float32.
    tracto_data.streamlines._data = tracto_data.streamlines._data.astype(
        args.dtype)

    # Save dataset
    tracto_data.save(args.out)
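
A self-contained sketch of the precision-error estimate computed above when casting streamline coordinates to a smaller dtype (synthetic points, float16 as the candidate dtype):

import numpy as np

points = np.random.rand(1000, 3).astype(np.float32) * 100   # streamline points in voxel space
dtype = np.float16                                           # candidate storage dtype
diff = points - points.astype(dtype)
precision_error = np.sum(np.sqrt(np.sum(diff ** 2, axis=1)))
print("Precision error: {} (avg. {})".format(precision_error, precision_error / len(points)))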
Example #13
def test_gru_mixture_fprop_neighborhood():
    hyperparams = {
        'model': 'gru_mixture',
        'SGD': "1e-2",
        'hidden_sizes': 50,
        'batch_size': 16,
        'learn_to_stop': False,
        'normalize': True,
        'activation': 'tanh',
        'feed_previous_direction': False,
        'predict_offset': False,
        'use_layer_normalization': False,
        'drop_prob': 0.,
        'use_zoneout': False,
        'skip_connections': False,
        'seed': 1234,
        'noisy_streamlines_sigma': None,
        'keep_step_size': True,
        'sort_streamlines': False,
        'n_gaussians': 2,
        'neighborhood_radius': 0.5
    }

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = factories.batch_scheduler_factory(hyperparams,
                                                            dataset=trainset)
        print("An epoch will be composed of {} updates.".format(
            batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hyperparams['hidden_sizes'],
              batch_scheduler.target_size)

    with Timer("Creating model"):
        model = factories.model_factory(
            hyperparams,
            input_size=volume_manager.data_dimension,
            output_size=batch_scheduler.target_size,
            volume_manager=volume_manager)
        model.initialize(
            factories.weigths_initializer_factory("orthogonal", seed=1234))

        print("Input size: {}".format(model.model_input_size))

    # Test fprop with missing streamlines from one subject in a batch
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs],
                          output,
                          updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        loss.loss,
        updates=model.graph_updates)

    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        list(optimizer.directions.values()),
        updates=model.graph_updates)

    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)

    return True
Example #14
def main():
    parser = build_argparser()
    args = parser.parse_args()

    signal = nib.load(args.signal)
    data = signal.get_data()

    # Compute matrix that brings streamlines back to diffusion voxel space.
    rasmm2vox_affine = np.linalg.inv(signal.affine)

    # Retrieve data.
    with Timer("Retrieving data"):
        print("Loading {}".format(args.filename))

        # Load streamlines (already in RASmm space)
        tfile = nib.streamlines.load(args.filename)
        tfile.tractogram.apply_affine(rasmm2vox_affine)

        # tfile.tractogram.apply_affine(rasmm2vox_affine)
        tractogram = Tractogram(streamlines=tfile.streamlines,
                                affine_to_rasmm=signal.affine)

    with Timer("Filtering streamlines"):

        # Get volume bounds
        x_max = data.shape[0] - 0.5
        y_max = data.shape[1] - 0.5
        z_max = data.shape[2] - 0.5

        mask = np.ones((len(tractogram), )).astype(bool)

        for i, s in enumerate(tractogram.streamlines):

            # Identify streamlines out of bounds
            oob_test = np.logical_or.reduce((
                s[:, 0] < -0.5,
                s[:, 0] >= x_max,  # Out of bounds on axis X
                s[:, 1] < -0.5,
                s[:, 1] >= y_max,  # Out of bounds on axis Y
                s[:, 2] < -0.5,
                s[:, 2] >= z_max))  # Out of bounds on axis Z

            if np.any(oob_test):
                mask[i] = False

        tractogram_filtered = tractogram[mask]
        tractogram_removed = tractogram[np.logical_not(mask)]

        print("Kept {} streamlines and removed {} streamlines".format(
            len(tractogram_filtered), len(tractogram_removed)))

    with Timer("Saving filtered and removed streamlines"):
        base_filename = args.out_prefix
        if args.out_prefix is None:
            base_filename = args.filename[:-4]

        tractogram_filtered_filename = "{}_filtered.tck".format(base_filename)
        tractogram_removed_filename = "{}_removed.tck".format(base_filename)

        # Save streamlines
        nib.streamlines.save(tractogram_filtered, tractogram_filtered_filename)
        nib.streamlines.save(tractogram_removed, tractogram_removed_filename)
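
# --- Illustrative sketch (not part of the original script) ---
# The same out-of-bounds test as above, isolated as a small helper. The name
# `streamline_in_bounds` is an assumption for illustration; it expects points
# already in voxel coordinates, with voxel centers at integer positions, so
# valid coordinates lie in [-0.5, dim - 0.5) along each axis.
import numpy as np


def streamline_in_bounds(streamline, volume_shape):
    """Return True if every point of `streamline` (an (N, 3) array) lies inside the volume."""
    lower = np.full(3, -0.5)
    upper = np.asarray(volume_shape[:3], dtype=float) - 0.5
    return bool(np.all((streamline >= lower) & (streamline < upper)))


# e.g. streamline_in_bounds(np.array([[0., 0., 0.], [9.4, 9.4, 9.4]]), (10, 10, 10)) -> True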
Example #15
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    hyperparams_to_exclude = ['max_epoch', 'force', 'name', 'view']
    # Defaults for hyperparameters added in newer versions but absent from older experiments.
    retrocompatibility_defaults = {'use_layer_normalization': False}
    experiment_path, hyperparams, resuming = utils.maybe_create_experiment_folder(
        args,
        exclude=hyperparams_to_exclude,
        retrocompatibility_defaults=retrocompatibility_defaults)

    # Log the command currently running.
    with open(pjoin(experiment_path, 'cmd.txt'), 'a') as f:
        f.write(" ".join(sys.argv) + "\n")

    print("Resuming:" if resuming else "Creating:", experiment_path)

    with Timer("Loading dataset", newline=True):
        trainset_volume_manager = VolumeManager()
        validset_volume_manager = VolumeManager()
        trainset = datasets.load_mask_classifier_dataset(
            args.train_subjects,
            trainset_volume_manager,
            name="trainset",
            use_sh_coeffs=args.use_sh_coeffs)
        validset = datasets.load_mask_classifier_dataset(
            args.valid_subjects,
            validset_volume_manager,
            name="validset",
            use_sh_coeffs=args.use_sh_coeffs)
        print("Dataset sizes:", len(trainset), " |", len(validset))

        batch_scheduler = MaskClassifierBatchScheduler(
            trainset, hyperparams['batch_size'], seed=hyperparams['seed'])
        print("An epoch will be composed of {} updates.".format(
            batch_scheduler.nb_updates_per_epoch))
        print(trainset_volume_manager.data_dimension, args.hidden_sizes,
              batch_scheduler.target_size)

    with Timer("Creating model"):
        input_size = trainset_volume_manager.data_dimension

        model = FFNN_Classification(trainset_volume_manager, input_size,
                                    hyperparams['hidden_sizes'])
        model.initialize(
            weigths_initializer_factory(args.weights_initialization,
                                        seed=args.initialization_seed))

    with Timer("Building optimizer"):
        loss = BinaryCrossEntropy(model, trainset)

        if args.clip_gradient is not None:
            loss.append_gradient_modifier(
                DirectionClipping(threshold=args.clip_gradient))

        optimizer = optimizer_factory(hyperparams, loss)

    with Timer("Building trainer"):
        trainer = Trainer(optimizer, batch_scheduler)

        # Log training error
        loss_monitor = views.MonitorVariable(loss.loss)
        avg_loss = tasks.AveragePerEpoch(loss_monitor)
        trainer.append_task(avg_loss)

        # Print average training loss.
        trainer.append_task(
            tasks.Print("Avg. training loss:         : {}", avg_loss))

        # HACK: To make sure all subjects in the volume_manager are used in a
        # batch, we have to split the trainset/validset into 2 volume managers.
        model.volume_manager = validset_volume_manager
        valid_loss = BinaryCrossEntropy(model, validset)
        valid_batch_scheduler = MaskClassifierBatchScheduler(
            validset, hyperparams['batch_size'], seed=hyperparams['seed'])

        valid_error = views.LossView(loss=valid_loss,
                                     batch_scheduler=valid_batch_scheduler)
        trainer.append_task(
            tasks.Print("Validset - Error        : {0:.2f} | {1:.2f}",
                        valid_error.sum, valid_error.mean))

        # HACK: Restore trainset volume manager
        model.volume_manager = trainset_volume_manager

        lookahead_loss = valid_error.sum

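        # Monitor the global gradient norm, i.e. the square root of the sum of
        # squared gradient entries over all parameters.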
        direction_norm = views.MonitorVariable(
            T.sqrt(sum(map(lambda d: T.sqr(d).sum(),
                           loss.gradients.values()))))
        # trainer.append_task(tasks.Print("||d|| : {0:.4f}", direction_norm))

        # logger = tasks.Logger(train_error.mean, valid_error.mean, valid_error.sum, direction_norm)
        logger = tasks.Logger(valid_error.mean, valid_error.sum,
                              direction_norm)
        trainer.append_task(logger)

        # Callback function to stop training if NaN is detected.
        def detect_nan(obj, status):
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Stopping training now.")
                sys.exit()

        trainer.append_task(tasks.Callback(detect_nan, each_k_update=1))

        # Callback function to save training progression.
        def save_training(obj, status):
            trainer.save(experiment_path)

        trainer.append_task(tasks.Callback(save_training))

        # Early stopping with a callback for saving every time model improves.
        def save_improvement(obj, status):
            """ Save best model and training progression. """
            if np.isnan(model.parameters[0].get_value().sum()):
                print("NaN detected! Not saving the model. Crashing now.")
                sys.exit()

            print("*** Best epoch: {0} ***\n".format(obj.best_epoch))
            model.save(experiment_path)

        # Print time for one epoch
        trainer.append_task(tasks.PrintEpochDuration())
        trainer.append_task(tasks.PrintTrainingDuration())
        trainer.append_task(tasks.PrintTime(each_k_update=100))  # Profiling

        # Add stopping criteria
        trainer.append_task(stopping_criteria.MaxEpochStopping(args.max_epoch))
        early_stopping = stopping_criteria.EarlyStopping(
            lookahead_loss,
            lookahead=args.lookahead,
            eps=args.lookahead_eps,
            callback=save_improvement)
        trainer.append_task(early_stopping)

    with Timer("Compiling Theano graph"):
        trainer.build_theano_graph()

    if resuming:
        if not os.path.isdir(pjoin(experiment_path, 'training')):
            print("No 'training/' folder. Assuming it failed before"
                  " the end of the first epoch. Starting a new training.")
        else:
            with Timer("Loading"):
                trainer.load(experiment_path)

    with Timer("Training"):
        trainer.train()

def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    data = MaskClassifierData.load(args.dataset)
    positive_coords = data.positive_coords
    negative_coords = data.negative_coords

    rng = np.random.RandomState(args.seed)

    with Timer("Splitting {} using split: {}".format(args.dataset,
                                                     args.split)):
        nb_positive_examples = positive_coords.shape[0]
        nb_negative_examples = negative_coords.shape[0]

        positive_indices = np.arange(nb_positive_examples)
        negative_indices = np.arange(nb_negative_examples)

        rng.shuffle(positive_indices)
        rng.shuffle(negative_indices)

        train_positive_size = int(
            np.round(args.split[0] * nb_positive_examples))
        train_negative_size = int(
            np.round(args.split[0] * nb_negative_examples))

        valid_positive_size = int(
            np.round(args.split[1] * nb_positive_examples))
        valid_negative_size = int(
            np.round(args.split[1] * nb_negative_examples))

        test_positive_size = int(np.round(args.split[2] *
                                          nb_positive_examples))
        test_negative_size = int(np.round(args.split[2] *
                                          nb_negative_examples))

        # Make sure the splits sum to nb_examples
        test_positive_size += nb_positive_examples - (
            train_positive_size + valid_positive_size + test_positive_size)
        test_negative_size += nb_negative_examples - (
            train_negative_size + valid_negative_size + test_negative_size)
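        # e.g. 7 positives with split (0.6, 0.2, 0.2): rounding gives 4/1/1 and the
        # leftover example is folded into the test split -> 4/1/2 (see also the
        # helper sketched after this example).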

        assert train_positive_size + valid_positive_size + test_positive_size == nb_positive_examples
        assert train_negative_size + valid_negative_size + test_negative_size == nb_negative_examples

        train_positive_indices = positive_indices[:train_positive_size]
        valid_positive_indices = positive_indices[
            train_positive_size:train_positive_size + valid_positive_size]
        test_positive_indices = positive_indices[train_positive_size +
                                                 valid_positive_size:]

        train_negative_indices = negative_indices[:train_negative_size]
        valid_negative_indices = negative_indices[
            train_negative_size:train_negative_size + valid_negative_size]
        test_negative_indices = negative_indices[train_negative_size +
                                                 valid_negative_size:]

        train_data = MaskClassifierData(
            data.signal, data.gradients, data.mask,
            positive_coords[train_positive_indices],
            negative_coords[train_negative_indices])
        valid_data = MaskClassifierData(
            data.signal, data.gradients, data.mask,
            positive_coords[valid_positive_indices],
            negative_coords[valid_negative_indices])
        test_data = MaskClassifierData(data.signal, data.gradients, data.mask,
                                       positive_coords[test_positive_indices],
                                       negative_coords[test_negative_indices])

    with Timer("Saving"):
        train_data.save(args.dataset[:-4] + "_trainset.npz")
        valid_data.save(args.dataset[:-4] + "_validset.npz")
        test_data.save(args.dataset[:-4] + "_testset.npz")

    if args.delete:
        os.remove(args.dataset)
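
# --- Illustrative sketch (not part of the original script) ---
# The split-size arithmetic used above, isolated as a helper: each size is
# rounded from the requested fraction and any rounding remainder is folded into
# the test split so the three sizes always sum to nb_examples. The name
# `split_sizes` is an assumption for illustration.
import numpy as np


def split_sizes(nb_examples, split):
    """`split` is a (train, valid, test) tuple of fractions summing to 1."""
    train = int(np.round(split[0] * nb_examples))
    valid = int(np.round(split[1] * nb_examples))
    test = int(np.round(split[2] * nb_examples))
    test += nb_examples - (train + valid + test)  # Absorb the rounding error.
    return train, valid, test


# e.g. split_sizes(7, (0.6, 0.2, 0.2)) -> (4, 1, 2)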
Example #17
def main():
    parser = build_argparser()
    args = parser.parse_args()
    print(args)
    print("Using Theano v.{}".format(theano.version.short_version))

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            args.subjects,
            volume_manager,
            name="dataset",
            use_sh_coeffs=args.use_sh_coeffs)
        print("Total streamlines: {}".format(len(dataset)))

    with Timer("Running T-SNE", newline=True):

        batch_scheduler = TractographyBatchScheduler(
            dataset,
            batch_size=min(len(dataset), 100000),
            noisy_streamlines_sigma=False,
            seed=1234,
            normalize_target=True)
        rng = np.random.RandomState(42)
        rng.shuffle(batch_scheduler.indices)

        # bundle_name_pattern = "CST_Left"
        # batch_inputs, batch_targets, batch_mask = batch_scheduler._prepare_batch(trainset.get_bundle(bundle_name_pattern, return_idx=True))
        inputs, _, mask = batch_scheduler._next_batch(0)

        # Keep the same number of streamlines per subject
        new_inputs = []
        new_mask = []
        for i in range(len(args.subjects)):
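            # The last input feature holds the subject/volume id assigned by
            # VolumeManager.register(), so this selects the streamlines of subject i.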
            subset = inputs[:, 0, -1] == i
            if subset.sum() < args.nb_streamlines_per_subject:
                raise NameError(
                    "Not enough streamlines for subject #{}".format(i))

            new_inputs += [inputs[subset][:args.nb_streamlines_per_subject]]
            new_mask += [mask[subset][:args.nb_streamlines_per_subject]]

        inputs = np.concatenate(new_inputs, axis=0)
        mask = np.concatenate(new_mask, axis=0)

        mask = mask.astype(bool)
        idx = np.arange(mask.sum())
        rng.shuffle(idx)

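        # Compile a function that evaluates the registered diffusion volumes at the
        # given (x, y, z, volume_id) coordinates, one feature vector per coordinate.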
        coords = T.matrix('coords')
        eval_at_coords = theano.function([coords],
                                         volume_manager.eval_at_coords(coords))

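        # Cap the number of evaluated coordinates (2000 per subject on average),
        # presumably to keep the t-SNE runtime manageable.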
        M = 2000 * len(dataset.subjects)
        coords = inputs[mask][idx[:M]]
        X = eval_at_coords(coords)

        from sklearn.manifold import TSNE
        tsne = TSNE(n_components=2, verbose=2, random_state=42)
        Y = tsne.fit_transform(X)

        import matplotlib.pyplot as plt
        plt.figure()
        ids = range(len(dataset.subjects))
        markers = ['s', 'o', '^', 'v', '<', '>', 'h']
        colors = ['cyan', 'darkorange', 'darkgreen', 'magenta', 'pink', 'k']
        for i, marker, color in zip(ids, markers, colors):
            idx = coords[:, -1] == i
            print("Subject #{}: ".format(i), idx.sum())
            plt.scatter(Y[idx, 0],
                        Y[idx, 1],
                        20,
                        color=color,
                        marker=marker,
                        label="Subject #{}".format(i))

        plt.legend()
        plt.show()
Example #18
def main():
    parser = build_parser()
    args = parser.parse_args()
    print(args)

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    # Defaults for hyperparameters added in newer versions but absent from older experiments.
    retrocompatibility_defaults = {
        'feed_previous_direction': False,
        'predict_offset': False,
        'normalize': False,
        'keep_step_size': False,
        'sort_streamlines': False,
        'use_layer_normalization': False,
        'drop_prob': 0.,
        'use_zoneout': False
    }
    for new_hyperparams, default_value in retrocompatibility_defaults.items():
        if new_hyperparams not in hyperparams:
            hyperparams[new_hyperparams] = default_value

    with Timer("Loading dataset", newline=True):
        volume_manager = VolumeManager()
        dataset = datasets.load_tractography_dataset(
            args.subjects,
            volume_manager,
            name="dataset",
            use_sh_coeffs=hyperparams['use_sh_coeffs'])
        print("Dataset size:", len(dataset))

    with Timer("Loading model"):
        model = None
        if hyperparams['model'] == 'gru_regression':
            from learn2track.models import GRU_Regression
            model = GRU_Regression.create(experiment_path,
                                          volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_gaussian':
            from learn2track.models import GRU_Gaussian
            model = GRU_Gaussian.create(experiment_path,
                                        volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_mixture':
            from learn2track.models import GRU_Mixture
            model = GRU_Mixture.create(experiment_path,
                                       volume_manager=volume_manager)
        elif hyperparams['model'] == 'gru_multistep':
            from learn2track.models import GRU_Multistep_Gaussian
            model = GRU_Multistep_Gaussian.create(
                experiment_path, volume_manager=volume_manager)
            model.k = 1
            model.m = 1
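            # Presumably forces single-step (k) and single-sample (m) predictions
            # when evaluating the multistep model.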
        elif hyperparams['model'] == 'ffnn_regression':
            from learn2track.models import FFNN_Regression
            model = FFNN_Regression.create(experiment_path,
                                           volume_manager=volume_manager)
        else:
            raise NameError("Unknown model: {}".format(hyperparams['model']))
        model.drop_prob = 0.  # Make sure dropout/zoneout is not used when testing

    with Timer("Building evaluation function"):
        # Override K for gru_multistep
        if 'k' in hyperparams:
            hyperparams['k'] = 1

        batch_scheduler = batch_scheduler_factory(
            hyperparams,
            dataset,
            train_mode=False,
            batch_size_override=args.batch_size)
        loss = loss_factory(hyperparams,
                            model,
                            dataset,
                            loss_type=args.loss_type)
        l2_error = views.LossView(loss=loss, batch_scheduler=batch_scheduler)
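        # l2_error.mean and l2_error.stderror are evaluated lazily over the whole
        # batch scheduler when .view() is called below.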

    with Timer("Evaluating...", newline=True):
        results_file = pjoin(experiment_path, "results.json")
        results = {}
        if os.path.isfile(results_file) and not args.force:
            print(
                "Loading saved results... (use --force to re-run evaluation)")
            results = smartutils.load_dict_from_json_file(results_file)

        tag = ""
        if args.loss_type == 'expected_value' or hyperparams[
                'model'] == 'gru_regression':
            tag = "_EV_L2_error"
        elif args.loss_type == 'maximum_component':
            tag = "_MC_L2_error"
        elif hyperparams['model'] in [
                'gru_gaussian', 'gru_mixture', 'gru_multistep'
        ]:
            tag = "_NLL"

        entry = args.dataset_name + tag

        if entry not in results or args.force:
            with Timer("Evaluating {}".format(entry)):
                dummy_status = Status()  # Forces recomputing results
                results[entry] = {
                    'mean': float(l2_error.mean.view(dummy_status)),
                    'stderror': float(l2_error.stderror.view(dummy_status))
                }
                smartutils.save_dict_to_json_file(
                    results_file, results)  # Update results file.

        print("{}: {:.4f} ± {:.4f}".format(entry, results[entry]['mean'],
                                           results[entry]['stderror']))
Example #19
def test_gru_multistep_fprop_k3():
    hidden_sizes = 50

    hyperparams = {
        'model': 'gru_multistep',
        'k': 3,
        'm': 3,
        'batch_size': 16,
        'SGD': "1e-2",
        'hidden_sizes': hidden_sizes,
        'learn_to_stop': False,
        'normalize': False,
        'noisy_streamlines_sigma': None,
        'shuffle_streamlines': True,
        'seed': 1234
    }

    with Timer("Creating dataset", newline=True):
        volume_manager = neurotools.VolumeManager()
        trainset = make_dummy_dataset(volume_manager)
        print("Dataset sizes:", len(trainset))

        batch_scheduler = factories.batch_scheduler_factory(hyperparams,
                                                            trainset,
                                                            train_mode=True)
        print("An epoch will be composed of {} updates.".format(
            batch_scheduler.nb_updates_per_epoch))
        print(volume_manager.data_dimension, hidden_sizes,
              batch_scheduler.target_size)

    with Timer("Creating model"):
        model = factories.model_factory(
            hyperparams,
            input_size=volume_manager.data_dimension,
            output_size=batch_scheduler.target_size,
            volume_manager=volume_manager)
        model.initialize(
            factories.weigths_initializer_factory("orthogonal", seed=1234))

    # Test fprop
    output = model.get_output(trainset.symb_inputs)
    fct = theano.function([trainset.symb_inputs],
                          output,
                          updates=model.graph_updates)

    batch_inputs, batch_targets, batch_mask = batch_scheduler._next_batch(2)
    out = fct(batch_inputs)

    with Timer("Building optimizer"):
        loss = factories.loss_factory(hyperparams, model, trainset)
        optimizer = factories.optimizer_factory(hyperparams, loss)

    fct_loss = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        loss.loss,
        updates=model.graph_updates)

    loss_value = fct_loss(batch_inputs, batch_targets, batch_mask)
    print("Loss:", loss_value)

    fct_optim = theano.function(
        [trainset.symb_inputs, trainset.symb_targets, trainset.symb_mask],
        list(optimizer.directions.values()),
        updates=model.graph_updates)

    dirs = fct_optim(batch_inputs, batch_targets, batch_mask)
Example #20
def main():
    parser = build_argparser()
    args = parser.parse_args()

    # Get experiment folder
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    # Load experiments hyperparameters
    try:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "hyperparams.json"))
    except FileNotFoundError:
        hyperparams = smartutils.load_dict_from_json_file(
            pjoin(experiment_path, "..", "hyperparams.json"))

    with Timer("Loading DWIs"):
        # Load gradients table
        dwi_name = args.dwi
        if dwi_name.endswith(".gz"):
            dwi_name = dwi_name[:-3]
        if dwi_name.endswith(".nii"):
            dwi_name = dwi_name[:-4]
        bvals_filename = dwi_name + ".bvals"
        bvecs_filename = dwi_name + ".bvecs"
        bvals, bvecs = dipy.io.gradients.read_bvals_bvecs(
            bvals_filename, bvecs_filename)

        dwi = nib.load(args.dwi)
        if hyperparams["use_sh_coeffs"]:
            # Use 45 spherical harmonic coefficients to represent the diffusion signal.
            weights = neurotools.get_spherical_harmonics_coefficients(
                dwi, bvals, bvecs).astype(np.float32)
        else:
            # Resample the diffusion signal to have 100 directions.
            weights = neurotools.resample_dwi(dwi, bvals,
                                              bvecs).astype(np.float32)

    with Timer("Loading model"):
        if hyperparams["model"] == "ffnn_classification":
            from learn2track.models import FFNN_Classification
            model_class = FFNN_Classification
        else:
            raise ValueError("Unknown model!")

        kwargs = {}
        volume_manager = neurotools.VolumeManager()
        volume_manager.register(weights)
        kwargs['volume_manager'] = volume_manager

        # Load the actual model.
        model = model_class.create(
            pjoin(experiment_path),
            **kwargs)  # Create new instance and restore model.
        print(str(model))

    with Timer("Generating mask"):
        symb_input = T.matrix(name="input")
        model_symb_pred = model.get_output(symb_input)
        f = theano.function(inputs=[symb_input], outputs=[model_symb_pred])

        generated_mask = np.zeros(dwi.shape[:3]).astype(np.float32)

        # all_coords.shape = (n_coords, 3)
        all_coords = np.argwhere(generated_mask == 0)
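        # generated_mask is still all zeros, so this enumerates every voxel of the DWI volume.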

        volume_ids = np.zeros((all_coords.shape[0], 1))
        all_coords_and_volume_ids = np.concatenate((all_coords, volume_ids),
                                                   axis=1).astype(np.float32)

        batch_size = args.batch_size if args.batch_size else len(
            all_coords_and_volume_ids)
        probs = []
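        # Try to classify all coordinates at once; on memory errors, halve the
        # batch size and retry until a batch fits.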
        while batch_size > 1:
            print("Trying to to process batches of size {} out of {}".format(
                batch_size, len(all_coords_and_volume_ids)))
            nb_batches = int(
                np.ceil(len(all_coords_and_volume_ids) / batch_size))
            try:
                for batch_count in range(nb_batches):
                    start = batch_count * batch_size
                    end = (batch_count + 1) * batch_size
                    probs.extend(f(all_coords_and_volume_ids[start:end])[-1])
                    print("Generated batch {} out of {}".format(
                        batch_count + 1, nb_batches))
                break
            except (MemoryError, RuntimeError):
                print("{} coordinates at the same time is too much!".format(
                    batch_size))
                batch_size //= 2
        if not probs:
            raise RuntimeError("Could not generate predictions...")

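        # Threshold the predicted probabilities at 0.5 to obtain a binary mask
        # (kept as float32 for the NIfTI output).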
        generated_mask[np.where(generated_mask == 0)] = np.array(probs) > 0.5

    with Timer("Saving generated mask"):
        filename = args.out
        if args.out is None:
            prefix = args.prefix
            if prefix is None:
                dwi_name = os.path.basename(args.dwi)
                if dwi_name.endswith(".nii.gz"):
                    dwi_name = dwi_name[:-7]
                else:  # .nii
                    dwi_name = dwi_name[:-4]

                prefix = os.path.basename(os.path.dirname(args.dwi)) + dwi_name
                prefix = prefix.replace(".", "_")

            filename = "{}.nii.gz".format(prefix)

        save_path = pjoin(experiment_path, filename)
        try:  # Create dirs, if needed.
            os.makedirs(os.path.dirname(save_path))
        except OSError:
            pass

        print("Saving to {}".format(save_path))
        mask = nib.Nifti1Image(generated_mask, dwi.affine)
        nib.save(mask, save_path)