Example #1
    def __init__(self,
                 volume_manager,
                 input_size,
                 hidden_sizes,
                 target_dims,
                 k,
                 m,
                 seed,
                 use_previous_direction=False,
                 use_layer_normalization=False,
                 drop_prob=0.,
                 use_zoneout=False,
                 **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates using multiple subjects.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        target_dims : int
            Number of dimensions of the multivariate Gaussian to estimate; the model outputs two distribution parameters for each dimension.
        k : int
            Number of steps ahead to predict (the model will predict all steps up to k).
        m : int
            Number of Monte-Carlo samples used to estimate the Gaussian parameters.
        seed : int
            Random seed to initialize the random noise used for sampling and dropout.
        use_previous_direction : bool
            Use the previous direction as an additional input.
        use_layer_normalization : bool
            Use LayerNormalization to normalize preactivations and stabilize hidden layer evolution.
        drop_prob : float
            Dropout/Zoneout probability for recurrent networks. See: https://arxiv.org/pdf/1512.05287.pdf & https://arxiv.org/pdf/1606.01305.pdf
        use_zoneout : bool
            Use the zoneout implementation instead of dropout.
        """
        super().__init__(input_size, hidden_sizes, use_layer_normalization,
                         drop_prob, use_zoneout, seed)
        self.target_dims = target_dims
        self.target_size = 2 * self.target_dims  # Output distribution parameters mu and sigma for each dimension

        self.volume_manager = volume_manager

        self.k = k
        self.m = m
        self.seed = seed

        self.use_previous_direction = use_previous_direction

        self.srng = MRG_RandomStreams(self.seed)

        # Do not use dropout/zoneout in last hidden layer
        self.layer_regression = LayerRegression(self.hidden_sizes[-1],
                                                self.target_size,
                                                normed=False)
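
For orientation, this snippet is the constructor of the GRU_Multistep_Gaussian class shown in full in Example #10. A hypothetical instantiation, assuming an already-built VolumeManager instance and illustrative sizes:

model = GRU_Multistep_Gaussian(volume_manager=volume_manager,  # assumed VolumeManager instance
                               input_size=100,                 # e.g. number of diffusion directions
                               hidden_sizes=[500, 500],
                               target_dims=3,                  # (x, y, z) direction
                               k=1,                            # single-step prediction
                               m=1,                            # one Monte-Carlo sample
                               seed=1234)
model.initialize()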
Example #2

 def __init__(self, input_size, hidden_sizes, output_size, **_):
     """
     Parameters
     ----------
     input_size : int
         Number of units each element Xi in the input sequence X has.
     hidden_sizes : int, list of int
         Number of hidden units each GRU should have.
     output_size : int
         Number of units the regression layer should have.
     """
     super().__init__(input_size, hidden_sizes)
     self.output_size = output_size
     self.layer_regression = LayerRegression(self.hidden_sizes[-1],
                                             self.output_size)
     self.stopping_layer = LayerDense(self.hidden_sizes[-1] + input_size,
                                      1,
                                      activation="sigmoid",
                                      name="stopping")
Example #3

 def __init__(self, input_size, hidden_sizes, output_size, **_):
     """
     Parameters
     ----------
     input_size : int
         Number of units each element Xi in the input sequence X has.
     hidden_sizes : int, list of int
         Number of hidden units each GRU should have.
     output_size : int
         Number of units the regression layer should have.
     """
     super().__init__(input_size, hidden_sizes)
     self.output_size = output_size
     self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.output_size)
     self.stopping_layer = LayerDense(self.hidden_sizes[-1] + input_size, 1, activation="sigmoid", name="stopping")
Example #4

 def __init__(self, volume_manager, input_size, hidden_sizes, output_size, use_previous_direction=False, **_):
     """
     Parameters
     ----------
     volume_manager : :class:`VolumeManager` object
         Used to evaluate the diffusion signal at specific coordinates.
     input_size : int
         Number of units each element Xi in the input sequence X has.
     hidden_sizes : int, list of int
         Number of hidden units each GRU should have.
     output_size : int
         Number of units the regression layer should have.
     use_previous_direction : bool
         Use the previous direction as an additional input.
     """
     super().__init__(input_size, hidden_sizes)
     self.volume_manager = volume_manager
     self.output_size = output_size
     self.use_previous_direction = use_previous_direction
     self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.output_size)
Example #5
    def __init__(
        self, volume_manager, input_size, hidden_sizes, target_dims, k, m, seed, use_previous_direction=False, **_
    ):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates using multiple subjects.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        target_dims : int
            Number of dimensions of the multivariate Gaussian to estimate; the model outputs two distribution parameters for each dimension.
        k : int
            Number of steps ahead to predict (the model will predict all steps up to k).
        m : int
            Number of Monte-Carlo samples used to estimate the Gaussian parameters.
        seed : int
            Random seed to initialize the random noise used for sampling.
        use_previous_direction : bool
            Use the previous direction as an additional input.
        """
        super().__init__(input_size, hidden_sizes)
        self.target_dims = target_dims
        self.target_size = 2 * self.target_dims  # Output distribution parameters mu and sigma for each dimension
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.target_size, normed=False)

        self.volume_manager = volume_manager

        self.k = k
        self.m = m
        self.seed = seed

        self.use_previous_direction = use_previous_direction

        self.srng = MRG_RandomStreams(self.seed)
Example #6

class GRU_RegressionAndBinaryClassification(GRU):
    """ A standard GRU model with both a regression output layer and a
    binary classification output layer.

    The regression layer consists of a fully connected layer (DenseLayer),
    whereas the binary classification layer consists of a fully connected
    layer with a sigmoid non-linearity that learns when to stop.
    """
    def __init__(self, input_size, hidden_sizes, output_size, **_):
        """
        Parameters
        ----------
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        output_size : int
            Number of units the regression layer should have.
        """
        super().__init__(input_size, hidden_sizes)
        self.output_size = output_size
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.output_size)
        self.stopping_layer = LayerDense(self.hidden_sizes[-1] + input_size, 1, activation="sigmoid", name="stopping")

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)
        self.stopping_layer.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['output_size'] = self.output_size
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters + self.stopping_layer.parameters

    def _fprop(self, Xi, Xi_plus1, *args):
        outputs = super()._fprop(Xi, *args)
        last_layer_h = outputs[len(self.hidden_sizes)-1]
        regression_out = self.layer_regression.fprop(last_layer_h)
        stopping = self.stopping_layer.fprop(T.concatenate([last_layer_h, Xi_plus1], axis=1))

        return outputs + (regression_out, stopping)

    def get_output(self, X):
        outputs_info_h = []
        for hidden_size in self.hidden_sizes:
            outputs_info_h.append(T.zeros((X.shape[0], hidden_size)))

        results, updates = theano.scan(fn=self._fprop,
                                       outputs_info=outputs_info_h + [None, None],
                                       sequences=[{"input": T.transpose(X, axes=(1, 0, 2)),  # We want to scan over sequence elements, not the examples.
                                                   "taps": [0, 1]}],
                                       n_steps=X.shape[1]-1)

        self.graph_updates = updates
        # Put back the examples so they are in the first dimension.
        self.regression_out = T.transpose(results[-2], axes=(1, 0, 2))
        self.stopping = T.transpose(results[-1], axes=(1, 0, 2))

        return self.regression_out, self.stopping

    def use(self, X):
        directions = self.get_output(X)
        return directions
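
Note that theano.scan is given the sequence with "taps": [0, 1], so each call of _fprop receives both the current element X[t] and the next one X[t+1] (hence the Xi_plus1 argument and n_steps=X.shape[1]-1). A plain NumPy sketch of the equivalent iteration, with fprop standing in for the step function:

import numpy as np

X = np.zeros((8, 10, 4))                   # (batch_size, seq_len, n_features)
for t in range(X.shape[1] - 1):            # n_steps = seq_len - 1
    xi, xi_plus1 = X[:, t], X[:, t + 1]    # taps [0, 1]: current and next element
    # ... fprop(xi, xi_plus1, *hidden_states) would run here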
Example #7

class GRU_Regression(GRU):
    """ A standard GRU model with a regression layer stacked on top of it.
    """
    def __init__(self, volume_manager, input_size, hidden_sizes, output_size, use_previous_direction=False, **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        output_size : int
            Number of units the regression layer should have.
        use_previous_direction : bool
            Use the previous direction as an additional input
        """
        super().__init__(input_size, hidden_sizes)
        self.volume_manager = volume_manager
        self.output_size = output_size
        self.use_previous_direction = use_previous_direction
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.output_size)

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['output_size'] = self.output_size
        hyperparameters['use_previous_direction'] = self.use_previous_direction
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters

    def _fprop_step(self, Xi, *args):
        # Xi.shape : (batch_size, 4)    *if self.use_previous_direction, Xi.shape : (batch_size,7)
        # coords + dwi ID (+ previous_direction)

        # coords : streamlines 3D coordinates.
        # coords.shape : (batch_size, 4) where the last column is a dwi ID.
        # args.shape : n_layers * (batch_size, layer_size)
        coords = Xi[:, :4]

        # Get diffusion data.
        # data_at_coords.shape : (batch_size, input_size)
        data_at_coords = self.volume_manager.eval_at_coords(coords)

        if self.use_previous_direction:
            # previous_direction.shape : (batch_size, 3)
            previous_direction = Xi[:, 4:]
            fprop_input = T.concatenate([data_at_coords, previous_direction], axis=1)
        else:
            fprop_input = data_at_coords

        # Hidden state to be passed to the next GRU iteration (next _fprop call)
        # next_hidden_state.shape : n_layers * (batch_size, layer_size)
        next_hidden_state = super()._fprop(fprop_input, *args)

        # Compute the direction to follow for step (t)
        regression_out = self.layer_regression.fprop(next_hidden_state[-1])

        return next_hidden_state + (regression_out,)

    def get_output(self, X):
        # X.shape : (batch_size, seq_len, n_features=[4|7])
        # For tractography n_features is (x,y,z) + (dwi_id,) + [previous_direction]

        outputs_info_h = []
        for hidden_size in self.hidden_sizes:
            outputs_info_h.append(T.zeros((X.shape[0], hidden_size)))

        # Pass the model parameters and the DWI volumes explicitly as
        # non-sequences with strict=True, so scan fails loudly instead of
        # silently capturing shared variables from the enclosing scope.
        results, updates = theano.scan(fn=self._fprop_step,
                                       # We want to scan over sequence elements, not the examples.
                                       sequences=[T.transpose(X, axes=(1, 0, 2))],
                                       outputs_info=outputs_info_h + [None],
                                       non_sequences=self.parameters + self.volume_manager.volumes,
                                       strict=True)

        self.graph_updates = updates
        # Put back the examples so they are in the first dimension.
        # regression_out.shape : (batch_size, seq_len, target_size=3)
        self.regression_out = T.transpose(results[-1], axes=(1, 0, 2))
        return self.regression_out

    def make_sequence_generator(self, subject_id=0, **_):
        """ Makes functions that return the prediction for x_{t+1} for every
        sequence in the batch given x_{t} and the current state of the model h^{l}_{t}.

        Parameters
        ----------
        subject_id : int, optional
            ID of the subject from which its diffusion data will be used. Default: 0.
        """

        # Build the sequence generator as a theano function.
        states_h = []
        for i in range(len(self.hidden_sizes)):
            state_h = T.matrix(name="layer{}_state_h".format(i))
            states_h.append(state_h)

        symb_x_t = T.matrix(name="x_t")

        new_states = self._fprop_step(symb_x_t, *states_h)
        new_states_h = new_states[:len(self.hidden_sizes)]

        # predictions.shape : (batch_size, target_size)
        predictions = new_states[-1]

        f = theano.function(inputs=[symb_x_t] + states_h,
                            outputs=[predictions] + list(new_states_h))

        def _gen(x_t, states, previous_direction=None):
            """ Returns the prediction for x_{t+1} for every
                sequence in the batch given x_{t} and the current states
                of the model h^{l}_{t}.

            Parameters
            ----------
            x_t : ndarray with shape (batch_size, 3)
                Streamline coordinate (x, y, z).
            states : list of 2D array of shape (batch_size, hidden_size)
                Current states of the network.
            previous_direction : ndarray with shape (batch_size, 3)
                If using the previous direction, these should be added to the input.

            Returns
            -------
            next_x_t : ndarray with shape (batch_size, 3)
                Directions to follow.
            new_states : list of 2D array of shape (batch_size, hidden_size)
                Updated states of the network after seeing x_t.
            """
            # Append the DWI ID of each sequence after the 3D coordinates.
            subject_ids = np.array([subject_id] * len(x_t), dtype=floatX)[:, None]

            if not self.use_previous_direction:
                x_t = np.c_[x_t, subject_ids]
            else:
                x_t = np.c_[x_t, subject_ids, previous_direction]

            results = f(x_t, *states)
            next_x_t = results[0]
            new_states = results[1:]
            return next_x_t, new_states

        return _gen
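
A hypothetical use of the returned closure, stepping a batch of two streamlines once (shapes follow the docstring; model is assumed to be an initialized GRU_Regression):

import numpy as np

gen = model.make_sequence_generator(subject_id=0)
x_t = np.zeros((2, 3), dtype=np.float32)                     # two (x, y, z) seed coordinates
states = [np.zeros((2, h), dtype=np.float32) for h in model.hidden_sizes]
directions, states = gen(x_t, states)                        # directions.shape : (2, 3)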
Example #8
    def __init__(self,
                 volume_manager,
                 input_size,
                 hidden_sizes,
                 output_size,
                 n_gaussians,
                 activation='tanh',
                 use_previous_direction=False,
                 use_layer_normalization=False,
                 drop_prob=0.,
                 use_zoneout=False,
                 use_skip_connections=False,
                 seed=1234,
                 **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        output_size : int
            Number of units the regression layer should have.
        n_gaussians : int
            Number of Gaussians in the mixture.
        activation : str
            Activation function to apply to the "cell candidate".
        use_previous_direction : bool
            Use the previous direction as an additional input.
        use_layer_normalization : bool
            Use LayerNormalization to normalize preactivations and stabilize hidden layer evolution.
        drop_prob : float
            Dropout/Zoneout probability for recurrent networks. See: https://arxiv.org/pdf/1512.05287.pdf & https://arxiv.org/pdf/1606.01305.pdf
        use_zoneout : bool
            Use the zoneout implementation instead of dropout.
        use_skip_connections : bool
            Use skip connections from the input to all hidden layers in the network, and from all hidden layers to the output layer.
        seed : int
            Random seed used for dropout normalization.
        """
        super(GRU_Regression, self).__init__(input_size,
                                             hidden_sizes,
                                             activation=activation,
                                             use_layer_normalization=use_layer_normalization,
                                             drop_prob=drop_prob,
                                             use_zoneout=use_zoneout,
                                             use_skip_connections=use_skip_connections,
                                             seed=seed)
        self.volume_manager = volume_manager
        self.n_gaussians = n_gaussians

        assert output_size == 3  # Only 3-dimensional target is supported for now
        self.output_size = output_size

        self.use_previous_direction = use_previous_direction

        # GRU_Mixture does not predict a direction, so it cannot predict an offset
        self.predict_offset = False

        # Do not use dropout/zoneout in last hidden layer
        self.layer_regression_size = sum([
            n_gaussians,                # Mixture weights
            n_gaussians * output_size,  # Means
            n_gaussians * output_size,  # Stds
        ])
        output_layer_input_size = sum(self.hidden_sizes) if self.use_skip_connections else self.hidden_sizes[-1]
        self.layer_regression = LayerRegression(output_layer_input_size,
                                                self.layer_regression_size)
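
The regression layer thus outputs n_gaussians mixture weights plus one mean and one standard deviation per Gaussian per output dimension. A quick check of that arithmetic for a hypothetical 2-component mixture over 3D directions:

n_gaussians, output_size = 2, 3
layer_regression_size = n_gaussians + 2 * n_gaussians * output_size
assert layer_regression_size == 14  # 2 weights + 6 means + 6 stds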
Example #9

class GRU_RegressionAndBinaryClassification(GRU):
    """ A standard GRU model with both a regression output layer and a
    binary classification output layer.

    The regression layer consists of a fully connected layer (DenseLayer),
    whereas the binary classification layer consists of a fully connected
    layer with a sigmoid non-linearity that learns when to stop.
    """
    def __init__(self, input_size, hidden_sizes, output_size, **_):
        """
        Parameters
        ----------
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        output_size : int
            Number of units the regression layer should have.
        """
        super().__init__(input_size, hidden_sizes)
        self.output_size = output_size
        self.layer_regression = LayerRegression(self.hidden_sizes[-1],
                                                self.output_size)
        self.stopping_layer = LayerDense(self.hidden_sizes[-1] + input_size,
                                         1,
                                         activation="sigmoid",
                                         name="stopping")

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)
        self.stopping_layer.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['output_size'] = self.output_size
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters + self.stopping_layer.parameters

    def _fprop(self, Xi, Xi_plus1, *args):
        outputs = super()._fprop(Xi, *args)
        last_layer_h = outputs[len(self.hidden_sizes) - 1]
        regression_out = self.layer_regression.fprop(last_layer_h)
        stopping = self.stopping_layer.fprop(
            T.concatenate([last_layer_h, Xi_plus1], axis=1))

        return outputs + (regression_out, stopping)

    def get_output(self, X):
        outputs_info_h = []
        for hidden_size in self.hidden_sizes:
            outputs_info_h.append(T.zeros((X.shape[0], hidden_size)))

        results, updates = theano.scan(
            fn=self._fprop,
            outputs_info=outputs_info_h + [None, None],
            # We want to scan over sequence elements, not the examples.
            sequences=[{"input": T.transpose(X, axes=(1, 0, 2)),
                        "taps": [0, 1]}],
            n_steps=X.shape[1] - 1)

        self.graph_updates = updates
        # Put back the examples so they are in the first dimension.
        self.regression_out = T.transpose(results[-2], axes=(1, 0, 2))
        self.stopping = T.transpose(results[-1], axes=(1, 0, 2))

        return self.regression_out, self.stopping

    def use(self, X):
        directions = self.get_output(X)
        return directions
Example #10
class GRU_Multistep_Gaussian(GRU):
    """ A multistep GRU model used to predict multivariate gaussian parameters (means and standard deviations)

    For each target dimension, the model outputs (m) distribution parameters estimates for each prediction horizon up to (k)
    """
    def __init__(self,
                 volume_manager,
                 input_size,
                 hidden_sizes,
                 target_dims,
                 k,
                 m,
                 seed,
                 use_previous_direction=False,
                 use_layer_normalization=False,
                 drop_prob=0.,
                 use_zoneout=False,
                 **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates using multiple subjects.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        target_dims : int
            Number of dimensions of the multivariate Gaussian to estimate; the model outputs two distribution parameters for each dimension.
        k : int
            Number of steps ahead to predict (the model will predict all steps up to k).
        m : int
            Number of Monte-Carlo samples used to estimate the Gaussian parameters.
        seed : int
            Random seed to initialize the random noise used for sampling and dropout.
        use_previous_direction : bool
            Use the previous direction as an additional input.
        use_layer_normalization : bool
            Use LayerNormalization to normalize preactivations and stabilize hidden layer evolution.
        drop_prob : float
            Dropout/Zoneout probability for recurrent networks. See: https://arxiv.org/pdf/1512.05287.pdf & https://arxiv.org/pdf/1606.01305.pdf
        use_zoneout : bool
            Use the zoneout implementation instead of dropout.
        """
        super().__init__(input_size, hidden_sizes, use_layer_normalization,
                         drop_prob, use_zoneout, seed)
        self.target_dims = target_dims
        self.target_size = 2 * self.target_dims  # Output distribution parameters mu and sigma for each dimension

        self.volume_manager = volume_manager

        self.k = k
        self.m = m
        self.seed = seed

        self.use_previous_direction = use_previous_direction

        self.srng = MRG_RandomStreams(self.seed)

        # Do not use dropout/zoneout in last hidden layer
        self.layer_regression = LayerRegression(self.hidden_sizes[-1],
                                                self.target_size,
                                                normed=False)

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['target_dims'] = self.target_dims
        hyperparameters['target_size'] = self.target_size
        hyperparameters['k'] = self.k
        hyperparameters['m'] = self.m
        hyperparameters['seed'] = self.seed
        hyperparameters['use_previous_direction'] = self.use_previous_direction
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters

    def _fprop_step(self, Xi, *args):
        # Xi.shape : (batch_size, 4)    *if self.use_previous_direction, Xi.shape : (batch_size,7)
        # coords + dwi ID (+ previous_direction)

        # coords : streamlines 3D coordinates.
        # coords.shape : (batch_size, 4) where the last column is a dwi ID.
        # args.shape : n_layers * (batch_size, layer_size)
        coords = Xi[:, :4]

        batch_size = Xi.shape[0]

        if self.k > 1:
            # Random noise used for sampling at each step (t+2)...(t+k)
            # epsilon.shape : (K-1, batch_size, target_dimensions)
            epsilon = self.srng.normal(
                (self.k - 1, batch_size, self.target_dims))

        # Get diffusion data.
        # data_at_coords.shape : (batch_size, input_size)
        data_at_coords = self.volume_manager.eval_at_coords(coords)

        if self.use_previous_direction:
            # previous_direction.shape : (batch_size, 3)
            previous_direction = Xi[:, 4:]
            fprop_input = T.concatenate([data_at_coords, previous_direction],
                                        axis=1)
        else:
            fprop_input = data_at_coords

        # Hidden state to be passed to the next GRU iteration (next _fprop call)
        # next_hidden_state.shape : n_layers * (batch_size, layer_size)
        next_hidden_state = super()._fprop(fprop_input, *args)

        # Compute the distribution parameters for step (t)
        # distribution_params.shape : (batch_size, target_size)
        distribution_params = self._predict_distribution_params(
            next_hidden_state[-1])
        # k_distribution_params = T.set_subtensor(k_distribution_params[:, 0, :, :], distribution_params)
        k_distribution_params = [distribution_params]

        sample_hidden_state = next_hidden_state

        for k in range(1, self.k):
            # Sample an input for the next step
            # sample_directions.shape : (batch_size, target_dimensions)
            sample_directions = self.get_stochastic_samples(
                distribution_params, epsilon[k - 1])

            # Follow *unnormalized* direction and get diffusion data at the new location.
            coords = T.concatenate(
                [coords[:, :3] + sample_directions, coords[:, 3:]], axis=1)

            data_at_coords = self.volume_manager.eval_at_coords(coords)

            if self.use_previous_direction:
                # previous_direction.shape : (batch_size, 3)
                previous_direction = sample_directions
                fprop_input = T.concatenate(
                    [data_at_coords, previous_direction], axis=1)
            else:
                fprop_input = data_at_coords

            # Compute the sample distribution parameters for step (t+k)
            sample_hidden_state = super()._fprop(fprop_input,
                                                 *sample_hidden_state)
            distribution_params = self._predict_distribution_params(
                sample_hidden_state[-1])
            k_distribution_params += [distribution_params]

        k_distribution_params = T.stack(k_distribution_params, axis=1)

        return next_hidden_state + (k_distribution_params, )

    @staticmethod
    def get_stochastic_samples(distribution_parameters, noise):
        # distribution_parameters.shape : (batch_size, [seq_len], target_size)
        # distribution_params[0] = [mu_x, mu_y, mu_z, std_x, std_y, std_z]

        # noise.shape : (batch_size, target_dims)

        mu = distribution_parameters[..., :3]
        sigma = distribution_parameters[..., 3:6]

        samples = mu + noise * sigma

        return samples
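
get_stochastic_samples is the standard reparameterization trick: a draw from N(mu, diag(sigma^2)) is obtained as mu + eps * sigma with eps ~ N(0, I). A minimal NumPy sketch with made-up values:

import numpy as np

rng = np.random.default_rng(1234)
mu = np.array([[0.5, -0.2, 0.1]])        # (batch_size, 3)
sigma = np.array([[0.1, 0.1, 0.3]])      # (batch_size, 3), positive
eps = rng.standard_normal(mu.shape)      # eps ~ N(0, I)
sample = mu + eps * sigma                # one draw from N(mu, diag(sigma**2))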

    @staticmethod
    def get_max_component_samples(distribution_parameters):
        # distribution_parameters.shape : (batch_size, [seq_len], target_size)
        # distribution_params[0] = [mu_x, mu_y, mu_z, std_x, std_y, std_z]
        mean = distribution_parameters[..., :3]
        return mean

    def _predict_distribution_params(self, hidden_state):
        # regression layer outputs an array [mean_x, mean_y, mean_z, log(std_x), log(std_y), log(std_z)]
        # regression_output.shape : (batch_size, target_size)
        regression_output = self.layer_regression.fprop(hidden_state)

        # Use T.exp to retrieve a positive sigma
        distribution_params = T.set_subtensor(
            regression_output[..., 3:6], T.exp(regression_output[..., 3:6]))

        # distribution_params.shape : (batch_size, target_size)
        return distribution_params

    def get_output(self, X):
        # X.shape : (batch_size, seq_len, n_features=4)
        # For tractography n_features is (x,y,z) + (dwi_id,)

        # Repeat Xs to compute M sample sequences for each input
        # inputs.shape : (batch_size*M, seq_len, n_features)
        inputs = T.repeat(X, self.m, axis=0)

        # outputs_info_h.shape : n_layers * (batch_size*M, layer_size)
        outputs_info_h = []
        for hidden_size in self.hidden_sizes:
            outputs_info_h.append(T.zeros((inputs.shape[0], hidden_size)))

        # results.shape : n_layers * (seq_len, batch_size*M, layer_size), (seq_len, batch_size*M, K, target_size)
        results, updates = theano.scan(
            # We want to scan over sequence elements, not the examples.
            fn=self._fprop_step,
            sequences=[T.transpose(inputs, axes=(1, 0, 2))],
            outputs_info=outputs_info_h + [None],
            non_sequences=self.parameters + self.volume_manager.volumes,
            strict=True)

        self.graph_updates = updates

        # Put back the examples so they are in the first dimension
        # transposed.shape : (batch_size*M, seq_len, K, target_size)
        transposed = T.transpose(results[-1], axes=(1, 0, 2, 3))

        # Split the M sample sequences into a new dimension
        # reshaped.shape : (batch_size, M, seq_len, K, target_size)
        reshaped = T.reshape(
            transposed,
            (X.shape[0], self.m, X.shape[1], self.k, self.target_size))

        # Transpose the output to get the M sequences dimension in the right place
        # regression_out.shape : (batch_size, seq_len, K, M, target_size)
        regression_out = T.transpose(reshaped, (0, 2, 3, 1, 4))

        return regression_out
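
The repeat/reshape/transpose bookkeeping above can be sanity-checked with plain NumPy; the sketch below (arbitrary sizes) confirms that the M sample sequences of each example end up along axis 3 of the output:

import numpy as np

batch_size, M, seq_len, K, target_size = 2, 3, 5, 4, 6
# Shape after the scan and the first transpose: (batch_size*M, seq_len, K, target_size)
transposed = np.zeros((batch_size * M, seq_len, K, target_size))
reshaped = transposed.reshape((batch_size, M, seq_len, K, target_size))
regression_out = reshaped.transpose(0, 2, 3, 1, 4)
assert regression_out.shape == (batch_size, seq_len, K, M, target_size)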

    def make_sequence_generator(self, subject_id=0, use_max_component=False):
        """ Makes functions that return the prediction for x_{t+1} for every
        sequence in the batch given x_{t} and the current state of the model h^{l}_{t}.

        Parameters
        ----------
        subject_id : int, optional
            ID of the subject from which its diffusion data will be used. Default: 0.
        """

        # Build the sequence generator as a theano function.
        states_h = []
        for i in range(len(self.hidden_sizes)):
            state_h = T.matrix(name="layer{}_state_h".format(i))
            states_h.append(state_h)

        symb_x_t = T.matrix(name="x_t")

        # Temporarily set $k$ to one.
        k_bak = self.k
        self.k = 1

        new_states = self._fprop_step(symb_x_t, *states_h)
        new_states_h = new_states[:len(self.hidden_sizes)]

        # model_output.shape : (batch_size, K=1, target_size)
        model_output = new_states[-1]

        distribution_params = model_output[:, 0, :]

        if use_max_component:
            predictions = self.get_max_component_samples(distribution_params)
        else:
            # Sample value from distribution
            srng = MRG_RandomStreams(seed=1234)

            batch_size = symb_x_t.shape[0]
            noise = srng.normal((batch_size, self.target_dims))

            # predictions.shape : (batch_size, target_dims)
            predictions = self.get_stochastic_samples(distribution_params,
                                                      noise)

        f = theano.function(inputs=[symb_x_t] + states_h,
                            outputs=[predictions] + list(new_states_h))

        self.k = k_bak  # Restore original $k$.

        def _gen(x_t, states, previous_direction=None):
            """ Returns the prediction for x_{t+1} for every
                sequence in the batch given x_{t} and the current states
                of the model h^{l}_{t}.

            Parameters
            ----------
            x_t : ndarray with shape (batch_size, 3)
                Streamline coordinate (x, y, z).
            states : list of 2D array of shape (batch_size, hidden_size)
                Current states of the network.
            previous_direction : ndarray with shape (batch_size, 3)
                If using the previous direction, these should be added to the input.

            Returns
            -------
            next_x_t : ndarray with shape (batch_size, 3)
                Directions to follow.
            new_states : list of 2D array of shape (batch_size, hidden_size)
                Updated states of the network after seeing x_t.
            """
            # Append the DWI ID of each sequence after the 3D coordinates.
            subject_ids = np.array([subject_id] * len(x_t), dtype=floatX)[:, None]

            if not self.use_previous_direction:
                x_t = np.c_[x_t, subject_ids]
            else:
                x_t = np.c_[x_t, subject_ids, previous_direction]

            results = f(x_t, *states)
            next_x_t = results[0]
            new_states = results[1:]
            return next_x_t, new_states

        return _gen

    def save(self, path):
        super().save(path)

        savedir = smartutils.create_folder(pjoin(path, type(self).__name__))
        state = {
            "version": 1,
            "_srng_rstate": self.srng.rstate,
            "_srng_state_updates": [state_update[0].get_value()
                                    for state_update in self.srng.state_updates]
        }

        np.savez(pjoin(savedir, "state.npz"), **state)

    def load(self, path):
        super().load(path)

        loaddir = pjoin(path, type(self).__name__)
        state = np.load(pjoin(loaddir, 'state.npz'))

        self.srng.rstate[:] = state['_srng_rstate']

        for state_update, saved_state in zip(self.srng.state_updates,
                                             state["_srng_state_updates"]):
            state_update[0].set_value(saved_state)
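
save and load round-trip the MRG_RandomStreams state through an ordinary .npz archive. The underlying pattern, stripped of the Theano specifics (file name arbitrary):

import numpy as np

state = {"version": 1, "rng_state": np.arange(6, dtype=np.int32)}
np.savez("state.npz", **state)            # each dict entry becomes a named array
loaded = np.load("state.npz")
assert int(loaded["version"]) == 1
assert np.array_equal(loaded["rng_state"], state["rng_state"])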
Example #11
class GRU_Multistep_Gaussian(GRU):
    """ A multistep GRU model used to predict multivariate gaussian parameters (means and standard deviations)

    For each target dimension, the model outputs (m) distribution parameters estimates for each prediction horizon up to (k)
    """

    def __init__(
        self, volume_manager, input_size, hidden_sizes, target_dims, k, m, seed, use_previous_direction=False, **_
    ):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates using multiple subjects.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        target_dims : int
            Number of dimensions of the multivariate Gaussian to estimate; the model outputs two distribution parameters for each dimension.
        k : int
            Number of steps ahead to predict (the model will predict all steps up to k).
        m : int
            Number of Monte-Carlo samples used to estimate the Gaussian parameters.
        seed : int
            Random seed to initialize the random noise used for sampling.
        use_previous_direction : bool
            Use the previous direction as an additional input.
        """
        super().__init__(input_size, hidden_sizes)
        self.target_dims = target_dims
        self.target_size = 2 * self.target_dims  # Output distribution parameters mu and sigma for each dimension
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.target_size, normed=False)

        self.volume_manager = volume_manager

        self.k = k
        self.m = m
        self.seed = seed

        self.use_previous_direction = use_previous_direction

        self.srng = MRG_RandomStreams(self.seed)

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters["target_dims"] = self.target_dims
        hyperparameters["target_size"] = self.target_size
        hyperparameters["k"] = self.k
        hyperparameters["m"] = self.m
        hyperparameters["seed"] = self.seed
        hyperparameters["use_previous_direction"] = self.use_previous_direction
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters

    def _fprop_step(self, Xi, *args):
        # Xi.shape : (batch_size, 4)    *if self.use_previous_direction, Xi.shape : (batch_size,7)
        # coords + dwi ID (+ previous_direction)

        # coords : streamlines 3D coordinates.
        # coords.shape : (batch_size, 4) where the last column is a dwi ID.
        # args.shape : n_layers * (batch_size, layer_size)
        coords = Xi[:, :4]

        batch_size = Xi.shape[0]

        if self.k > 1:
            # Random noise used for sampling at each step (t+2)...(t+k)
            # epsilon.shape : (K-1, batch_size, target_dimensions)
            epsilon = self.srng.normal((self.k - 1, batch_size, self.target_dims))

        # Get diffusion data.
        # data_at_coords.shape : (batch_size, input_size)
        data_at_coords = self.volume_manager.eval_at_coords(coords)

        if self.use_previous_direction:
            # previous_direction.shape : (batch_size, 3)
            previous_direction = Xi[:, 4:]
            fprop_input = T.concatenate([data_at_coords, previous_direction], axis=1)
        else:
            fprop_input = data_at_coords

        # Hidden state to be passed to the next GRU iteration (next _fprop call)
        # next_hidden_state.shape : n_layers * (batch_size, layer_size)
        next_hidden_state = super()._fprop(fprop_input, *args)

        # Compute the distribution parameters for step (t)
        # distribution_params.shape : (batch_size, target_size)
        distribution_params = self._predict_distribution_params(next_hidden_state[-1])
        # k_distribution_params = T.set_subtensor(k_distribution_params[:, 0, :, :], distribution_params)
        k_distribution_params = [distribution_params]

        sample_hidden_state = next_hidden_state

        for k in range(1, self.k):
            # Sample an input for the next step
            # sample_directions.shape : (batch_size, target_dimensions)
            sample_directions = self.get_stochastic_samples(distribution_params, epsilon[k - 1])

            # Follow *unnormalized* direction and get diffusion data at the new location.
            coords = T.concatenate([coords[:, :3] + sample_directions, coords[:, 3:]], axis=1)

            data_at_coords = self.volume_manager.eval_at_coords(coords)

            if self.use_previous_direction:
                # previous_direction.shape : (batch_size, 3)
                previous_direction = sample_directions
                fprop_input = T.concatenate([data_at_coords, previous_direction], axis=1)
            else:
                fprop_input = data_at_coords

            # Compute the sample distribution parameters for step (t+k)
            sample_hidden_state = super()._fprop(fprop_input, *sample_hidden_state)
            distribution_params = self._predict_distribution_params(sample_hidden_state[-1])
            k_distribution_params += [distribution_params]

        k_distribution_params = T.stack(k_distribution_params, axis=1)

        return next_hidden_state + (k_distribution_params,)

    @staticmethod
    def get_stochastic_samples(distribution_parameters, noise):
        # distribution_parameters.shape : (batch_size, [seq_len], target_size)
        # distribution_params[0] = [mu_x, mu_y, mu_z, std_x, std_y, std_z]

        # noise.shape : (batch_size, target_dims)

        mu = distribution_parameters[..., :3]
        sigma = distribution_parameters[..., 3:6]

        samples = mu + noise * sigma

        return samples

    @staticmethod
    def get_max_component_samples(distribution_parameters):
        # distribution_parameters.shape : (batch_size, [seq_len], target_size)
        # distribution_params[0] = [mu_x, mu_y, mu_z, std_x, std_y, std_z]
        mean = distribution_parameters[..., :3]
        return mean

    def _predict_distribution_params(self, hidden_state):
        # regression layer outputs an array [mean_x, mean_y, mean_z, log(std_x), log(std_y), log(std_z)]
        # regression_output.shape : (batch_size, target_size)
        regression_output = self.layer_regression.fprop(hidden_state)

        # Use T.exp to retrieve a positive sigma
        distribution_params = T.set_subtensor(regression_output[..., 3:6], T.exp(regression_output[..., 3:6]))

        # distribution_params.shape : (batch_size, target_size)
        return distribution_params

    def get_output(self, X):
        # X.shape : (batch_size, seq_len, n_features=4)
        # For tractography n_features is (x,y,z) + (dwi_id,)

        # Repeat Xs to compute M sample sequences for each input
        # inputs.shape : (batch_size*M, seq_len, n_features)
        inputs = T.repeat(X, self.m, axis=0)

        # outputs_info_h.shape : n_layers * (batch_size*M, layer_size)
        outputs_info_h = []
        for hidden_size in self.hidden_sizes:
            outputs_info_h.append(T.zeros((inputs.shape[0], hidden_size)))

        # results.shape : n_layers * (seq_len, batch_size*M, layer_size), (seq_len, batch_size*M, K, target_size)
        results, updates = theano.scan(
            fn=self._fprop_step,  # We want to scan over sequence elements, not the examples.
            sequences=[T.transpose(inputs, axes=(1, 0, 2))],
            outputs_info=outputs_info_h + [None],
            non_sequences=self.parameters + self.volume_manager.volumes,
            strict=True,
        )

        self.graph_updates = updates

        # Put back the examples so they are in the first dimension
        # transposed.shape : (batch_size*M, seq_len, K, target_size)
        transposed = T.transpose(results[-1], axes=(1, 0, 2, 3))

        # Split the M sample sequences into a new dimension
        # reshaped.shape : (batch_size, M, seq_len, K, target_size)
        reshaped = T.reshape(transposed, (X.shape[0], self.m, X.shape[1], self.k, self.target_size))

        # Transpose the output to get the M sequences dimension in the right place
        # regression_out.shape : (batch_size, seq_len, K, M, target_size)
        regression_out = T.transpose(reshaped, (0, 2, 3, 1, 4))

        return regression_out

    def make_sequence_generator(self, subject_id=0, use_max_component=False):
        """ Makes functions that return the prediction for x_{t+1} for every
        sequence in the batch given x_{t} and the current state of the model h^{l}_{t}.

        Parameters
        ----------
        subject_id : int, optional
            ID of the subject from which its diffusion data will be used. Default: 0.
        """

        # Build the sequence generator as a theano function.
        states_h = []
        for i in range(len(self.hidden_sizes)):
            state_h = T.matrix(name="layer{}_state_h".format(i))
            states_h.append(state_h)

        symb_x_t = T.matrix(name="x_t")

        # Temporarily set $k$ to one.
        k_bak = self.k
        self.k = 1

        new_states = self._fprop_step(symb_x_t, *states_h)
        new_states_h = new_states[: len(self.hidden_sizes)]

        # model_output.shape : (batch_size, K=1, target_size)
        model_output = new_states[-1]

        distribution_params = model_output[:, 0, :]

        if use_max_component:
            predictions = self.get_max_component_samples(distribution_params)
        else:
            # Sample value from distribution
            srng = MRG_RandomStreams(seed=1234)

            batch_size = symb_x_t.shape[0]
            noise = srng.normal((batch_size, self.target_dims))

            # predictions.shape : (batch_size, target_dims)
            predictions = self.get_stochastic_samples(distribution_params, noise)

        f = theano.function(inputs=[symb_x_t] + states_h, outputs=[predictions] + list(new_states_h))

        self.k = k_bak  # Restore original $k$.

        def _gen(x_t, states, previous_direction=None):
            """ Returns the prediction for x_{t+1} for every
                sequence in the batch given x_{t} and the current states
                of the model h^{l}_{t}.

            Parameters
            ----------
            x_t : ndarray with shape (batch_size, 3)
                Streamline coordinate (x, y, z).
            states : list of 2D array of shape (batch_size, hidden_size)
                Current states of the network.
            previous_direction : ndarray with shape (batch_size, 3)
                If using the previous direction, these should be added to the input.

            Returns
            -------
            next_x_t : ndarray with shape (batch_size, 3)
                Directions to follow.
            new_states : list of 2D array of shape (batch_size, hidden_size)
                Updated states of the network after seeing x_t.
            """
            # Append the DWI ID of each sequence after the 3D coordinates.
            subject_ids = np.array([subject_id] * len(x_t), dtype=floatX)[:, None]

            if not self.use_previous_direction:
                x_t = np.c_[x_t, subject_ids]
            else:
                x_t = np.c_[x_t, subject_ids, previous_direction]

            results = f(x_t, *states)
            next_x_t = results[0]
            new_states = results[1:]
            return next_x_t, new_states

        return _gen

    def save(self, path):
        super().save(path)

        savedir = smartutils.create_folder(pjoin(path, type(self).__name__))
        state = {
            "version": 1,
            "_srng_rstate": self.srng.rstate,
            "_srng_state_updates": [state_update[0].get_value() for state_update in self.srng.state_updates],
        }

        np.savez(pjoin(savedir, "state.npz"), **state)

    def load(self, path):
        super().load(path)

        loaddir = pjoin(path, type(self).__name__)
        state = np.load(pjoin(loaddir, "state.npz"))

        self.srng.rstate[:] = state["_srng_rstate"]

        for state_update, saved_state in zip(self.srng.state_updates, state["_srng_state_updates"]):
            state_update[0].set_value(saved_state)
Example #12

class FFNN_Regression(FFNN):
    """ A standard FFNN model with a regression layer stacked on top of it.
    """

    def __init__(self, volume_manager, input_size, hidden_sizes, output_size, activation, use_previous_direction=False, **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates.
        input_size : int
            Number of units each element X has.
        hidden_sizes : int, list of int
            Number of hidden units each FFNN layer should have.
        output_size : int
            Number of units the regression layer should have.
        activation : str
            Name of the activation function to use in the hidden layers
        use_previous_direction : bool
            Use the previous direction as an additional input
        """
        super().__init__(input_size, hidden_sizes, activation)
        self.volume_manager = volume_manager
        self.output_size = output_size
        self.use_previous_direction = use_previous_direction
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.output_size)

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['output_size'] = self.output_size
        hyperparameters['use_previous_direction'] = self.use_previous_direction
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters

    def _fprop(self, Xi, *args):
        # Xi.shape : (batch_size, 4)    *if self.use_previous_direction, Xi.shape : (batch_size,7)
        # coords + dwi ID (+ previous_direction)

        # coords : streamlines 3D coordinates.
        # coords.shape : (batch_size, 4) where the last column is a dwi ID.
        # args.shape : n_layers * (batch_size, layer_size)
        coords = Xi[:, :4]

        # Get diffusion data.
        # data_at_coords.shape : (batch_size, input_size)
        data_at_coords = self.volume_manager.eval_at_coords(coords)

        if self.use_previous_direction:
            # previous_direction.shape : (batch_size, 3)
            previous_direction = Xi[:, 4:]
            fprop_input = T.concatenate([data_at_coords, previous_direction], axis=1)
        else:
            fprop_input = data_at_coords

        # Outputs of each hidden layer (the FFNN has no recurrent state).
        # layer_outputs.shape : n_layers * (batch_size, layer_size)
        layer_outputs = super()._fprop(fprop_input)

        # Compute the direction to follow for step (t)
        regression_out = self.layer_regression.fprop(layer_outputs[-1])

        return layer_outputs + (regression_out,)

    def make_sequence_generator(self, subject_id=0, **_):
        """ Makes functions that return the prediction for x_{t+1} for every
        sequence in the batch given x_{t}.

        Parameters
        ----------
        subject_id : int, optional
            ID of the subject from which its diffusion data will be used. Default: 0.
        """

        # Build the sequence generator as a theano function.
        symb_x_t = T.matrix(name="x_t")

        layer_outputs = self._fprop(symb_x_t)

        # predictions.shape : (batch_size, target_size)
        predictions = layer_outputs[-1]

        f = theano.function(inputs=[symb_x_t], outputs=[predictions])

        def _gen(x_t, states, previous_direction=None):
            """ Returns the prediction for x_{t+1} for every
                sequence in the batch given x_{t}.

            Parameters
            ----------
            x_t : ndarray with shape (batch_size, 3)
                Streamline coordinate (x, y, z).
            states : list of 2D array of shape (batch_size, hidden_size)
                Current states of the network.
            previous_direction : ndarray with shape (batch_size, 3)
                If using the previous direction, these should be added to the input.

            Returns
            -------
            next_x_t : ndarray with shape (batch_size, 3)
                Directions to follow.
            new_states : list of 2D array of shape (batch_size, hidden_size)
                Updated states of the network after seeing x_t.
            """
            # Append the DWI ID of each sequence after the 3D coordinates.
            subject_ids = np.array([subject_id] * len(x_t), dtype=floatX)[:, None]

            if not self.use_previous_direction:
                x_t = np.c_[x_t, subject_ids]
            else:
                x_t = np.c_[x_t, subject_ids, previous_direction]

            results = f(x_t)
            next_x_t = results[-1]

            next_x_t_both_directions = np.stack([next_x_t, -next_x_t], axis=-1)

            next_x_t = next_x_t_both_directions[
                (np.arange(next_x_t_both_directions.shape[0])[:, None]),
                (np.arange(next_x_t_both_directions.shape[1])[None, :]),
                np.argmin(np.linalg.norm(next_x_t_both_directions - previous_direction[:, :, None], axis=1), axis=1)[:, None]]

            # FFNN_Regression is not a recurrent network, return original states
            new_states = states

            return next_x_t, new_states

        return _gen
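
The indexing at the end of _gen resolves a sign ambiguity: a feed-forward network has no recurrent state to distinguish forward from backward along the streamline, so whichever of next_x_t and -next_x_t lies closer to the previous direction is kept. A NumPy sketch of that selection for a single streamline:

import numpy as np

next_x_t = np.array([[0.1, -0.2, 0.9]])            # predicted direction
previous_direction = np.array([[0.0, 0.0, -1.0]])
both = np.stack([next_x_t, -next_x_t], axis=-1)    # (batch_size, 3, 2)
dist = np.linalg.norm(both - previous_direction[:, :, None], axis=1)  # (batch_size, 2)
pick = np.argmin(dist, axis=1)                     # index of the closer candidate
resolved = both[np.arange(len(next_x_t)), :, pick] # here: [[-0.1, 0.2, -0.9]]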
Example #13
    def __init__(self,
                 volume_manager,
                 input_size,
                 hidden_sizes,
                 output_size,
                 use_previous_direction=False,
                 use_layer_normalization=False,
                 drop_prob=0.,
                 use_zoneout=False,
                 use_skip_connections=False,
                 neighborhood_radius=False,
                 learn_to_stop=False,
                 seed=1234,
                 **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        output_size : int
            Number of units the regression layer should have.
        use_previous_direction : bool
            Use the previous direction as an additional input.
        use_layer_normalization : bool
            Use LayerNormalization to normalize preactivations and stabilize hidden layer evolution.
        drop_prob : float
            Dropout/Zoneout probability for recurrent networks. See: https://arxiv.org/pdf/1512.05287.pdf & https://arxiv.org/pdf/1606.01305.pdf
        use_zoneout : bool
            Use the zoneout implementation instead of dropout.
        use_skip_connections : bool
            Use skip connections from the input to all hidden layers in the network, and from all hidden layers to the output layer.
        neighborhood_radius : float
            Add the signal at positions around the current streamline coordinate to the input (radius given in voxel space); None = no neighborhood.
        learn_to_stop : bool
            Predict whether the streamline being generated should stop or not.
        seed : int
            Random seed used for dropout normalization.
        """
        self.neighborhood_radius = neighborhood_radius
        self.model_input_size = input_size
        if self.neighborhood_radius:
            self.neighborhood_directions = get_neighborhood_directions(self.neighborhood_radius)
            # Model input size is increased when using a neighborhood.
            self.model_input_size = input_size * self.neighborhood_directions.shape[0]

        super(GRU_Regression, self).__init__(self.model_input_size,
                                             hidden_sizes,
                                             use_layer_normalization=use_layer_normalization,
                                             drop_prob=drop_prob,
                                             use_zoneout=use_zoneout,
                                             use_skip_connections=use_skip_connections,
                                             seed=seed)
        # Restore input size
        self.input_size = input_size

        self.volume_manager = volume_manager

        assert output_size == 3  # Only 3-dimensional target is supported for now
        self.output_size = output_size

        self.use_previous_direction = use_previous_direction

        # GRU_Gaussian does not predict a direction, so it cannot predict an offset
        self.predict_offset = False
        self.learn_to_stop = learn_to_stop

        # Do not use dropout/zoneout in last hidden layer
        self.layer_regression_size = sum([
            output_size,  # Means
            output_size,  # Stds
        ])
        output_layer_input_size = sum(self.hidden_sizes) if self.use_skip_connections else self.hidden_sizes[-1]
        self.layer_regression = LayerRegression(output_layer_input_size,
                                                self.layer_regression_size)
        if self.learn_to_stop:
            # Predict whether a streamline should stop or keep growing
            self.layer_stopping = LayerDense(output_layer_input_size,
                                             1,
                                             activation='sigmoid',
                                             name="GRU_Gaussian_stopping")