class GRU_RegressionAndBinaryClassification(GRU):
    """ A standard GRU model with both a regression output layer and a binary classification output layer.

    The regression layer consists of a fully connected layer (DenseLayer), whereas the binary
    classification layer consists of a fully connected layer with a sigmoid non-linearity
    that learns when to stop.
    """
    def __init__(self, input_size, hidden_sizes, output_size, **_):
        """
        Parameters
        ----------
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        output_size : int
            Number of units the regression layer should have.
        """
        super().__init__(input_size, hidden_sizes)
        self.output_size = output_size
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.output_size)
        self.stopping_layer = LayerDense(self.hidden_sizes[-1] + input_size, 1,
                                         activation="sigmoid", name="stopping")

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)
        self.stopping_layer.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['output_size'] = self.output_size
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters + self.stopping_layer.parameters

    def _fprop(self, Xi, Xi_plus1, *args):
        outputs = super()._fprop(Xi, *args)
        last_layer_h = outputs[len(self.hidden_sizes) - 1]
        regression_out = self.layer_regression.fprop(last_layer_h)
        stopping = self.stopping_layer.fprop(T.concatenate([last_layer_h, Xi_plus1], axis=1))
        return outputs + (regression_out, stopping)

    def get_output(self, X):
        outputs_info_h = []
        for hidden_size in self.hidden_sizes:
            outputs_info_h.append(T.zeros((X.shape[0], hidden_size)))

        results, updates = theano.scan(fn=self._fprop,
                                       outputs_info=outputs_info_h + [None, None],
                                       # We want to scan over sequence elements, not the examples.
                                       sequences=[{"input": T.transpose(X, axes=(1, 0, 2)),
                                                   "taps": [0, 1]}],
                                       n_steps=X.shape[1] - 1)
        self.graph_updates = updates

        # Put back the examples so they are in the first dimension.
        self.regression_out = T.transpose(results[-2], axes=(1, 0, 2))
        self.stopping = T.transpose(results[-1], axes=(1, 0, 2))
        return self.regression_out, self.stopping

    def use(self, X):
        directions = self.get_output(X)
        return directions
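# A minimal numpy sketch (illustration only, not part of the model) of the scan pattern
# used in get_output above: with taps [0, 1], the step function receives both x_t and
# x_{t+1}, which is why the scan runs for seq_len - 1 steps and the stopping layer can
# condition on the next input.
import numpy as np

X = np.arange(2 * 4 * 3).reshape(2, 4, 3)  # (batch_size=2, seq_len=4, n_features=3)
steps = [(X[:, t], X[:, t + 1]) for t in range(X.shape[1] - 1)]  # n_steps = seq_len - 1
assert len(steps) == X.shape[1] - 1  # one (Xi, Xi_plus1) pair per scan step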
class GRU_Regression(GRU):
    """ A standard GRU model with a regression layer stacked on top of it. """
    def __init__(self, volume_manager, input_size, hidden_sizes, output_size, use_previous_direction=False, **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        output_size : int
            Number of units the regression layer should have.
        use_previous_direction : bool
            Use the previous direction as an additional input.
        """
        super().__init__(input_size, hidden_sizes)
        self.volume_manager = volume_manager
        self.output_size = output_size
        self.use_previous_direction = use_previous_direction
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.output_size)

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['output_size'] = self.output_size
        hyperparameters['use_previous_direction'] = self.use_previous_direction
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters

    def _fprop_step(self, Xi, *args):
        # Xi.shape : (batch_size, 4); if self.use_previous_direction, Xi.shape : (batch_size, 7)
        # Xi contains the streamline 3D coordinates + dwi ID (+ previous_direction).
        # coords.shape : (batch_size, 4) where the last column is a dwi ID.
        # args.shape : n_layers * (batch_size, layer_size)
        coords = Xi[:, :4]

        # Get diffusion data.
        # data_at_coords.shape : (batch_size, input_size)
        data_at_coords = self.volume_manager.eval_at_coords(coords)

        if self.use_previous_direction:
            # previous_direction.shape : (batch_size, 3)
            previous_direction = Xi[:, 4:]
            fprop_input = T.concatenate([data_at_coords, previous_direction], axis=1)
        else:
            fprop_input = data_at_coords

        # Hidden state to be passed to the next GRU iteration (next _fprop call).
        # next_hidden_state.shape : n_layers * (batch_size, layer_size)
        next_hidden_state = super()._fprop(fprop_input, *args)

        # Compute the direction to follow for step (t).
        regression_out = self.layer_regression.fprop(next_hidden_state[-1])

        return next_hidden_state + (regression_out,)

    def get_output(self, X):
        # X.shape : (batch_size, seq_len, n_features=[4|7])
        # For tractography, n_features is (x, y, z) + (dwi_id,) [+ previous_direction].
        outputs_info_h = []
        for hidden_size in self.hidden_sizes:
            outputs_info_h.append(T.zeros((X.shape[0], hidden_size)))

        results, updates = theano.scan(fn=self._fprop_step,
                                       # We want to scan over sequence elements, not the examples.
                                       sequences=[T.transpose(X, axes=(1, 0, 2))],
                                       outputs_info=outputs_info_h + [None],
                                       non_sequences=self.parameters + self.volume_manager.volumes,
                                       strict=True)
        self.graph_updates = updates

        # Put back the examples so they are in the first dimension.
        # regression_out.shape : (batch_size, seq_len, target_size=3)
        self.regression_out = T.transpose(results[-1], axes=(1, 0, 2))
        return self.regression_out

    def make_sequence_generator(self, subject_id=0, **_):
        """ Makes a function that returns the prediction for x_{t+1} for every sequence
        in the batch given x_{t} and the current state of the model h^{l}_{t}.

        Parameters
        ----------
        subject_id : int, optional
            ID of the subject whose diffusion data will be used. Default: 0.
        """
        # Build the sequence generator as a theano function.
        states_h = []
        for i in range(len(self.hidden_sizes)):
            state_h = T.matrix(name="layer{}_state_h".format(i))
            states_h.append(state_h)

        symb_x_t = T.matrix(name="x_t")

        new_states = self._fprop_step(symb_x_t, *states_h)
        new_states_h = new_states[:len(self.hidden_sizes)]

        # predictions.shape : (batch_size, target_size)
        predictions = new_states[-1]

        f = theano.function(inputs=[symb_x_t] + states_h,
                            outputs=[predictions] + list(new_states_h))

        def _gen(x_t, states, previous_direction=None):
            """ Returns the prediction for x_{t+1} for every sequence in the batch
            given x_{t} and the current states of the model h^{l}_{t}.

            Parameters
            ----------
            x_t : ndarray with shape (batch_size, 3)
                Streamline coordinate (x, y, z).
            states : list of 2D array of shape (batch_size, hidden_size)
                Current states of the network.
            previous_direction : ndarray with shape (batch_size, 3)
                If using the previous direction, it is appended to the input.

            Returns
            -------
            next_x_t : ndarray with shape (batch_size, 3)
                Directions to follow.
            new_states : list of 2D array of shape (batch_size, hidden_size)
                Updated states of the network after seeing x_t.
            """
            # Append the DWI ID of each sequence after the 3D coordinates.
            subject_ids = np.array([subject_id] * len(x_t), dtype=floatX)[:, None]

            if not self.use_previous_direction:
                x_t = np.c_[x_t, subject_ids]
            else:
                x_t = np.c_[x_t, subject_ids, previous_direction]

            results = f(x_t, *states)
            next_x_t = results[0]
            new_states = results[1:]
            return next_x_t, new_states

        return _gen
def __init__(self, volume_manager, input_size, hidden_sizes, output_size, n_gaussians,
             activation='tanh', use_previous_direction=False, use_layer_normalization=False,
             drop_prob=0., use_zoneout=False, use_skip_connections=False, seed=1234, **_):
    """
    Parameters
    ----------
    volume_manager : :class:`VolumeManager` object
        Used to evaluate the diffusion signal at specific coordinates.
    input_size : int
        Number of units each element Xi in the input sequence X has.
    hidden_sizes : int, list of int
        Number of hidden units each GRU should have.
    output_size : int
        Number of units the regression layer should have.
    n_gaussians : int
        Number of gaussians in the mixture.
    activation : str
        Activation function to apply on the "cell candidate".
    use_previous_direction : bool
        Use the previous direction as an additional input.
    use_layer_normalization : bool
        Use LayerNormalization to normalize preactivations and stabilize hidden layer evolution.
    drop_prob : float
        Dropout/Zoneout probability for recurrent networks.
        See: https://arxiv.org/pdf/1512.05287.pdf & https://arxiv.org/pdf/1606.01305.pdf
    use_zoneout : bool
        Use zoneout implementation instead of dropout.
    use_skip_connections : bool
        Use skip connections from the input to all hidden layers in the network,
        and from all hidden layers to the output layer.
    seed : int
        Random seed used for dropout normalization.
    """
    super(GRU_Regression, self).__init__(input_size, hidden_sizes,
                                         activation=activation,
                                         use_layer_normalization=use_layer_normalization,
                                         drop_prob=drop_prob,
                                         use_zoneout=use_zoneout,
                                         use_skip_connections=use_skip_connections,
                                         seed=seed)
    self.volume_manager = volume_manager
    self.n_gaussians = n_gaussians

    assert output_size == 3  # Only a 3-dimensional target is supported for now.
    self.output_size = output_size

    self.use_previous_direction = use_previous_direction

    # GRU_Mixture does not predict a direction, so it cannot predict an offset.
    self.predict_offset = False

    # Do not use dropout/zoneout in the last hidden layer.
    self.layer_regression_size = sum([n_gaussians,                 # Mixture weights
                                      n_gaussians * output_size,   # Means
                                      n_gaussians * output_size])  # Stds
    output_layer_input_size = sum(self.hidden_sizes) if self.use_skip_connections else self.hidden_sizes[-1]
    self.layer_regression = LayerRegression(output_layer_input_size, self.layer_regression_size)
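# A minimal numpy sketch (hypothetical helper, not part of the model) of how a flat
# regression output of size n_gaussians * (1 + 2 * output_size) would be split back into
# mixture weights, means, and stds, matching the layout summed in layer_regression_size.
import numpy as np

def split_mixture_params(regression_output, n_gaussians, output_size=3):
    # regression_output.shape : (batch_size, n_gaussians * (1 + 2 * output_size))
    weights = regression_output[:, :n_gaussians]
    means = regression_output[:, n_gaussians:n_gaussians * (1 + output_size)]
    stds = regression_output[:, n_gaussians * (1 + output_size):]
    return (weights,
            means.reshape(-1, n_gaussians, output_size),
            stds.reshape(-1, n_gaussians, output_size))

out = np.arange(2 * 14, dtype=float).reshape(2, 14)  # batch_size=2, n_gaussians=2
w, mu, sigma = split_mixture_params(out, n_gaussians=2)
assert w.shape == (2, 2) and mu.shape == (2, 2, 3) and sigma.shape == (2, 2, 3)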
class GRU_Multistep_Gaussian(GRU):
    """ A multistep GRU model used to predict multivariate gaussian parameters (means and standard deviations).

    For each target dimension, the model outputs (m) distribution parameter estimates
    for each prediction horizon up to (k).
    """
    def __init__(self, volume_manager, input_size, hidden_sizes, target_dims, k, m, seed,
                 use_previous_direction=False, use_layer_normalization=False, drop_prob=0.,
                 use_zoneout=False, **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates using multiple subjects.
        input_size : int
            Number of units each element Xi in the input sequence X has.
        hidden_sizes : int, list of int
            Number of hidden units each GRU should have.
        target_dims : int
            Number of dimensions of the multivariate gaussian to estimate;
            the model outputs two distribution parameters for each dimension.
        k : int
            Number of steps ahead to predict (the model will predict all steps up to k).
        m : int
            Number of Monte-Carlo samples used to estimate the gaussian parameters.
        seed : int
            Random seed to initialize the random noise used for sampling and dropout.
        use_previous_direction : bool
            Use the previous direction as an additional input.
        use_layer_normalization : bool
            Use LayerNormalization to normalize preactivations and stabilize hidden layer evolution.
        drop_prob : float
            Dropout/Zoneout probability for recurrent networks.
            See: https://arxiv.org/pdf/1512.05287.pdf & https://arxiv.org/pdf/1606.01305.pdf
        use_zoneout : bool
            Use zoneout implementation instead of dropout.
        """
        super().__init__(input_size, hidden_sizes, use_layer_normalization, drop_prob, use_zoneout, seed)
        self.target_dims = target_dims
        self.target_size = 2 * self.target_dims  # Output distribution parameters mu and sigma for each dimension.

        self.volume_manager = volume_manager

        self.k = k
        self.m = m
        self.seed = seed

        self.use_previous_direction = use_previous_direction

        self.srng = MRG_RandomStreams(self.seed)

        # Do not use dropout/zoneout in the last hidden layer.
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.target_size, normed=False)

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['target_dims'] = self.target_dims
        hyperparameters['target_size'] = self.target_size
        hyperparameters['k'] = self.k
        hyperparameters['m'] = self.m
        hyperparameters['seed'] = self.seed
        hyperparameters['use_previous_direction'] = self.use_previous_direction
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters

    def _fprop_step(self, Xi, *args):
        # Xi.shape : (batch_size, 4); if self.use_previous_direction, Xi.shape : (batch_size, 7)
        # Xi contains the streamline 3D coordinates + dwi ID (+ previous_direction).
        # coords.shape : (batch_size, 4) where the last column is a dwi ID.
        # args.shape : n_layers * (batch_size, layer_size)
        coords = Xi[:, :4]

        batch_size = Xi.shape[0]

        if self.k > 1:
            # Random noise used for sampling at each step (t+2)...(t+k).
            # epsilon.shape : (K-1, batch_size, target_dims)
            epsilon = self.srng.normal((self.k - 1, batch_size, self.target_dims))

        # Get diffusion data.
        # data_at_coords.shape : (batch_size, input_size)
        data_at_coords = self.volume_manager.eval_at_coords(coords)

        if self.use_previous_direction:
            # previous_direction.shape : (batch_size, 3)
            previous_direction = Xi[:, 4:]
            fprop_input = T.concatenate([data_at_coords, previous_direction], axis=1)
        else:
            fprop_input = data_at_coords

        # Hidden state to be passed to the next GRU iteration (next _fprop call).
        # next_hidden_state.shape : n_layers * (batch_size, layer_size)
        next_hidden_state = super()._fprop(fprop_input, *args)

        # Compute the distribution parameters for step (t).
        # distribution_params.shape : (batch_size, target_size)
        distribution_params = self._predict_distribution_params(next_hidden_state[-1])
        k_distribution_params = [distribution_params]

        sample_hidden_state = next_hidden_state

        for k in range(1, self.k):
            # Sample an input for the next step.
            # sample_directions.shape : (batch_size, target_dims)
            sample_directions = self.get_stochastic_samples(distribution_params, epsilon[k - 1])

            # Follow the *unnormalized* direction and get the diffusion data at the new location.
            coords = T.concatenate([coords[:, :3] + sample_directions, coords[:, 3:]], axis=1)
            data_at_coords = self.volume_manager.eval_at_coords(coords)

            if self.use_previous_direction:
                # previous_direction.shape : (batch_size, 3)
                previous_direction = sample_directions
                fprop_input = T.concatenate([data_at_coords, previous_direction], axis=1)
            else:
                fprop_input = data_at_coords

            # Compute the sample distribution parameters for step (t+k).
            sample_hidden_state = super()._fprop(fprop_input, *sample_hidden_state)
            distribution_params = self._predict_distribution_params(sample_hidden_state[-1])
            k_distribution_params += [distribution_params]

        k_distribution_params = T.stack(k_distribution_params, axis=1)

        return next_hidden_state + (k_distribution_params,)

    @staticmethod
    def get_stochastic_samples(distribution_parameters, noise):
        # distribution_parameters.shape : (batch_size, [seq_len], target_size)
        # distribution_parameters[0] = [mu_x, mu_y, mu_z, std_x, std_y, std_z]
        # noise.shape : (batch_size, target_dims)
        mu = distribution_parameters[..., :3]
        sigma = distribution_parameters[..., 3:6]
        samples = mu + noise * sigma
        return samples

    @staticmethod
    def get_max_component_samples(distribution_parameters):
        # distribution_parameters.shape : (batch_size, [seq_len], target_size)
        # distribution_parameters[0] = [mu_x, mu_y, mu_z, std_x, std_y, std_z]
        mean = distribution_parameters[..., :3]
        return mean

    def _predict_distribution_params(self, hidden_state):
        # The regression layer outputs an array [mean_x, mean_y, mean_z, log(std_x), log(std_y), log(std_z)].
        # regression_output.shape : (batch_size, target_size)
        regression_output = self.layer_regression.fprop(hidden_state)

        # Use T.exp to retrieve a positive sigma.
        distribution_params = T.set_subtensor(regression_output[..., 3:6], T.exp(regression_output[..., 3:6]))

        # distribution_params.shape : (batch_size, target_size)
        return distribution_params

    def get_output(self, X):
        # X.shape : (batch_size, seq_len, n_features=4)
        # For tractography, n_features is (x, y, z) + (dwi_id,).

        # Repeat Xs to compute M sample sequences for each input.
        # inputs.shape : (batch_size*M, seq_len, n_features)
        inputs = T.repeat(X, self.m, axis=0)

        # outputs_info_h.shape : n_layers * (batch_size*M, layer_size)
        outputs_info_h = []
        for hidden_size in self.hidden_sizes:
            outputs_info_h.append(T.zeros((inputs.shape[0], hidden_size)))

        # results.shape : n_layers * (seq_len, batch_size*M, layer_size), (seq_len, batch_size*M, K, target_size)
        results, updates = theano.scan(fn=self._fprop_step,
                                       # We want to scan over sequence elements, not the examples.
                                       sequences=[T.transpose(inputs, axes=(1, 0, 2))],
                                       outputs_info=outputs_info_h + [None],
                                       non_sequences=self.parameters + self.volume_manager.volumes,
                                       strict=True)
        self.graph_updates = updates

        # Put back the examples so they are in the first dimension.
        # transposed.shape : (batch_size*M, seq_len, K, target_size)
        transposed = T.transpose(results[-1], axes=(1, 0, 2, 3))

        # Split the M sample sequences into a new dimension.
        # reshaped.shape : (batch_size, M, seq_len, K, target_size)
        reshaped = T.reshape(transposed, (X.shape[0], self.m, X.shape[1], self.k, self.target_size))

        # Transpose the output to get the M sequences dimension in the right place.
        # regression_out.shape : (batch_size, seq_len, K, M, target_size)
        regression_out = T.transpose(reshaped, (0, 2, 3, 1, 4))

        return regression_out

    def make_sequence_generator(self, subject_id=0, use_max_component=False):
        """ Makes a function that returns the prediction for x_{t+1} for every sequence
        in the batch given x_{t} and the current state of the model h^{l}_{t}.

        Parameters
        ----------
        subject_id : int, optional
            ID of the subject whose diffusion data will be used. Default: 0.
        use_max_component : bool, optional
            Use the distribution mean instead of sampling from it. Default: False.
        """
        # Build the sequence generator as a theano function.
        states_h = []
        for i in range(len(self.hidden_sizes)):
            state_h = T.matrix(name="layer{}_state_h".format(i))
            states_h.append(state_h)

        symb_x_t = T.matrix(name="x_t")

        # Temporarily set k to 1.
        k_bak = self.k
        self.k = 1

        new_states = self._fprop_step(symb_x_t, *states_h)
        new_states_h = new_states[:len(self.hidden_sizes)]

        # model_output.shape : (batch_size, K=1, target_size)
        model_output = new_states[-1]
        distribution_params = model_output[:, 0, :]

        if use_max_component:
            predictions = self.get_max_component_samples(distribution_params)
        else:
            # Sample a value from the distribution.
            srng = MRG_RandomStreams(seed=1234)
            batch_size = symb_x_t.shape[0]
            noise = srng.normal((batch_size, self.target_dims))
            # predictions.shape : (batch_size, target_dims)
            predictions = self.get_stochastic_samples(distribution_params, noise)

        f = theano.function(inputs=[symb_x_t] + states_h,
                            outputs=[predictions] + list(new_states_h))

        self.k = k_bak  # Restore the original k.

        def _gen(x_t, states, previous_direction=None):
            """ Returns the prediction for x_{t+1} for every sequence in the batch
            given x_{t} and the current states of the model h^{l}_{t}.

            Parameters
            ----------
            x_t : ndarray with shape (batch_size, 3)
                Streamline coordinate (x, y, z).
            states : list of 2D array of shape (batch_size, hidden_size)
                Current states of the network.
            previous_direction : ndarray with shape (batch_size, 3)
                If using the previous direction, it is appended to the input.

            Returns
            -------
            next_x_t : ndarray with shape (batch_size, 3)
                Directions to follow.
            new_states : list of 2D array of shape (batch_size, hidden_size)
                Updated states of the network after seeing x_t.
            """
            # Append the DWI ID of each sequence after the 3D coordinates.
            subject_ids = np.array([subject_id] * len(x_t), dtype=floatX)[:, None]

            if not self.use_previous_direction:
                x_t = np.c_[x_t, subject_ids]
            else:
                x_t = np.c_[x_t, subject_ids, previous_direction]

            results = f(x_t, *states)
            next_x_t = results[0]
            new_states = results[1:]
            return next_x_t, new_states

        return _gen

    def save(self, path):
        super().save(path)
        savedir = smartutils.create_folder(pjoin(path, type(self).__name__))

        state = {"version": 1,
                 "_srng_rstate": self.srng.rstate,
                 "_srng_state_updates": [state_update[0].get_value()
                                         for state_update in self.srng.state_updates]}
        np.savez(pjoin(savedir, "state.npz"), **state)

    def load(self, path):
        super().load(path)
        loaddir = pjoin(path, type(self).__name__)

        state = np.load(pjoin(loaddir, 'state.npz'))

        self.srng.rstate[:] = state['_srng_rstate']
        for state_update, saved_state in zip(self.srng.state_updates, state["_srng_state_updates"]):
            state_update[0].set_value(saved_state)
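# A minimal numpy sketch (illustration only, not part of the model) of the two tricks
# used in _predict_distribution_params and get_stochastic_samples above: the regression
# layer emits log-stds, which are exponentiated so sigma stays positive, and samples are
# drawn with the reparameterization x = mu + eps * sigma.
import numpy as np

rng = np.random.RandomState(1234)
regression_output = rng.randn(5, 6)        # (batch_size=5, target_size=6)
mu = regression_output[:, :3]
sigma = np.exp(regression_output[:, 3:6])  # positive by construction

eps = rng.normal(size=mu.shape)            # noise ~ N(0, I)
samples = mu + eps * sigma                 # one stochastic direction per example
assert samples.shape == (5, 3)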
class FFNN_Regression(FFNN):
    """ A standard FFNN model with a regression layer stacked on top of it. """
    def __init__(self, volume_manager, input_size, hidden_sizes, output_size, activation,
                 use_previous_direction=False, **_):
        """
        Parameters
        ----------
        volume_manager : :class:`VolumeManager` object
            Used to evaluate the diffusion signal at specific coordinates.
        input_size : int
            Number of units each element X has.
        hidden_sizes : int, list of int
            Number of hidden units each FFNN layer should have.
        output_size : int
            Number of units the regression layer should have.
        activation : str
            Name of the activation function to use in the hidden layers.
        use_previous_direction : bool
            Use the previous direction as an additional input.
        """
        super().__init__(input_size, hidden_sizes, activation)
        self.volume_manager = volume_manager
        self.output_size = output_size
        self.use_previous_direction = use_previous_direction
        self.layer_regression = LayerRegression(self.hidden_sizes[-1], self.output_size)

    def initialize(self, weights_initializer=initer.UniformInitializer(1234)):
        super().initialize(weights_initializer)
        self.layer_regression.initialize(weights_initializer)

    @property
    def hyperparameters(self):
        hyperparameters = super().hyperparameters
        hyperparameters['output_size'] = self.output_size
        hyperparameters['use_previous_direction'] = self.use_previous_direction
        return hyperparameters

    @property
    def parameters(self):
        return super().parameters + self.layer_regression.parameters

    def _fprop(self, Xi, *args):
        # Xi.shape : (batch_size, 4); if self.use_previous_direction, Xi.shape : (batch_size, 7)
        # Xi contains the streamline 3D coordinates + dwi ID (+ previous_direction).
        # coords.shape : (batch_size, 4) where the last column is a dwi ID.
        coords = Xi[:, :4]

        # Get diffusion data.
        # data_at_coords.shape : (batch_size, input_size)
        data_at_coords = self.volume_manager.eval_at_coords(coords)

        if self.use_previous_direction:
            # previous_direction.shape : (batch_size, 3)
            previous_direction = Xi[:, 4:]
            fprop_input = T.concatenate([data_at_coords, previous_direction], axis=1)
        else:
            fprop_input = data_at_coords

        # Outputs of the hidden layers (the network is feedforward, so no state is carried over).
        # layer_outputs.shape : n_layers * (batch_size, layer_size)
        layer_outputs = super()._fprop(fprop_input)

        # Compute the direction to follow for step (t).
        regression_out = self.layer_regression.fprop(layer_outputs[-1])

        return layer_outputs + (regression_out,)

    def make_sequence_generator(self, subject_id=0, **_):
        """ Makes a function that returns the prediction for x_{t+1} for every sequence in the batch given x_{t}.

        Parameters
        ----------
        subject_id : int, optional
            ID of the subject whose diffusion data will be used. Default: 0.
        """
        # Build the sequence generator as a theano function.
        symb_x_t = T.matrix(name="x_t")

        layer_outputs = self._fprop(symb_x_t)
        # predictions.shape : (batch_size, target_size)
        predictions = layer_outputs[-1]

        f = theano.function(inputs=[symb_x_t], outputs=[predictions])

        def _gen(x_t, states, previous_direction=None):
            """ Returns the prediction for x_{t+1} for every sequence in the batch given x_{t}.

            Parameters
            ----------
            x_t : ndarray with shape (batch_size, 3)
                Streamline coordinate (x, y, z).
            states : list of 2D array of shape (batch_size, hidden_size)
                Current states of the network (unused; kept for interface compatibility).
            previous_direction : ndarray with shape (batch_size, 3)
                If using the previous direction, it is appended to the input.

            Returns
            -------
            next_x_t : ndarray with shape (batch_size, 3)
                Directions to follow.
            new_states : list of 2D array of shape (batch_size, hidden_size)
                Unchanged states, since the network is not recurrent.
            """
            # Append the DWI ID of each sequence after the 3D coordinates.
            subject_ids = np.array([subject_id] * len(x_t), dtype=floatX)[:, None]

            if not self.use_previous_direction:
                x_t = np.c_[x_t, subject_ids]
            else:
                x_t = np.c_[x_t, subject_ids, previous_direction]

            results = f(x_t)
            next_x_t = results[-1]

            # The prediction is sign-ambiguous; keep whichever of the prediction and its
            # opposite is closest to the previous direction.
            next_x_t_both_directions = np.stack([next_x_t, -next_x_t], axis=-1)
            next_x_t = next_x_t_both_directions[
                np.arange(next_x_t_both_directions.shape[0])[:, None],
                np.arange(next_x_t_both_directions.shape[1])[None, :],
                np.argmin(np.linalg.norm(next_x_t_both_directions - previous_direction[:, :, None], axis=1),
                          axis=1)[:, None]]

            # FFNN_Regression is not a recurrent network; return the original states.
            new_states = states

            return next_x_t, new_states

        return _gen
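# A minimal numpy sketch (illustration only, not part of the model) of the sign-selection
# logic above: stack the prediction and its opposite, then keep whichever is closest to
# the previous direction.
import numpy as np

def resolve_sign(pred, previous_direction):
    # pred, previous_direction : (batch_size, 3)
    both = np.stack([pred, -pred], axis=-1)                               # (batch_size, 3, 2)
    dist = np.linalg.norm(both - previous_direction[:, :, None], axis=1)  # (batch_size, 2)
    pick = np.argmin(dist, axis=1)                                        # 0 keeps pred, 1 flips it
    return both[np.arange(len(pred)), :, pick]

pred = np.array([[1., 0., 0.], [0., 1., 0.]])
prev = np.array([[-1., 0., 0.], [0., 1., 0.]])
print(resolve_sign(pred, prev))  # [[-1. 0. 0.], [0. 1. 0.]] -- first row is flipped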
def __init__(self, volume_manager, input_size, hidden_sizes, output_size,
             use_previous_direction=False, use_layer_normalization=False, drop_prob=0.,
             use_zoneout=False, use_skip_connections=False, neighborhood_radius=False,
             learn_to_stop=False, seed=1234, **_):
    """
    Parameters
    ----------
    volume_manager : :class:`VolumeManager` object
        Used to evaluate the diffusion signal at specific coordinates.
    input_size : int
        Number of units each element Xi in the input sequence X has.
    hidden_sizes : int, list of int
        Number of hidden units each GRU should have.
    output_size : int
        Number of units the regression layer should have.
    use_previous_direction : bool
        Use the previous direction as an additional input.
    use_layer_normalization : bool
        Use LayerNormalization to normalize preactivations and stabilize hidden layer evolution.
    drop_prob : float
        Dropout/Zoneout probability for recurrent networks.
        See: https://arxiv.org/pdf/1512.05287.pdf & https://arxiv.org/pdf/1606.01305.pdf
    use_zoneout : bool
        Use zoneout implementation instead of dropout.
    use_skip_connections : bool
        Use skip connections from the input to all hidden layers in the network,
        and from all hidden layers to the output layer.
    neighborhood_radius : float
        Add the signal at positions around the current streamline coordinate to the input
        (with the given radius in voxel space); None = no neighborhood.
    learn_to_stop : bool
        Predict whether the streamline being generated should stop or not.
    seed : int
        Random seed used for dropout normalization.
    """
    self.neighborhood_radius = neighborhood_radius
    self.model_input_size = input_size
    if self.neighborhood_radius:
        self.neighborhood_directions = get_neighborhood_directions(self.neighborhood_radius)
        # The model input size is increased when using a neighborhood.
        self.model_input_size = input_size * self.neighborhood_directions.shape[0]

    super(GRU_Regression, self).__init__(self.model_input_size, hidden_sizes,
                                         use_layer_normalization=use_layer_normalization,
                                         drop_prob=drop_prob,
                                         use_zoneout=use_zoneout,
                                         use_skip_connections=use_skip_connections,
                                         seed=seed)

    # Restore the input size.
    self.input_size = input_size

    self.volume_manager = volume_manager

    assert output_size == 3  # Only a 3-dimensional target is supported for now.
    self.output_size = output_size

    self.use_previous_direction = use_previous_direction

    # GRU_Gaussian does not predict a direction, so it cannot predict an offset.
    self.predict_offset = False

    self.learn_to_stop = learn_to_stop

    # Do not use dropout/zoneout in the last hidden layer.
    self.layer_regression_size = sum([output_size,   # Means
                                      output_size])  # Stds
    output_layer_input_size = sum(self.hidden_sizes) if self.use_skip_connections else self.hidden_sizes[-1]
    self.layer_regression = LayerRegression(output_layer_input_size, self.layer_regression_size)

    if self.learn_to_stop:
        # Predict whether a streamline should stop or keep growing.
        self.layer_stopping = LayerDense(output_layer_input_size, 1,
                                         activation='sigmoid', name="GRU_Gaussian_stopping")
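# A minimal numpy sketch (the helper below is a hypothetical stand-in for
# `get_neighborhood_directions`, whose actual offsets may differ) showing why the model
# input size grows with a neighborhood: sampling the signal at the center plus one offset
# along +/- each axis yields 7 positions, so the effective input is 7 * input_size.
import numpy as np

def neighborhood_directions(radius):
    # Center + one offset along +/- each axis, scaled by the radius (in voxel space).
    offsets = np.array([[0, 0, 0],
                        [1, 0, 0], [-1, 0, 0],
                        [0, 1, 0], [0, -1, 0],
                        [0, 0, 1], [0, 0, -1]], dtype=float)
    return offsets * radius

input_size = 100
directions = neighborhood_directions(radius=0.5)
model_input_size = input_size * directions.shape[0]
print(model_input_size)  # 700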