def _compute_losses(self, model_output):
    """Sequence loss: L2 regression error plus stopping-criterion cross-entropy.

    model_output[0][:, :, 0] holds the stopping probabilities and
    model_output[1] the predicted directions with shape
    (batch_size, seq_length, out_dim). The first three target dims are the
    direction, the fourth the stopping flag (from the indexing below).
    """
    mask = self.dataset.symb_mask

    stopping_preds = model_output[0][:, :, 0]
    direction_preds = model_output[1]

    direction_targets = self.dataset.symb_targets[:, :, :3]
    stopping_targets = self.dataset.symb_targets[:, :, 3]

    if self.normalize_output:
        # Project predictions onto the unit sphere before comparing.
        direction_preds = direction_preds / l2distance(
            direction_preds, keepdims=True, eps=self.eps)

    self.samples = direction_preds

    l2_term = l2distance(self.samples, direction_targets, eps=self.eps)
    stopping_term = T.nnet.binary_crossentropy(stopping_preds, stopping_targets)

    # loss_per_time_step.shape = (batch_size, seq_len)
    self.loss_per_time_step = l2_term + stopping_term

    # loss_per_seq.shape = (batch_size,)
    self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1)
    if not self.sum_over_timestep:
        # Average instead of sum over the valid timesteps.
        self.loss_per_seq = self.loss_per_seq / T.sum(mask, axis=1)

    return self.loss_per_seq
def _compute_losses(self, model_output):
    """Negative log-likelihood of a Gaussian mixture over K look-ahead targets.

    model_output.shape : (batch_size, seq_len, K, M, target_size)
    self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)
    mask.shape : (batch_size, seq_len) or None

    Also sets monitoring attributes (L2_error_per_item, mean_sqr_error,
    k_losses_per_timestep, loss_per_time_step, k_losses_per_seq) as side
    effects. Returns loss_per_seq with shape (batch_size,).
    """
    mask = self.dataset.symb_mask

    # mu.shape = (batch_size, seq_len, K, M, target_dims)
    mu = model_output[:, :, :, :, 0:3]
    # sigma.shape = (batch_size, seq_len, K, M, target_dims)
    sigma = model_output[:, :, :, :, 3:6]

    # Stack K targets for each input (sliding window style).
    # The `or None` handles k == 1, where the stop index would otherwise be 0.
    # targets.shape = (batch_size, seq_len, K, target_dims)
    targets = T.stack(
        [self.dataset.symb_targets[:, i : (-self.model.k + i + 1) or None] for i in range(self.model.k)],
        axis=2
    )

    # Add new axis for sum over M
    # targets.shape = (batch_size, seq_len, K, 1, target_dims)
    targets = targets[:, :, :, None, :]

    # For monitoring the L2 error of using $mu$ as the predicted direction
    # (should be comparable to MICCAI's work). Uses only the first
    # component (K=0, M=0) of predictions and targets.
    normalized_mu = mu[:, :, 0, 0] / l2distance(mu[:, :, 0, 0], keepdims=True, eps=1e-8)
    normalized_targets = targets[:, :, 0, 0] / l2distance(targets[:, :, 0, 0], keepdims=True, eps=1e-8)
    self.L2_error_per_item = T.sqrt(T.sum(((normalized_mu - normalized_targets) ** 2), axis=2))
    if mask is not None:
        self.mean_sqr_error = T.sum(self.L2_error_per_item * mask, axis=1) / T.sum(mask, axis=1)
    else:
        self.mean_sqr_error = T.mean(self.L2_error_per_item, axis=1)

    # Likelihood of multivariate gaussian (n dimensions) is :
    # ((2 \pi)^D |\Sigma|)^{-1/2} exp(-1/2 (x - \mu)^T \Sigma^-1 (x - \mu))
    # We suppose a diagonal covariance matrix, so we have :
    # => |\Sigma| = \prod_n \sigma_n^2
    # => (x - \mu)^T \Sigma^-1 (x - \mu) = \sum_n ((x_n - \mu_n) / \sigma_n)^2
    m_log_likelihoods = -np.float32((self.target_dims / 2.0) * np.log(2 * np.pi)) + T.sum(
        -T.log(sigma) - 0.5 * T.sqr((targets - mu) / sigma), axis=4
    )

    # NLL of a uniform mixture over the M components:
    # log(M) - logsumexp(ll) == -log(mean(exp(ll))).
    # k_losses_per_timestep.shape : (batch_size, seq_len, K)
    self.k_losses_per_timestep = T.log(self.m) - logsumexp(m_log_likelihoods, axis=3, keepdims=False)

    # Average over the K look-ahead predictions.
    # loss_per_timestep.shape : (batch_size, seq_len)
    self.loss_per_time_step = T.mean(self.k_losses_per_timestep, axis=2)

    # Masked average over sequence steps.
    # k_nlls_per_seq.shape :(batch_size, K)
    if mask is not None:
        self.k_losses_per_seq = T.sum(self.k_losses_per_timestep * mask[:, :, None], axis=1) / T.sum(
            mask, axis=1, keepdims=True
        )
    else:
        self.k_losses_per_seq = T.mean(self.k_losses_per_timestep, axis=1)

    # Average over K
    # loss_per_seq.shape :(batch_size,)
    self.loss_per_seq = T.mean(self.k_losses_per_seq, axis=1)
    return self.loss_per_seq
def _compute_losses(self, model_output):
    """Combined loss: masked average L2 regression error plus a
    stopping-criterion binary cross-entropy.

    model_output is a pair (regression_outputs, stopping) where
    regression_outputs.shape = (batch_size, seq_length, out_dim) and
    stopping appears to be (batch_size, seq_length, 1) from the indexing
    below -- TODO confirm against the model's output layer.
    Returns a (batch_size,) vector.
    """
    mask = self.dataset.symb_mask

    # BUG FIX: the original re-assigned ``regression_outputs = model_output``
    # right after unpacking, so the whole (outputs, stopping) tuple -- not
    # the direction predictions -- flowed into the L2 term. Keep the
    # unpacked value instead.
    regression_outputs, stopping = model_output

    if self.normalize_output:
        regression_outputs /= l2distance(regression_outputs, keepdims=True, eps=1e-8)

    # Regression part (next direction).
    # L2_errors_per_time_step.shape = (batch_size, seq_len)
    self.L2_errors_per_time_step = l2distance(regression_outputs, self.dataset.symb_targets)
    # avg_L2_error_per_seq.shape = (batch_size,)
    self.avg_L2_error_per_seq = T.sum(self.L2_errors_per_time_step * mask, axis=1) / T.sum(mask, axis=1)

    # Binary classification part (stopping criterion).
    lengths = T.sum(mask, axis=1)
    lengths_int = T.cast(lengths, dtype="int32")  # Mask values are floats.
    idx_examples = T.arange(mask.shape[0])

    # Create a mask that does not contain the last element of each sequence.
    smaller_mask = T.set_subtensor(mask[idx_examples, lengths_int - 1], 0)

    # Compute cross-entropy for non-ending points (target = 0).
    target = T.zeros(1)
    cross_entropy_not_ending = T.sum(
        T.nnet.binary_crossentropy(stopping, target) * smaller_mask[:, :, None],
        axis=[1, 2])

    # Compute cross-entropy for ending points (target = 1).
    # We add a scaling factor because there is only one ending point per
    # sequence whereas there are multiple non-ending points.
    target = T.ones(1)
    cross_entropy_ending = T.nnet.binary_crossentropy(
        stopping[idx_examples, lengths_int - 1, 0], target) * (lengths - 1)

    self.cross_entropy = (cross_entropy_not_ending + cross_entropy_ending) / lengths

    return self.avg_L2_error_per_seq + self.cross_entropy
def _compute_losses(self, model_output):
    """L2 distance between each prediction and its target.

    model_output.shape = (batch_size, out_dim).
    Returns a (batch_size,) vector.
    """
    predictions = model_output
    if self.normalize_output:
        # Scale predictions to unit L2 norm first.
        predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

    self.samples = predictions

    # loss_per_time_step.shape = (batch_size,)
    self.loss_per_time_step = l2distance(self.samples, self.dataset.symb_targets)
    return self.loss_per_time_step
def _compute_losses(self, model_output):
    """Sign-invariant L2 loss: distance to the target or to its opposite,
    whichever is smaller.

    model_output.shape = (batch_size, out_dim).
    Returns a (batch_size,) vector.
    """
    predictions = model_output
    if self.normalize_output:
        predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

    self.samples = predictions

    # Elementwise minimum of the two distances is equivalent to taking
    # T.min over a stacked pair.
    dist_to_target = l2distance(self.samples, self.dataset.symb_targets)
    dist_to_flipped = l2distance(self.samples, -self.dataset.symb_targets)

    # loss_per_time_step.shape = (batch_size,)
    self.loss_per_time_step = T.minimum(dist_to_target, dist_to_flipped)
    return self.loss_per_time_step
def _compute_losses(self, model_output):
    """Masked L2 loss using the mean of the most-probable mixture component.

    model_output.shape = (batch_size, seq_length, regression_layer_size).
    Returns a (batch_size,) vector.
    """
    mask = self.dataset.symb_mask

    # mixture_weights.shape : (batch_size, seq_len, n_gaussians)
    # means.shape : (batch_size, seq_len, n_gaussians, 3)
    mixture_weights, means, stds = self.model.get_mixture_parameters(model_output, ndim=4)

    # Pick, per (batch, time) pair, the mean of the highest-weight component.
    best_component = T.argmax(mixture_weights, axis=2)
    batch_idx = T.arange(mixture_weights.shape[0])[:, None]
    time_idx = T.arange(mixture_weights.shape[1])[None, :]
    # samples.shape : (batch_size, seq_len, 3)
    self.samples = means[batch_idx, time_idx, best_component]

    # loss_per_time_step.shape = (batch_size, seq_len)
    self.loss_per_time_step = l2distance(self.samples, self.dataset.symb_targets)
    # loss_per_seq.shape = (batch_size,)
    self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1) / T.sum(mask, axis=1)
    return self.loss_per_seq
def fprop(self, X):
    """Affine transform of X, optionally L2-normalized along the last axis."""
    activation = T.dot(X, self.W) + self.b
    if not self.normed:
        return activation
    # Scale each output vector to unit L2 norm.
    return activation / l2distance(activation, keepdims=True, eps=1e-8)
def _compute_losses(self, model_output):
    """Masked mean (over time) of the L2 distance to the targets.

    model_output.shape = (batch_size, seq_length, out_dim).
    Returns a (batch_size,) vector.
    """
    mask = self.dataset.symb_mask

    predictions = model_output
    if self.normalize_output:
        predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

    self.samples = predictions

    # loss_per_time_step.shape = (batch_size, seq_len)
    self.loss_per_time_step = l2distance(self.samples, self.dataset.symb_targets)

    # Average only over valid (unmasked) timesteps.
    # loss_per_seq.shape = (batch_size,)
    self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1) / T.sum(mask, axis=1)
    return self.loss_per_seq
def _compute_losses(self, model_output):
    """Combined loss: masked average L2 regression error plus a
    stopping-criterion binary cross-entropy.

    model_output is a pair (regression_outputs, stopping) where
    regression_outputs.shape = (batch_size, seq_length, out_dim) and
    stopping appears to be (batch_size, seq_length, 1) from the indexing
    below -- TODO confirm against the model's output layer.
    Returns a (batch_size,) vector.
    """
    mask = self.dataset.symb_mask

    # BUG FIX: the original re-assigned ``regression_outputs = model_output``
    # right after unpacking, so the whole (outputs, stopping) tuple -- not
    # the direction predictions -- flowed into the L2 term. Keep the
    # unpacked value instead.
    regression_outputs, stopping = model_output

    if self.normalize_output:
        regression_outputs /= l2distance(regression_outputs, keepdims=True, eps=1e-8)

    # Regression part (next direction)
    # L2_errors_per_time_step.shape = (batch_size, seq_len)
    self.L2_errors_per_time_step = l2distance(regression_outputs, self.dataset.symb_targets)
    # avg_L2_error_per_seq.shape = (batch_size,)
    self.avg_L2_error_per_seq = T.sum(self.L2_errors_per_time_step * mask, axis=1) / T.sum(mask, axis=1)

    # Binary classification part (stopping criterion)
    lengths = T.sum(mask, axis=1)
    lengths_int = T.cast(lengths, dtype="int32")  # Mask values are floats.
    idx_examples = T.arange(mask.shape[0])

    # Create a mask that does not contain the last element of each sequence.
    smaller_mask = T.set_subtensor(mask[idx_examples, lengths_int - 1], 0)

    # Compute cross-entropy for non-ending points (target = 0).
    target = T.zeros(1)
    cross_entropy_not_ending = T.sum(
        T.nnet.binary_crossentropy(stopping, target) * smaller_mask[:, :, None],
        axis=[1, 2])

    # Compute cross-entropy for ending points (target = 1).
    # We add a scaling factor because there is only one ending point per
    # sequence whereas there are multiple non-ending points.
    target = T.ones(1)
    cross_entropy_ending = T.nnet.binary_crossentropy(
        stopping[idx_examples, lengths_int - 1, 0], target) * (lengths - 1)

    self.cross_entropy = (cross_entropy_not_ending + cross_entropy_ending) / lengths

    return self.avg_L2_error_per_seq + self.cross_entropy
def fprop(self, X, dropout_W=None):
    """Affine transform with optional input dropout and L2 output normalization.

    dropout_W, when given, is a row vector (one entry per input unit) used to
    drop inputs by scaling the corresponding rows of W.
    """
    W = self.W
    # BUG FIX: a symbolic tensor / ndarray mask cannot be used directly in a
    # boolean context (`if dropout_W:` raises or is ambiguous); test against
    # None instead. Use non-in-place multiply so self.W is never rebound.
    if dropout_W is not None:
        W = W * dropout_W[:, None]

    out = T.dot(X, W) + self.b

    # Normalize the output vector.
    if self.normed:
        out /= l2distance(out, keepdims=True, eps=1e-8)

    return out
def _compute_losses(self, model_output):
    """Negative squared cosine-similarity loss.

    Minimizing -cos**2 maximizes the (sign-invariant) alignment between
    predictions and targets. model_output.shape = (batch_size, out_dim).
    Returns a (batch_size,) vector.
    """
    predictions = model_output
    if self.normalize_output:
        predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

    self.samples = predictions

    # Maximize squared cosine similarity = minimize -cos**2.
    dot_products = T.sum(self.samples * self.dataset.symb_targets, axis=1)

    # loss_per_time_step.shape = (batch_size,)
    self.loss_per_time_step = -T.square(dot_products)
    return self.loss_per_time_step
def _compute_losses(self, model_output):
    """Masked L2 loss per sequence; summed over timesteps, or averaged
    unless ``self.sum_over_timestep`` is set.

    model_output.shape = (batch_size, seq_length, out_dim).
    Returns a (batch_size,) vector.
    """
    mask = self.dataset.symb_mask

    predictions = model_output
    if self.normalize_output:
        predictions = predictions / l2distance(predictions, keepdims=True, eps=self.eps)

    self.samples = predictions

    # loss_per_time_step.shape = (batch_size, seq_len)
    self.loss_per_time_step = l2distance(self.samples, self.dataset.symb_targets, eps=self.eps)

    # loss_per_seq.shape = (batch_size,)
    self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1)
    if not self.sum_over_timestep:
        # Average over the (variable) number of valid timesteps.
        self.loss_per_seq = self.loss_per_seq / T.sum(mask, axis=1)

    return self.loss_per_seq
def fprop(self, X, dropout_W=None):
    """Affine transform followed by layer normalization, with optional input
    dropout and optional L2 output normalization.

    dropout_W, when given, is a row vector (one entry per input unit) used to
    drop inputs by scaling the corresponding rows of W.
    """
    W = self.W
    # BUG FIX: a symbolic tensor / ndarray mask cannot be used directly in a
    # boolean context (`if dropout_W:` raises or is ambiguous); test against
    # None instead. Use non-in-place multiply so self.W is never rebound.
    if dropout_W is not None:
        W = W * dropout_W[:, None]

    units_inputs = T.dot(X, W)

    # Layer normalization: standardize across units, then apply gain and bias.
    mean = T.mean(units_inputs, axis=1, keepdims=True)
    std = T.std(units_inputs, axis=1, keepdims=True)
    units_inputs_normalized = (units_inputs - mean) / (std + self.eps)
    out = self.g * units_inputs_normalized + self.b

    # Normalize the output vector.
    if self.normed:
        out /= l2distance(out, keepdims=True, eps=1e-8)

    return out
def _compute_losses(self, model_output):
    """Masked L2 loss between max-component samples and the targets.

    model_output.shape : (batch_size, seq_len, K, M, target_size)
    self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)
    Returns a (batch_size,) vector.
    """
    # Keep only the targets aligned with the seq_len predictions; the
    # `or None` handles k == 1 (stop index 0 would otherwise select nothing).
    # targets.shape = (batch_size, seq_len, 3)
    k = self.model.k
    targets = self.dataset.symb_targets[:, :(-k + 1) or None, :]

    # mask.shape : (batch_size, seq_len)
    mask = self.dataset.symb_mask

    # T.squeeze(.) should remove the K=1 and M=1 dimensions.
    # samples.shape : (batch_size, seq_len, 3)
    self.samples = self.model.get_max_component_samples(T.squeeze(model_output))

    # loss_per_time_step.shape = (batch_size, seq_len)
    self.loss_per_time_step = l2distance(self.samples, targets)

    # loss_per_seq.shape = (batch_size,)
    self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1) / T.sum(mask, axis=1)
    return self.loss_per_seq
def _compute_losses(self, model_output):
    """Masked L2 loss using the mixture's expected direction (weighted mean).

    model_output.shape = (batch_size, seq_length, regression_layer_size).
    Returns a (batch_size,) vector.
    """
    mask = self.dataset.symb_mask

    # mixture_weights.shape : (batch_size, seq_len, n_gaussians)
    # means.shape : (batch_size, seq_len, n_gaussians, 3)
    mixture_weights, means, stds = self.model.get_mixture_parameters(model_output, ndim=4)

    # Expected value of the mixture: weight-averaged component means.
    # samples.shape : (batch_size, seq_len, 3)
    self.samples = T.sum(mixture_weights[:, :, :, None] * means, axis=2)

    # loss_per_time_step.shape = (batch_size, seq_len)
    self.loss_per_time_step = l2distance(self.samples, self.dataset.symb_targets)

    # loss_per_seq.shape = (batch_size,)
    self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1) / T.sum(mask, axis=1)
    return self.loss_per_seq
def _compute_losses(self, model_output):
    """Masked per-sequence L2 error of the highest-weight component samples.

    model_output.shape : (batch_size, seq_len, K, M, target_size)
    self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)
    Returns a (batch_size,) vector.
    """
    # mask.shape : (batch_size, seq_len)
    mask = self.dataset.symb_mask

    # Drop the trailing K-1 targets so they align with the predictions
    # (`or None` keeps everything when k == 1).
    # targets.shape = (batch_size, seq_len, 3)
    targets = self.dataset.symb_targets[:, :-self.model.k + 1 or None, :]

    # T.squeeze(.) should remove the K=1 and M=1 dimensions.
    # samples.shape : (batch_size, seq_len, 3)
    self.samples = self.model.get_max_component_samples(T.squeeze(model_output))

    # per-step errors, shape (batch_size, seq_len)
    per_step_errors = l2distance(self.samples, targets)
    self.loss_per_time_step = per_step_errors

    # loss_per_seq.shape = (batch_size,)
    masked_total = T.sum(per_step_errors * mask, axis=1)
    self.loss_per_seq = masked_total / T.sum(mask, axis=1)
    return self.loss_per_seq
def _compute_losses(self, model_output):
    """Masked L2 loss using the Gaussian mean as the predicted direction.

    model_output.shape = (batch_size, seq_length, regression_layer_size).
    Returns a (batch_size,) vector.
    """
    mask = self.dataset.symb_mask

    # mu.shape : (batch_size, seq_len, 3); sigma is unused by this loss.
    mu, _sigma = self.model.get_distribution_parameters(model_output)

    # targets.shape : (batch_size, seq_len, 3)
    targets = self.dataset.symb_targets

    # The prediction is simply the distribution's mean.
    # samples.shape : (batch_size, seq_len, 3)
    self.samples = mu

    # loss_per_time_step.shape = (batch_size, seq_len)
    self.loss_per_time_step = l2distance(self.samples, targets)

    # loss_per_seq.shape = (batch_size,)
    self.loss_per_seq = T.sum(self.loss_per_time_step * mask, axis=1) / T.sum(mask, axis=1)
    return self.loss_per_seq
def _compute_losses(self, model_output):
    """Negative log-likelihood of a Gaussian mixture over K look-ahead targets.

    model_output.shape : (batch_size, seq_len, K, M, target_size)
    self.dataset.symb_targets.shape = (batch_size, seq_len+K-1, target_dims)
    mask.shape : (batch_size, seq_len) or None

    Also sets monitoring attributes (L2_error_per_item, mean_sqr_error,
    k_losses_per_timestep, loss_per_time_step, k_losses_per_seq) as side
    effects. Returns loss_per_seq with shape (batch_size,).
    """
    mask = self.dataset.symb_mask

    # mu.shape = (batch_size, seq_len, K, M, target_dims)
    mu = model_output[:, :, :, :, 0:3]
    # sigma.shape = (batch_size, seq_len, K, M, target_dims)
    sigma = model_output[:, :, :, :, 3:6]

    # Stack K targets for each input (sliding window style).
    # The `or None` handles k == 1, where the stop index would otherwise be 0.
    # targets.shape = (batch_size, seq_len, K, target_dims)
    targets = T.stack([
        self.dataset.symb_targets[:, i:(-self.model.k + i + 1) or None]
        for i in range(self.model.k)
    ], axis=2)

    # Add new axis for sum over M
    # targets.shape = (batch_size, seq_len, K, 1, target_dims)
    targets = targets[:, :, :, None, :]

    # For monitoring the L2 error of using $mu$ as the predicted direction
    # (should be comparable to MICCAI's work). Uses only the first
    # component (K=0, M=0) of predictions and targets.
    normalized_mu = mu[:, :, 0, 0] / l2distance(
        mu[:, :, 0, 0], keepdims=True, eps=1e-8)
    normalized_targets = targets[:, :, 0, 0] / l2distance(
        targets[:, :, 0, 0], keepdims=True, eps=1e-8)
    self.L2_error_per_item = T.sqrt(
        T.sum(((normalized_mu - normalized_targets)**2), axis=2))
    if mask is not None:
        self.mean_sqr_error = T.sum(self.L2_error_per_item * mask,
                                    axis=1) / T.sum(mask, axis=1)
    else:
        self.mean_sqr_error = T.mean(self.L2_error_per_item, axis=1)

    # Likelihood of multivariate gaussian (n dimensions) is :
    # ((2 \pi)^D |\Sigma|)^{-1/2} exp(-1/2 (x - \mu)^T \Sigma^-1 (x - \mu))
    # We suppose a diagonal covariance matrix, so we have :
    # => |\Sigma| = \prod_n \sigma_n^2
    # => (x - \mu)^T \Sigma^-1 (x - \mu) = \sum_n ((x_n - \mu_n) / \sigma_n)^2
    m_log_likelihoods = -np.float32(
        (self.target_dims / 2.) * np.log(2 * np.pi)) + T.sum(
            -T.log(sigma) - 0.5 * T.sqr((targets - mu) / sigma), axis=4)

    # NLL of a uniform mixture over the M components:
    # log(M) - logsumexp(ll) == -log(mean(exp(ll))).
    # k_losses_per_timestep.shape : (batch_size, seq_len, K)
    self.k_losses_per_timestep = T.log(self.m) - logsumexp(
        m_log_likelihoods, axis=3, keepdims=False)

    # Average over the K look-ahead predictions.
    # loss_per_timestep.shape : (batch_size, seq_len)
    self.loss_per_time_step = T.mean(self.k_losses_per_timestep, axis=2)

    # Masked average over sequence steps.
    # k_nlls_per_seq.shape :(batch_size, K)
    if mask is not None:
        self.k_losses_per_seq = T.sum(
            self.k_losses_per_timestep * mask[:, :, None], axis=1) / T.sum(
                mask, axis=1, keepdims=True)
    else:
        self.k_losses_per_seq = T.mean(self.k_losses_per_timestep, axis=1)

    # Average over K
    # loss_per_seq.shape :(batch_size,)
    self.loss_per_seq = T.mean(self.k_losses_per_seq, axis=1)
    return self.loss_per_seq