# Assumed import preamble (not in the original snippets): the functions below
# refer to TensorFlow modules under several aliases, reconstructed here.
# Builtins `abs` and `pow` are deliberately shadowed to match the snippets.
# Helpers such as reparameterisation_trick, kl_normal, gaussian_log_likelihood,
# NN_utils, layers, modeling, distributions, and the globals threshold,
# num_back, J, lattice, eps, FLAGS are assumed to be defined elsewhere.
import numpy as np
import tensorflow as tf
from tensorflow import math
from tensorflow import math as tfm
from tensorflow import math as tf_math
from tensorflow import math as tf_maths
from tensorflow import where as tf_where
from tensorflow import linalg as tfl
from tensorflow.keras import backend as K
from tensorflow.keras.losses import MSE, binary_crossentropy
from tensorflow.math import (abs, exp, greater, lgamma, log, pow, reduce_max,
                             reduce_mean, reduce_sum)
from tensorflow.nn import softmax


def weighted_dice(y_true, y_pred, weight):
    # Dice score with a per-pixel weight map (the weight is squared to
    # emphasise highly weighted regions); smooth avoids division by zero.
    smooth = 1.
    w, m1, m2 = weight * weight, y_true, y_pred
    intersection = m1 * m2
    score = (2. * math.reduce_sum(w * intersection) + smooth) / (
        math.reduce_sum(w * m1) + math.reduce_sum(w * m2) + smooth)
    return score
def dice(y_true, y_pred):
    # Soerensen-Dice coefficient; smooth keeps the ratio defined for empty masks.
    smooth = 1.
    y_true_f = tf.cast(K.flatten(y_true), tf.float32)
    y_pred_f = tf.cast(K.flatten(y_pred), tf.float32)
    intersection = math.reduce_sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (
        math.reduce_sum(y_true_f) + math.reduce_sum(y_pred_f) + smooth)
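# Usage sketch (illustrative, not from the original source): identical masks
# give a Dice score of 1; disjoint masks tend to 0, offset by the smooth term.
def example_dice():
    a = tf.constant([[1., 1., 0., 0.]])
    b = tf.constant([[0., 0., 1., 1.]])
    return dice(a, a), dice(a, b)  # -> 1.0 and (0 + 1)/(2 + 2 + 1) = 0.2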
def dual_cvae_cost(x, x2, y, encoder, decoder, encoder_c, wu_c=0.0,
                   constrain=True):
    '''
    Cost function for a conditional VAE with two conditions.

    INPUTS:
    x - first inputs/conditions
    x2 - second inputs/conditions
    y - outputs to be reconstructed
    encoder - neural network mapping x, x2 and y to mu_z and log_sig_sq_z
    decoder - neural network mapping z, x and x2 to mu_y and log_sig_sq_y
    encoder_c - neural network mapping x and x2 to mu_cz and log_sig_sq_cz
                (conditional prior distribution in the latent space)
    wu_c - constant for bottleneck warmup: wu_c=0 means no bottleneck,
           wu_c=1 means completely imposing the bottleneck through the
           latent space

    OUTPUTS:
    cost - the cost function (negative ELBO)
    '''
    x = tf.cast(x, tf.float32)
    x2 = tf.cast(x2, tf.float32)
    y = tf.cast(y, tf.float32)

    # compute moments of p(z|x,x2)
    mu_cz, log_sig_sq_cz = encoder_c.compute_moments(x, x2)

    # compute moments of q(z|x,x2,y)
    mu_z, log_sig_sq_z = encoder.compute_moments(y, x, x2)

    # sample from q(z|x,x2,y)
    z = reparameterisation_trick(mu_z, log_sig_sq_z)

    # bottleneck warmup: blend the conditions with uniform noise
    x_wu = (1.0 - wu_c) * x + wu_c * tf.random.uniform(tf.shape(x))
    x2_wu = (1.0 - wu_c) * x2 + wu_c * tf.random.uniform(tf.shape(x2))

    # compute moments of p(y|z,x,x2)
    mu_y, log_sig_sq_y = decoder.compute_moments(z, x_wu, x2_wu,
                                                 constrain=constrain)

    # KL(q(z|x,x2,y) || p(z|x,x2))
    KLe = kl_normal(mu_z, log_sig_sq_z, mu_cz, log_sig_sq_cz)
    KLc = tfm.reduce_sum(KLe, 1)
    KL = tfm.reduce_mean(tf.cast(KLc, tf.float32))

    # -E_q(z|x,x2,y) log p(y|z,x,x2)
    reconstr_loss = -tfm.reduce_sum(
        gaussian_log_likelihood(y, mu_y, log_sig_sq_y), 1)
    cost_R = tfm.reduce_mean(reconstr_loss)

    # -ELBO
    cost = cost_R + KL
    return cost
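# The VAE helpers referenced above are not defined in this section; below is
# a minimal sketch of the standard formulas they are assumed to implement
# (the project's originals may differ).
def reparameterisation_trick(mu, log_sig_sq):
    # z = mu + sigma * eps with eps ~ N(0, I); keeps the sample differentiable
    noise = tf.random.normal(tf.shape(mu), dtype=mu.dtype)
    return mu + tfm.exp(0.5 * log_sig_sq) * noise


def kl_normal(mu_q, log_sig_sq_q, mu_p, log_sig_sq_p):
    # Elementwise KL(N(mu_q, sig_q^2) || N(mu_p, sig_p^2)), diagonal Gaussians
    return 0.5 * (log_sig_sq_p - log_sig_sq_q +
                  (tfm.exp(log_sig_sq_q) + tfm.square(mu_q - mu_p)) /
                  tfm.exp(log_sig_sq_p) - 1.0)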
def mape(y_true, y_pred):
    # Masked mean absolute percentage error: channel 0 of y_true holds the
    # targets, channel 1 a validity mask. NaN/Inf from zero targets are
    # replaced with the mask value before averaging.
    mask = y_true[:, :, :, 1]
    y_true = y_true[:, :, :, 0]
    output = tf_maths.abs(y_true - y_pred) / y_true
    output = tf_where(tf_maths.is_nan(output), mask, output)
    output = tf_where(tf_maths.is_inf(output), mask, output)
    return tf_maths.reduce_sum(output) / tf_maths.reduce_sum(mask)
def recall(y_true, y_pred):
    # Recall at a fixed decision threshold (module-level global `threshold`);
    # y_true[:, 0] marks valid samples, y_true[:, 1] the positive class.
    # Returns 0.5 when the batch contains no positives.
    def return_0_5():
        return 0.5

    def return_rec():
        return count / total

    total = reduce_sum(y_true[:, 1])
    pred = tf.cast(y_pred[:, 1] >= threshold, dtype=tf.float32) * y_true[:, 0]
    count = reduce_sum(tf.cast(y_true[:, 1] + pred == 2, dtype=tf.float32))
    return tf.cond(total == 0, return_0_5, return_rec)
def _accuracy(y_true, y_pred):
    # Accuracy over valid samples (y_true[:, 0] is the validity mask);
    # returns 0.5 when the batch contains no valid samples.
    def return_0_5():
        return 0.5

    def return_acc():
        return count / total

    total = reduce_sum(y_true[:, 0])
    pred = tf.cast(y_pred[:, 1] >= threshold, dtype=tf.float32) * y_true[:, 0]
    matches = tf.cast(pred == y_true[:, 1], dtype=tf.float32) * y_true[:, 0]
    count = reduce_sum(matches)
    return tf.cond(total == 0, return_0_5, return_acc)
def compute_batch_tc(z, z_mean, z_log_var):
    """
    Estimates the total correlation over a batch.

    Based on the Locatello et al. implementation
    (https://github.com/google-research/disentanglement_lib).
    Computes E_j[log(q(z(x_j))) - log(prod_l q(z(x_j)_l))], where j indexes
    the batch and l indexes the latent factors.

    :param z: the sampled values
    :param z_mean: the mean of the Gaussian
    :param z_log_var: the log variance of the Gaussian
    :return: the total correlation estimated over the batch
    """
    log_qz = compute_gaussian_log_pdf(tf.expand_dims(z, 1),
                                      tf.expand_dims(z_mean, 0),
                                      tf.expand_dims(z_log_var, 0))
    # log prod_l q(z_l): logsumexp over the batch per latent, summed over latents
    prod_log_qz = tfm.reduce_sum(
        tfm.reduce_logsumexp(log_qz, axis=1, keepdims=False),
        axis=1, keepdims=False)
    # log q(z): sum over latents, then logsumexp over the batch
    log_sum_qz = tfm.reduce_logsumexp(
        tfm.reduce_sum(log_qz, axis=2, keepdims=False),
        axis=1, keepdims=False)
    # the original passed prod_log_qz as the axis argument of reduce_mean;
    # the estimator is the mean of the difference
    return tfm.reduce_mean(log_sum_qz - prod_log_qz)
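# `compute_gaussian_log_pdf` is referenced above but not defined here; a
# minimal sketch of the diagonal-Gaussian log density it is assumed to be
# (matching gaussian_log_density in disentanglement_lib):
def compute_gaussian_log_pdf(z, z_mean, z_log_var):
    # log N(z; mean, exp(log_var)), evaluated elementwise
    log2pi = np.log(2. * np.pi)
    return -0.5 * (tfm.square(z - z_mean) * tfm.exp(-z_log_var)
                   + z_log_var + log2pi)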
def call(self, ensemble_logits, logits):
    '''
    ensemble_logits are the outputs from our ensemble (batch x ensembles x classes)
    logits are the predicted outputs from our model (batch x classes)
    '''
    if self.temp is None:
        self.temp = self.init_temp

    # Convert values to appropriate type
    logits = tf.cast(logits, dtype=tf.float64)
    ensemble_logits = tf.cast(ensemble_logits, dtype=tf.float64)

    # Calculate probabilities by softmax over classes, adjusted for temperature
    ensemble_probs = softmax(ensemble_logits / self.temp, axis=2)
    PN_probs = softmax(logits / self.temp, axis=1)

    # Calculate mean teacher prediction (the original used reduce_sum here,
    # which only rescales the loss by the ensemble size)
    ensemble_probs_mean = reduce_mean(ensemble_probs, axis=1)

    # Calculate cost (cross-entropy), scaled by temp^2 as in standard
    # distillation; the original raised log(PN_probs) to the power temp^2,
    # which is ill-defined for negative log-probabilities
    cost = reduce_mean(-ensemble_probs_mean * log(PN_probs)) * (self.temp**2)

    return cost
def likelihood_loss(y_true, _):
    """Negative log-likelihood loss summed over the distribution layers."""
    # Defined inside a model class; `self` and `distributions` are captured
    # from the enclosing scope.
    return -math.reduce_sum([
        self._distribution_layers[pos].log_prob(
            y_true[:, pos:pos + 1] + distributions.EPSILON)
        for pos in range(len(self._distribution_layers))
    ])
def compute_py(self, x):
    '''
    compute probability for each class

    INPUTS:
    x - input

    OUTPUTS:
    py - histogram of probabilities for each class
    '''
    hidden1_pre = tfm.add(tfl.matmul(x, self.weights['W_x_to_h1']),
                          self.weights['b_x_to_h1'])
    hidden_post = self.nonlinearity(hidden1_pre)

    # hidden layers
    num_layers_middle = np.shape(self.N_h)[0] - 1
    ni = 1  # so the output layer indexes h1 when there are no middle layers
    for i in range(num_layers_middle):
        ni = i + 2
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)

    # unnormalised probabilities, kept strictly positive
    p_un = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_py'.format(ni)]),
        self.weights['b_h{}_to_py'.format(ni)])
    p_un = tf.nn.sigmoid(p_un) + 1e-6

    # normalise so each row sums to one
    py = tfm.divide(
        p_un,
        tf.tile(tf.expand_dims(tfm.reduce_sum(p_un, axis=1), axis=1),
                [1, self.n_y]))
    return py
def _log_prob(self, sample, x_hat):
    # x_hat gives the probability of each spin being +1, so map spins
    # {-1, +1} -> {0, 1} before evaluating the Bernoulli log-likelihood.
    mask = (sample + 1) / 2
    log_prob = (
        tfm.log(x_hat + self.epsilon) * tf.cast(mask, tf.float32) +
        tfm.log(1 - x_hat + self.epsilon) * tf.cast(1 - mask, tf.float32))
    log_prob = tfm.reduce_sum(log_prob, [1, 2, 3])
    return log_prob
def cos_unit(y_reco, y_true):
    # Cosine of the angle between reconstructed and true direction vectors,
    # assumed to live in columns 1:4 (the original sliced rows, y_reco[1:4],
    # which is inconsistent with the axis=1 reductions below). The result is
    # nudged away from +/-1 by the global `eps` so a downstream acos stays
    # finite.
    pred, true = y_reco[:, 1:4], y_true[:, 1:4]
    cosalpha = tf.math.divide_no_nan(
        reduce_sum(pred * true, axis=1),
        tf.math.reduce_euclidean_norm(pred, axis=1) *
        tf.math.reduce_euclidean_norm(true, axis=1))
    cosalpha -= tf.math.sign(cosalpha) * eps
    return cosalpha
def root_sum_squared_error(inputs):
    # Per-sample root of the summed squared error over all non-batch axes;
    # expects a pair (y_true, y_pred) of rank-5 tensors.
    return tf_math.sqrt(
        tf_math.reduce_sum(tf_math.square(inputs[0] - inputs[1]),
                           axis=(1, 2, 3, 4)))
def compute_gaussian_kl(z_log_var, z_mean):
    """
    Compute the KL divergence between a diagonal Gaussian and the standard
    normal distribution. Based on the Locatello et al. implementation
    (https://github.com/google-research/disentanglement_lib).

    :param z_log_var: the log variance of the Gaussian
    :param z_mean: the mean of the Gaussian
    :return: the KL divergence, summed over the latent dimensions
    """
    kl_loss = tfm.square(z_mean) + tfm.exp(z_log_var) - z_log_var - 1
    return 0.5 * tfm.reduce_sum(kl_loss, [1])
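# Sanity check (illustrative): with zero mean and unit variance the posterior
# equals the N(0, I) prior, so the KL term vanishes.
def example_gaussian_kl():
    z_mean = tf.zeros([4, 8])
    z_log_var = tf.zeros([4, 8])
    return compute_gaussian_kl(z_log_var, z_mean)  # -> zeros of shape [4]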
def compute_py(self, xl):
    '''
    compute probability for each class (convolutional variant)

    INPUTS:
    xl - input

    OUTPUTS:
    py - histogram of probabilities for each class
    '''
    # NOTE: the original docstring described Gaussian output moments
    # (mu_y, log_sig_sq_y), but the function returns class probabilities.
    x, _ = NN_utils.reshape_and_extract(xl, self.sz_im)
    hidden_post = layers.tf_conv_layer(x, self.weights['W_x_to_h1'],
                                       self.weights['b_x_to_h1'], self.St[0],
                                       self.nonlinearity)

    # convolutional layers
    num_layers_1 = np.shape(self.N_h1)[0] - 1
    ni = 1  # layer counter, continued through the fully connected stack
    for i in range(num_layers_1):
        ni = i + 2
        hidden_post = layers.tf_conv_layer(
            hidden_post, self.weights['W_h{}_to_h{}'.format(ni - 1, ni)],
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)], self.St[ni - 1],
            self.nonlinearity)

    hidden_post = NN_utils.flatten(hidden_post)

    # fully connected layers
    num_layers_F = np.shape(self.NF_h)[0]
    for i in range(num_layers_F):
        ni = ni + 1
        hidden_pre = tfm.add(
            tfl.matmul(hidden_post,
                       self.weights['W_h{}_to_h{}'.format(ni - 1, ni)]),
            self.weights['b_h{}_to_h{}'.format(ni - 1, ni)])
        hidden_post = self.nonlinearity(hidden_pre)

    # unnormalised probabilities, kept strictly positive, then normalised
    p_un = tfm.add(
        tfl.matmul(hidden_post, self.weights['W_h{}_to_py'.format(ni)]),
        self.weights['b_h{}_to_py'.format(ni)])
    p_un = tf.nn.sigmoid(p_un) + 1e-6
    py = tfm.divide(
        p_un,
        tf.tile(tf.expand_dims(tfm.reduce_sum(p_un, axis=1), axis=1),
                [1, self.n_y]))
    return py
def energy(sample, pbc=False):
    """Calculates the Ising energy under open or periodic boundary conditions.

    Assumes module-level globals J (coupling) and lattice ('tri' adds the
    diagonal bonds of a triangular lattice).

    Args:
        sample (tf.Tensor): A batch of Ising lattices sampled from a VAN
            network, shape (batch, Ny, Nx, 1) with spins in {-1, +1}
        pbc (bool): use periodic boundary conditions if True
    """
    if pbc:
        # nearest neighbours along y
        term = tf.roll(sample, 1, 1) * sample
        energy = tfm.reduce_sum(term, axis=[1, 2, 3])
        # nearest neighbours along x
        term = tf.roll(sample, 1, 2) * sample
        energy += tfm.reduce_sum(term, axis=[1, 2, 3])
        if lattice == 'tri':
            term = tf.roll(sample, [1, 1], [1, 2]) * sample
            energy += tfm.reduce_sum(term, axis=[1, 2, 3])
    else:
        # nearest neighbours along y
        term = sample[:, :-1, :, :] * sample[:, 1:, :, :]
        energy = tfm.reduce_sum(term, axis=[1, 2, 3])
        # nearest neighbours along x
        term = sample[:, :, :-1, :] * sample[:, :, 1:, :]
        energy += tfm.reduce_sum(term, axis=[1, 2, 3])
        if lattice == 'tri':
            term = sample[:, :-1, :-1, :] * sample[:, 1:, 1:, :]
            energy += tfm.reduce_sum(term, axis=[1, 2, 3])
    return tf.cast(J * energy, tf.float32)
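# Usage sketch (illustrative): on a 2x2 square lattice of all +1 spins with
# open boundaries there are 2 vertical and 2 horizontal bonds, so the result
# is 4 * J. Assumes the module-level globals J and lattice != 'tri'.
def example_energy():
    sample = tf.ones([1, 2, 2, 1])
    return energy(sample, pbc=False)  # -> [4. * J]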
def adaptive_wing_loss(labels, output):
    # Adaptive Wing loss for heatmap regression (Wang et al., 2019): behaves
    # like a scaled log for small errors and like L1 for large ones.
    alpha = 2.1
    omega = 14
    epsilon = 1
    theta = 0.5
    with tf.name_scope('adaptive_wing_loss'):
        x = output - labels
        theta_over_epsilon_tensor = tf.fill(tf.shape(labels), theta / epsilon)
        # A and C make the two branches meet smoothly at |x| = theta
        A = omega * (1 / (1 + pow(theta_over_epsilon_tensor, alpha - labels))) \
            * (alpha - labels) \
            * pow(theta_over_epsilon_tensor, alpha - labels - 1) * (1 / epsilon)
        C = theta * A - omega * log(
            1 + pow(theta_over_epsilon_tensor, alpha - labels))
        absolute_x = abs(x)
        losses = tf.where(
            greater(theta, absolute_x),
            omega * log(1 + pow(absolute_x / epsilon, alpha - labels)),
            A * absolute_x - C)
        loss = reduce_mean(reduce_sum(losses, axis=[1, 2]), axis=0)
        return loss
def create_model(albert_config, is_training, a_input_ids, a_input_mask,
                 a_segment_ids, b_input_ids, b_input_mask, b_segment_ids,
                 labels, num_labels, use_one_hot_embeddings):
    """Creates a two-tower ALBERT model trained on cosine similarity."""
    a_model = modeling.AlbertModel(
        config=albert_config,
        is_training=is_training,
        input_ids=a_input_ids,
        input_mask=a_input_mask,
        token_type_ids=a_segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    b_model = modeling.AlbertModel(
        config=albert_config,
        is_training=is_training,
        input_ids=b_input_ids,
        input_mask=b_input_mask,
        token_type_ids=b_segment_ids,
        use_one_hot_embeddings=use_one_hot_embeddings)

    # If you want to use the token-level output, use
    # model.get_sequence_output() instead.
    if FLAGS.use_pooled_output:
        tf.logging.info("using pooled output")
        a_output_layer = a_model.get_pooled_output()
        b_output_layer = b_model.get_pooled_output()
    else:
        tf.logging.info("using meaned output")
        a_output_layer = tf.reduce_mean(a_model.get_sequence_output(), axis=1)
        b_output_layer = tf.reduce_mean(b_model.get_sequence_output(), axis=1)

    with tf.variable_scope("loss"):
        if is_training:
            # I.e., 0.1 dropout (the original named both ops 'a_dropout')
            a_output_layer = tf.nn.dropout(a_output_layer, keep_prob=0.9,
                                           name='a_dropout')
            b_output_layer = tf.nn.dropout(b_output_layer, keep_prob=0.9,
                                           name='b_dropout')

        from tensorflow.math import l2_normalize, reduce_sum
        a_l2_norm = l2_normalize(a_output_layer, axis=-1)
        b_l2_norm = l2_normalize(b_output_layer, axis=-1)
        # cosine similarity per example, shape (batch_size,)
        predictions = reduce_sum(a_l2_norm * b_l2_norm, axis=-1)

        from tensorflow.keras.losses import MSE
        loss = MSE(labels, predictions)

    return (a_output_layer, loss, predictions)
def call(self, ensemble_logits, logits):
    '''
    ensemble_logits are the outputs from our ensemble (batch x ensembles x classes)
    logits are the predicted outputs from our model (batch x classes)
    '''
    logits = tf.cast(logits, dtype=tf.float64)
    ensemble_logits = tf.cast(ensemble_logits, dtype=tf.float64)

    # Dirichlet concentration parameters and their sum (precision)
    alphas = exp(logits / self.temp)
    precision = reduce_sum(alphas, axis=1)  # sum over classes

    ensemble_probs = softmax(ensemble_logits / self.temp, axis=2)  # softmax over classes

    # Smooth for numerical stability: subtract the uniform mean
    # (1 / nr of classes), scale down, add the mean back
    probs_mean = 1 / tf.shape(ensemble_probs)[2]
    ensemble_probs = self.tp_scaling * (ensemble_probs - probs_mean) + probs_mean

    log_ensemble_probs_geo_mean = reduce_mean(
        log(ensemble_probs + self.smooth_val), axis=1)  # mean over ensembles

    # sum over lgamma of classes - lgamma(precision)
    target_independent_term = reduce_sum(
        lgamma(alphas + self.smooth_val), axis=1) - lgamma(
            precision + self.smooth_val)
    # -sum over classes
    target_dependent_term = -reduce_sum(
        (alphas - 1.) * log_ensemble_probs_geo_mean, axis=1)

    cost = target_dependent_term + target_independent_term
    return reduce_mean(cost) * (self.temp**2)  # mean over the batch
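# Usage sketch for the Dirichlet distillation loss above (illustrative; the
# hyperparameters temp, tp_scaling and smooth_val live on the loss object,
# and `EnDDLoss` is a hypothetical class name):
#   loss_fn = EnDDLoss(...)
#   ensemble_logits = tf.random.normal([32, 5, 10])  # batch x ensembles x classes
#   logits = tf.random.normal([32, 10])              # batch x classes
#   loss = loss_fn(ensemble_logits, logits)          # scalar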
def proceed():
    # Gibbs-style update of per-TF delay parameters: for each transcription
    # factor, score every candidate delay in [lower, upper], normalise with a
    # max-shifted softmax, and draw a new delay by inverse-CDF sampling.
    num_tfs = current_state.shape[0]
    new_state = current_state
    Δrange = np.arange(self.lower, self.upper + 1, dtype='float64')
    Δrange_tf = tf.range(self.lower, self.upper + 1, dtype='float64')
    for i in range(num_tfs):
        # Generate normalised cumulative distribution
        probs = list()
        mask = np.zeros((num_tfs, ), dtype='float64')
        mask[i] = 1
        for Δ in Δrange:
            test_state = (1 - mask) * new_state + mask * Δ
            probs.append(
                tf.reduce_sum(
                    self.likelihood.genes(
                        all_states=all_states,
                        state_indices=self.state_indices,
                        Δ=test_state,
                    )) + tf.reduce_sum(self.prior.log_prob(Δ)))

        # softmax over candidate delays (max-shifted for stability)
        probs = tf.stack(probs) - tfm.reduce_max(probs)
        probs = tfm.exp(probs)
        probs = probs / tfm.reduce_sum(probs)
        cumsum = tfm.cumsum(probs)

        # inverse-CDF sampling: first index whose cumulative mass exceeds u
        u = tf.random.uniform([], dtype='float64')
        index = tf.where(
            cumsum == tf.reduce_min(cumsum[(cumsum - u) > 0]))
        chosen = Δrange_tf[index[0][0]]
        new_state = (1 - mask) * new_state + mask * chosen
    return new_state
def __call__(self, x, probes):
    """Propagate forward in time for the length of the input.

    Parameters
    ----------
    x : Input sequence(s), batched in first dimension
    probes : list of (px, py) coordinates at which the scalar wave field is
        read out; the output is the normalised power at each probe
    """
    # First dim is batch
    batch_size = x.shape[0]

    # init hidden states
    y1 = tf.zeros([batch_size, self.Nx, self.Ny], dtype=tf.dtypes.float32)
    y2 = tf.zeros([batch_size, self.Nx, self.Ny], dtype=tf.dtypes.float32)
    y_all = []

    for xi in x:
        y, y1 = self.time_step(xi, y1, y2)
        y_all.append(y)
    y = tf.stack(y_all, axis=1)

    # integrated power at each probe, normalised across probes
    total_sum = 0
    y_outs = []
    for probe_crd in probes:
        px, py = probe_crd
        y_out = math.reduce_sum(math.square(y[:, :, px, py]))
        total_sum += y_out
        y_outs.append(y_out)

    # the original called tf.constant on a list of tensors, which fails;
    # tf.stack builds the output tensor
    y_outs = tf.stack(y_outs) / total_sum
    return y_outs
def mse(pred, label):
    # Mean squared error: squared differences summed over all elements,
    # averaged over the batch (first dimension)
    loss = math.squared_difference(label, pred)
    return math.reduce_sum(loss) / len(pred)
def rmse(y_true, y_pred):
    # Masked root mean squared error; channel 1 of y_true is the validity mask
    mask = y_true[:, :, :, 1]
    y_true = y_true[:, :, :, 0]
    output = ((y_true - y_pred)**2) * mask
    return tf_maths.sqrt(
        tf_maths.reduce_sum(output) / tf_maths.reduce_sum(mask))
def BCE_with_sample_type_indicator(y_true, y_pred):
    # Per-sample BCE weighted by the sample-type indicator in column 0
    return reduce_sum(
        y_true[:, 0] * binary_crossentropy(y_true=y_true[:, 1:],
                                           y_pred=y_pred[:, 1:]))
def MSE_with_sti_and_hsm(y_true, y_pred):
    # As MSE_with_sample_type_indicator, plus hard sample mining: keep only
    # the `num_back` largest weighted errors (num_back is a module global)
    return reduce_sum(
        tf.sort(y_true[:, 0] * MSE(y_true=y_true[:, 1:], y_pred=y_pred[:, 1:]),
                direction='DESCENDING')[:num_back])
def MSE_with_sample_type_indicator(y_true, y_pred):
    # Per-sample MSE weighted by the sample-type indicator in column 0
    return reduce_sum(y_true[:, 0] * MSE(y_true=y_true[:, 1:],
                                         y_pred=y_pred[:, 1:]))
def BCE_with_sti_and_hsm(y_true, y_pred):
    # BCE with sample-type indicator and hard sample mining (top num_back)
    return reduce_sum(
        tf.sort(y_true[:, 0] * binary_crossentropy(y_true=y_true[:, 1:],
                                                   y_pred=y_pred[:, 1:]),
                direction='DESCENDING')[:num_back])
def mae(y_true, y_pred):
    # Masked mean absolute error; channel 1 of y_true is the validity mask
    mask = y_true[:, :, :, 1]
    y_true = y_true[:, :, :, 0]
    output = tf_maths.abs(y_true - y_pred) * mask
    return tf_maths.reduce_sum(output) / tf_maths.reduce_sum(mask)
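# Usage sketch (illustrative): the masked metrics above (mape, rmse, mae)
# expect y_true packed with targets in channel 0 and a validity mask in
# channel 1; shapes here are placeholders.
def example_masked_metrics():
    target = tf.random.uniform([2, 8, 8])
    mask = tf.ones([2, 8, 8])  # 1 = pixel counts towards the metric
    y_true = tf.stack([target, mask], axis=-1)  # (batch, H, W, 2)
    y_pred = tf.random.uniform([2, 8, 8])
    return mae(y_true, y_pred)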