def test_greater_v1(test_case):
    """Check ``flow.gt`` against ``np.greater`` for inputs built with ``flow.Tensor``.

    Args:
        test_case: object providing ``assertTrue`` (e.g. a ``unittest.TestCase``).
    """
    lhs_values = np.array([1, 1, 4]).astype(np.float32)
    rhs_values = np.array([1, 2, 3]).astype(np.float32)
    lhs = flow.Tensor(lhs_values, dtype=flow.float32)
    rhs = flow.Tensor(rhs_values, dtype=flow.float32)

    actual = flow.gt(lhs, rhs)
    # The reference result is computed from the tensors' own numpy views.
    expected = np.greater(lhs.numpy(), rhs.numpy())

    test_case.assertTrue(np.array_equal(actual.numpy(), expected))
def _test_greater_normal(test_case, device):
    """Check ``flow.gt`` against ``np.greater`` for tensors placed on ``device``.

    Args:
        test_case: object providing ``assertTrue`` (e.g. a ``unittest.TestCase``).
        device: value forwarded to ``flow.device`` to place both inputs.
    """

    def _float_tensor(values):
        # Build a float32 tensor on the requested device from a Python list.
        return flow.tensor(
            np.array(values).astype(np.float32),
            dtype=flow.float32,
            device=flow.device(device),
        )

    lhs = _float_tensor([1, 1, 4])
    rhs = _float_tensor([1, 2, 3])

    actual = flow.gt(lhs, rhs)
    expected = np.greater(lhs.numpy(), rhs.numpy())

    test_case.assertTrue(np.array_equal(actual.numpy(), expected))
def _prob_in_top_k(
    self, clean_values, noisy_values, noise_stddev, noisy_top_values
):
    """Helper for noisy top-k gating: probability that each value is in the top k.

    For every entry, a threshold is picked from ``noisy_top_values`` depending
    on whether the noisy value currently exceeds the entry at column
    ``self.k``; ``cdf`` is then evaluated on the clean value's distance to
    that threshold, scaled by ``noise_stddev``.

    Args:
        clean_values: a tensor, expected shape [batch, n].
        noisy_values: a tensor, expected shape [batch, n]; per the original
            contract, clean values plus noise with standard deviation
            ``noise_stddev``.
        noise_stddev: a tensor, expected shape [batch, n].
            NOTE(review): earlier documentation allowed ``None`` here, but
            the body divides by this value unconditionally, so ``None`` is
            not handled in this function.
        noisy_top_values: a tensor, expected shape [batch, m], holding the
            per-row top values of the noisy scores. Columns ``self.k - 1``
            and ``self.k`` are read from every row, so ``m >= self.k + 1``
            is required.

    Returns:
        The result of ``flow.where`` over the two ``cdf`` outputs; expected
        shape [batch, n].
    """
    num_rows = clean_values.size(0)
    top_width = noisy_top_values.size(1)
    flat_top = noisy_top_values.flatten()

    # Flat index of column ``self.k`` in each row of the flattened top values,
    # and of the column just before it.
    idx_if_in = (
        flow.arange(num_rows, device=noisy_values.device) * top_width + self.k
    )
    idx_if_out = idx_if_in - 1

    # Column vectors ([batch, 1]) of the threshold applying in each case.
    cutoff_if_in = flow.unsqueeze(flow.gather(flat_top, 0, idx_if_in), 1)
    cutoff_if_out = flow.unsqueeze(flow.gather(flat_top, 0, idx_if_out), 1)

    # Is each value currently in the top k?
    currently_in = flow.gt(noisy_values, cutoff_if_in)

    return flow.where(
        currently_in,
        cdf((clean_values - cutoff_if_in) / noise_stddev),
        cdf((clean_values - cutoff_if_out) / noise_stddev),
    )
def _gt(self, other):
    """Return the result of ``flow.gt(self, other)``."""
    result = flow.gt(self, other)
    return result