    def benchmark_layers_advanced_activations_softmax_overhead(self):
        layer = advanced_activations.Softmax()
        x = array_ops.ones((1, 1))

        def fn():
            layer(x)

        self._run(fn, 10000)
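
# A minimal standalone sketch of what the benchmark above measures, using the
# public `tf.keras.layers.Softmax` API. `self._run` comes from the benchmark
# base class and is not shown here, so `timeit` stands in for it; the function
# name and `iters` parameter are hypothetical.
import timeit

import tensorflow as tf


def softmax_overhead_sketch(iters=10000):
    # Build a tiny Softmax layer and a trivial input, then time repeated calls
    # to approximate the per-call layer overhead in seconds.
    layer = tf.keras.layers.Softmax()
    x = tf.ones((1, 1))
    total = timeit.timeit(lambda: layer(x), number=iters)
    return total / iters
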
    def _build_attention(self, rank):
        """Builds multi-head dot-product attention computations.

        This function builds attributes necessary for `_compute_attention` to
        customize attention computation to replace the default dot-product
        attention.

        Args:
          rank: the rank of query, key, value tensors.
        """
        if self._attention_axes is None:
            # By default, attend over all axes between the batch axis and the
            # last two (heads and per-head feature) axes of the projected query.
            self._attention_axes = tuple(range(1, rank - 2))
        else:
            self._attention_axes = tuple(self._attention_axes)
        self._dot_product_equation, self._combine_equation, attn_scores_rank = (
            _build_attention_equation(rank, attn_axes=self._attention_axes))
        # Normalize the attention scores over their trailing axes, which
        # correspond to the key-side attention axes.
        norm_axes = tuple(
            range(attn_scores_rank - len(self._attention_axes),
                  attn_scores_rank))
        self._softmax = advanced_activations.Softmax(axis=norm_axes)
        self._dropout_layer = core.Dropout(rate=self._dropout)
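
# Illustrative sketch of the axis bookkeeping in `_build_attention`, assuming a
# projected query of shape [batch, seq, num_heads, key_dim] (rank 4).
# `_build_attention_equation` is not shown here, so `attn_scores_rank` below is
# a stand-in for the value it returns; with a single attention axis the scores
# tensor is typically [batch, num_heads, query_seq, key_seq] (rank 4). The
# function name is hypothetical.
def attention_axes_sketch(rank=4, attention_axes=None):
    if attention_axes is None:
        # Default: every axis between batch and the last two (heads, key_dim);
        # for rank 4 this is just the sequence axis (1,).
        attention_axes = tuple(range(1, rank - 2))
    attn_scores_rank = rank  # Stand-in for the helper's return value.
    # Softmax normalizes over the trailing axes of the scores tensor, which
    # correspond to the key-side attention axes.
    norm_axes = tuple(
        range(attn_scores_rank - len(attention_axes), attn_scores_rank))
    return attention_axes, norm_axes


# Default case: attention_axes == (1,), norm_axes == (3,).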