def benchmark_layers_advanced_activations_softmax_overhead(self):
  layer = advanced_activations.Softmax()
  x = array_ops.ones((1, 1))

  def fn():
    layer(x)

  self._run(fn, 10000)
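# A minimal standalone sketch of the same overhead measurement, assuming only
# the public TF 2.x API: it times 10000 eager calls of a Softmax layer on a
# 1x1 input with `timeit` instead of the benchmark harness's internal
# `advanced_activations` / `_run` helpers used above.
import timeit

import tensorflow as tf

softmax_layer = tf.keras.layers.Softmax()
ones_input = tf.ones((1, 1))

# Total wall-clock time for 10000 eager layer calls on a tiny input.
elapsed = timeit.timeit(lambda: softmax_layer(ones_input), number=10000)
print("total seconds for 10000 calls:", elapsed)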
def _build_attention(self, rank):
  """Builds multi-head dot-product attention computations.

  This function builds attributes necessary for `_compute_attention` to
  customize attention computation to replace the default dot-product
  attention.

  Args:
    rank: the rank of query, key, value tensors.
  """
  if self._attention_axes is None:
    self._attention_axes = tuple(range(1, rank - 2))
  else:
    self._attention_axes = tuple(self._attention_axes)
  self._dot_product_equation, self._combine_equation, attn_scores_rank = (
      _build_attention_equation(rank, attn_axes=self._attention_axes))
  norm_axes = tuple(
      range(attn_scores_rank - len(self._attention_axes), attn_scores_rank))
  self._softmax = advanced_activations.Softmax(axis=norm_axes)
  self._dropout_layer = core.Dropout(rate=self._dropout)
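# Illustrative sketch (not part of the layer): for the common rank-4 case,
# where projected query/key/value are shaped (batch, seq, num_heads, key_dim),
# the default `attention_axes` becomes (1,) and the softmax normalizes over
# the last (key-sequence) axis of the scores. The einsum strings and shapes
# below are an assumption for illustration; the layer itself derives its
# equations via `_build_attention_equation`.
import tensorflow as tf

batch, t_q, t_k, num_heads, key_dim = 2, 3, 5, 4, 8
query = tf.random.normal((batch, t_q, num_heads, key_dim))
key = tf.random.normal((batch, t_k, num_heads, key_dim))
value = tf.random.normal((batch, t_k, num_heads, key_dim))

# Attention scores over the query/key sequence axes: (batch, num_heads, t_q, t_k).
scores = tf.einsum("aecd,abcd->aebc"[::-1][::-1] if False else "aecd,abcd->acbe",
                   key, query) / (float(key_dim) ** 0.5)
# Softmax over the last axis, matching norm_axes = (attn_scores_rank - 1,).
weights = tf.nn.softmax(scores, axis=-1)
# Combine weights with values: back to (batch, t_q, num_heads, key_dim).
output = tf.einsum("acbe,aecd->abcd", weights, value)
print(output.shape)  # (2, 3, 4, 8)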