def prepare_transformer_layer(
    attribute_name: Text,
    config: Dict[Text, Any],
    num_layers: int,
    units: int,
    drop_rate: float,
    unidirectional: bool,
) -> Union[
    TransformerEncoder,
    Callable[
        [tf.Tensor, Optional[tf.Tensor], Optional[Union[tf.Tensor, bool]]],
        Tuple[tf.Tensor, Optional[tf.Tensor]],
    ],
]:
    """Creates & returns a transformer encoder, potentially with 0 layers."""
    if num_layers > 0:
        return TransformerEncoder(
            num_layers,
            units,
            config[NUM_HEADS],
            units * 4,
            config[REGULARIZATION_CONSTANT],
            dropout_rate=drop_rate,
            attention_dropout_rate=config[DROP_RATE_ATTENTION],
            density=config[CONNECTION_DENSITY],
            unidirectional=unidirectional,
            use_key_relative_position=config[KEY_RELATIVE_ATTENTION],
            use_value_relative_position=config[VALUE_RELATIVE_ATTENTION],
            max_relative_position=config[MAX_RELATIVE_POSITION],
            name=f"{attribute_name}_encoder",
        )
    # create a lambda so that it can be used later without the check
    return lambda x, mask, training: (x, None)
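# A minimal, hypothetical usage sketch (not part of the function above): the
# config keys are the ones used elsewhere in this file, while `config`,
# `inputs`, `pad_mask` and `training` are assumed placeholders. The point is
# that the returned object is called the same way whether it is a real
# TransformerEncoder or the zero-layer pass-through lambda, and it always
# yields an (outputs, attention_weights) pair.
text_transformer = prepare_transformer_layer(
    attribute_name="text",
    config=config,
    num_layers=config[NUM_TRANSFORMER_LAYERS],
    units=config[TRANSFORMER_SIZE],
    drop_rate=0.1,
    unidirectional=False,
)
outputs, attention_weights = text_transformer(inputs, pad_mask, training)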
def _prepare_transformer_layer(
    self,
    name: Text,
    num_layers: int,
    units: int,
    drop_rate: float,
    drop_rate_attention: float,
    unidirectional: bool,
    prefix: Text = "transformer",
) -> None:
    """Creates a transformer encoder (or a pass-through lambda for 0 layers)."""
    if num_layers > 0:
        self._tf_layers[f"{prefix}.{name}"] = TransformerEncoder(
            num_layers,
            units,
            self.config[NUM_HEADS],
            units * 4,
            self.config[REGULARIZATION_CONSTANT],
            dropout_rate=drop_rate,
            attention_dropout_rate=drop_rate_attention,
            sparsity=self.config[WEIGHT_SPARSITY],
            unidirectional=unidirectional,
            use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
            use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
            max_relative_position=self.config[MAX_RELATIVE_POSITION],
            name=f"{name}_encoder",
        )
    else:
        # create a lambda so that it can be used later without the check
        self._tf_layers[f"{prefix}.{name}"] = lambda x, mask, training: (x, None)
def _prepare_transformer_layer(
    self,
    name: Text,
    drop_rate: float,
    drop_rate_attention: float,
    prefix: Text = "transformer",
) -> None:
    """Creates a transformer encoder (or a pass-through lambda for 0 layers)."""
    if self.config[NUM_TRANSFORMER_LAYERS] > 0:
        self._tf_layers[f"{prefix}.{name}"] = TransformerEncoder(
            self.config[NUM_TRANSFORMER_LAYERS],
            self.config[TRANSFORMER_SIZE],
            self.config[NUM_HEADS],
            self.config[TRANSFORMER_SIZE] * 4,
            self.config[REGULARIZATION_CONSTANT],
            dropout_rate=drop_rate,
            attention_dropout_rate=drop_rate_attention,
            sparsity=self.config[WEIGHT_SPARSITY],
            unidirectional=self.config[UNIDIRECTIONAL_ENCODER],
            use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
            use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
            max_relative_position=self.config[MAX_RELATIVE_POSITION],
            name=f"{name}_encoder",
        )
    else:
        # create a lambda so that it can be used later without the check;
        # note that this variant returns only the tensor, not a (tensor, attention) tuple
        self._tf_layers[f"{prefix}.{name}"] = lambda x, mask, training: x
def _prepare_layers(self) -> None:
    """Instantiates the loss, feed-forward, transformer and embedding layers."""
    self._tf_layers[f"loss.{LABEL}"] = layers.DotProductLoss(
        self.config[NUM_NEG],
        self.config[LOSS_TYPE],
        self.config[MAX_POS_SIM],
        self.config[MAX_NEG_SIM],
        self.config[USE_MAX_NEG_SIM],
        self.config[NEGATIVE_MARGIN_SCALE],
        self.config[SCALE_LOSS],
        # set to 1 to get deterministic behaviour
        parallel_iterations=1 if self.random_seed is not None else 1000,
    )
    self._tf_layers[f"ffnn.{DIALOGUE}"] = layers.Ffnn(
        self.config[HIDDEN_LAYERS_SIZES][DIALOGUE],
        self.config[DROP_RATE_DIALOGUE],
        self.config[REGULARIZATION_CONSTANT],
        self.config[WEIGHT_SPARSITY],
        layer_name_suffix=DIALOGUE,
    )
    self._tf_layers[f"ffnn.{LABEL}"] = layers.Ffnn(
        self.config[HIDDEN_LAYERS_SIZES][LABEL],
        self.config[DROP_RATE_LABEL],
        self.config[REGULARIZATION_CONSTANT],
        self.config[WEIGHT_SPARSITY],
        layer_name_suffix=LABEL,
    )
    self._tf_layers["transformer"] = TransformerEncoder(
        self.config[NUM_TRANSFORMER_LAYERS],
        self.config[TRANSFORMER_SIZE],
        self.config[NUM_HEADS],
        self.config[TRANSFORMER_SIZE] * 4,
        self.config[REGULARIZATION_CONSTANT],
        dropout_rate=self.config[DROP_RATE_DIALOGUE],
        attention_dropout_rate=self.config[DROP_RATE_ATTENTION],
        sparsity=self.config[WEIGHT_SPARSITY],
        unidirectional=True,
        use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
        use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
        max_relative_position=self.config[MAX_RELATIVE_POSITION],
        name=DIALOGUE + "_encoder",
    )
    self._tf_layers[f"embed.{DIALOGUE}"] = layers.Embed(
        self.config[EMBEDDING_DIMENSION],
        self.config[REGULARIZATION_CONSTANT],
        DIALOGUE,
        self.config[SIMILARITY_TYPE],
    )
    self._tf_layers[f"embed.{LABEL}"] = layers.Embed(
        self.config[EMBEDDING_DIMENSION],
        self.config[REGULARIZATION_CONSTANT],
        LABEL,
        self.config[SIMILARITY_TYPE],
    )
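# Hypothetical sketch of how the prepared layers might be used later in the
# forward pass (tensor names such as dialogue_in, label_in, pad_mask and
# self._training are assumptions, not taken from the method above): each layer
# is fetched back from self._tf_layers by the same string key it was
# registered under.
dialogue = self._tf_layers[f"ffnn.{DIALOGUE}"](dialogue_in, self._training)
dialogue = self._tf_layers["transformer"](dialogue, pad_mask, self._training)
dialogue_embed = self._tf_layers[f"embed.{DIALOGUE}"](dialogue)
label = self._tf_layers[f"ffnn.{LABEL}"](label_in, self._training)
label_embed = self._tf_layers[f"embed.{LABEL}"](label)
# the DotProductLoss layer registered under f"loss.{LABEL}" is then applied to
# these embeddings; its exact argument list is omitted here.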