Example #1
def prepare_transformer_layer(
    attribute_name: Text,
    config: Dict[Text, Any],
    num_layers: int,
    units: int,
    drop_rate: float,
    unidirectional: bool,
) -> Union[
    TransformerEncoder,
    Callable[
        [tf.Tensor, Optional[tf.Tensor], Optional[Union[tf.Tensor, bool]]],
        Tuple[tf.Tensor, Optional[tf.Tensor]],
    ],
]:
    """Creates & returns a transformer encoder, potentially with 0 layers."""
    if num_layers > 0:
        return TransformerEncoder(
            num_layers,
            units,
            config[NUM_HEADS],
            units * 4,
            config[REGULARIZATION_CONSTANT],
            dropout_rate=drop_rate,
            attention_dropout_rate=config[DROP_RATE_ATTENTION],
            density=config[CONNECTION_DENSITY],
            unidirectional=unidirectional,
            use_key_relative_position=config[KEY_RELATIVE_ATTENTION],
            use_value_relative_position=config[VALUE_RELATIVE_ATTENTION],
            max_relative_position=config[MAX_RELATIVE_POSITION],
            name=f"{attribute_name}_encoder",
        )
    # create lambda so that it can be used later without the check
    return lambda x, mask, training: (x, None)
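Whichever branch runs, the caller can treat the result the same way: call it with (x, mask, training) and unpack an (output, attention) pair, as spelled out by the return annotation above. Below is a minimal sketch of such a call site, exercising only the zero-layer fallback; the tensor shapes and variable names are illustrative assumptions, not taken from the source.

import tensorflow as tf

# Zero-layer fallback, exactly as returned above when num_layers == 0.
layer = lambda x, mask, training: (x, None)

x = tf.random.uniform((2, 7, 32))  # (batch, sequence, units), shapes assumed
mask = tf.ones((2, 7, 1))          # padding mask, shape assumed for illustration

# The same call also works for a TransformerEncoder built with num_layers > 0.
outputs, attention = layer(x, mask, training=False)
print(outputs.shape, attention)    # (2, 7, 32) None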
Example #2
 def _prepare_transformer_layer(
     self,
     name: Text,
     num_layers: int,
     units: int,
     drop_rate: float,
     drop_rate_attention: float,
     unidirectional: bool,
     prefix: Text = "transformer",
 ):
     if num_layers > 0:
         self._tf_layers[f"{prefix}.{name}"] = TransformerEncoder(
             num_layers,
             units,
             self.config[NUM_HEADS],
             units * 4,
             self.config[REGULARIZATION_CONSTANT],
             dropout_rate=drop_rate,
             attention_dropout_rate=drop_rate_attention,
             sparsity=self.config[WEIGHT_SPARSITY],
             unidirectional=unidirectional,
             use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
             use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
             max_relative_position=self.config[MAX_RELATIVE_POSITION],
             name=f"{name}_encoder",
         )
     else:
         # create lambda so that it can be used later without the check
         self._tf_layers[f"{prefix}.{name}"] = lambda x, mask, training: (
             x, None)
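For reference, these are the keys this method reads from self.config. The constant names are the ones referenced in the snippet; the import location and all values below are assumptions for illustration, not Rasa's documented defaults.

from rasa.utils.tensorflow.constants import (  # assumed import location
    NUM_HEADS,
    REGULARIZATION_CONSTANT,
    WEIGHT_SPARSITY,
    KEY_RELATIVE_ATTENTION,
    VALUE_RELATIVE_ATTENTION,
    MAX_RELATIVE_POSITION,
)

# Placeholder values covering every key _prepare_transformer_layer touches.
config = {
    NUM_HEADS: 4,                     # number of attention heads
    REGULARIZATION_CONSTANT: 0.001,   # L2 regularization strength
    WEIGHT_SPARSITY: 0.8,             # fraction of kernel weights set to zero
    KEY_RELATIVE_ATTENTION: False,    # relative position information for keys
    VALUE_RELATIVE_ATTENTION: False,  # relative position information for values
    MAX_RELATIVE_POSITION: None,      # clipping distance for relative attention
}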
Example #3
 def _prepare_transformer_layer(
     self,
     name: Text,
     drop_rate: float,
     drop_rate_attention: float,
     prefix: Text = "transformer",
 ):
     if self.config[NUM_TRANSFORMER_LAYERS] > 0:
         self._tf_layers[f"{prefix}.{name}"] = TransformerEncoder(
             self.config[NUM_TRANSFORMER_LAYERS],
             self.config[TRANSFORMER_SIZE],
             self.config[NUM_HEADS],
             self.config[TRANSFORMER_SIZE] * 4,
             self.config[REGULARIZATION_CONSTANT],
             dropout_rate=drop_rate,
             attention_dropout_rate=drop_rate_attention,
             sparsity=self.config[WEIGHT_SPARSITY],
             unidirectional=self.config[UNIDIRECTIONAL_ENCODER],
             use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
             use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
             max_relative_position=self.config[MAX_RELATIVE_POSITION],
             name=f"{name}_encoder",
         )
     else:
         # create lambda so that it can be used later without the check
         self._tf_layers[f"{prefix}.{name}"] = lambda x, mask, training: x
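Note the difference in the zero-layer fallback: Examples #1 and #2 return an (output, attention) tuple, while this variant returns the tensor alone, presumably matching what the respective versions of TransformerEncoder return. A side-by-side sketch; the call sites are assumptions, not code from the source.

import tensorflow as tf

x = tf.zeros((1, 4, 8))  # (batch, sequence, units), shape assumed

# Examples #1/#2: the fallback mimics an encoder returning (output, attention).
fallback_tuple = lambda x, mask, training: (x, None)
out, attention = fallback_tuple(x, None, training=False)

# Example #3: the fallback mimics an encoder returning the output tensor alone.
fallback_plain = lambda x, mask, training: x
out = fallback_plain(x, None, training=False)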
Example #4
 def _prepare_layers(self) -> None:
     self._tf_layers[f"loss.{LABEL}"] = layers.DotProductLoss(
         self.config[NUM_NEG],
         self.config[LOSS_TYPE],
         self.config[MAX_POS_SIM],
         self.config[MAX_NEG_SIM],
         self.config[USE_MAX_NEG_SIM],
         self.config[NEGATIVE_MARGIN_SCALE],
         self.config[SCALE_LOSS],
         # set to 1 to get deterministic behaviour
         parallel_iterations=1 if self.random_seed is not None else 1000,
     )
     self._tf_layers[f"ffnn.{DIALOGUE}"] = layers.Ffnn(
         self.config[HIDDEN_LAYERS_SIZES][DIALOGUE],
         self.config[DROP_RATE_DIALOGUE],
         self.config[REGULARIZATION_CONSTANT],
         self.config[WEIGHT_SPARSITY],
         layer_name_suffix=DIALOGUE,
     )
     self._tf_layers[f"ffnn.{LABEL}"] = layers.Ffnn(
         self.config[HIDDEN_LAYERS_SIZES][LABEL],
         self.config[DROP_RATE_LABEL],
         self.config[REGULARIZATION_CONSTANT],
         self.config[WEIGHT_SPARSITY],
         layer_name_suffix=LABEL,
     )
     self._tf_layers["transformer"] = TransformerEncoder(
         self.config[NUM_TRANSFORMER_LAYERS],
         self.config[TRANSFORMER_SIZE],
         self.config[NUM_HEADS],
         self.config[TRANSFORMER_SIZE] * 4,
         self.config[REGULARIZATION_CONSTANT],
         dropout_rate=self.config[DROP_RATE_DIALOGUE],
         attention_dropout_rate=self.config[DROP_RATE_ATTENTION],
         sparsity=self.config[WEIGHT_SPARSITY],
         unidirectional=True,
         use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
         use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
         max_relative_position=self.config[MAX_RELATIVE_POSITION],
         name=DIALOGUE + "_encoder",
     )
     self._tf_layers[f"embed.{DIALOGUE}"] = layers.Embed(
         self.config[EMBEDDING_DIMENSION],
         self.config[REGULARIZATION_CONSTANT],
         DIALOGUE,
         self.config[SIMILARITY_TYPE],
     )
     self._tf_layers[f"embed.{LABEL}"] = layers.Embed(
         self.config[EMBEDDING_DIMENSION],
         self.config[REGULARIZATION_CONSTANT],
         LABEL,
         self.config[SIMILARITY_TYPE],
     )
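After _prepare_layers() has run, self._tf_layers holds one entry per building block: a dot-product loss, a feed-forward block and an embedding layer for each of the dialogue and label sides, and a single unidirectional transformer over the dialogue sequence. A small sketch of the resulting keys, inferred directly from the snippet; DIALOGUE and LABEL are assumed to be the strings "dialogue" and "label", as in Rasa's constants.

DIALOGUE, LABEL = "dialogue", "label"  # assumed values of the Rasa constants

# Keys expected in self._tf_layers once _prepare_layers() has finished.
expected_keys = {
    f"loss.{LABEL}",      # layers.DotProductLoss (similarity-based loss)
    f"ffnn.{DIALOGUE}",   # layers.Ffnn over dialogue features
    f"ffnn.{LABEL}",      # layers.Ffnn over label features
    "transformer",        # unidirectional TransformerEncoder
    f"embed.{DIALOGUE}",  # layers.Embed for the dialogue representation
    f"embed.{LABEL}",     # layers.Embed for the label representation
}
print(sorted(expected_keys))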