def _prepare_layers(self) -> None:
    """Prepares the loss, feed-forward, transformer and embedding layers."""
    self._tf_layers[f"loss.{LABEL}"] = layers.DotProductLoss(
        self.config[NUM_NEG],
        self.config[LOSS_TYPE],
        self.config[MAX_POS_SIM],
        self.config[MAX_NEG_SIM],
        self.config[USE_MAX_NEG_SIM],
        self.config[NEGATIVE_MARGIN_SCALE],
        self.config[SCALE_LOSS],
        # set to 1 to get deterministic behaviour
        parallel_iterations=1 if self.random_seed is not None else 1000,
    )
    self._tf_layers[f"ffnn.{DIALOGUE}"] = layers.Ffnn(
        self.config[HIDDEN_LAYERS_SIZES][DIALOGUE],
        self.config[DROP_RATE_DIALOGUE],
        self.config[REGULARIZATION_CONSTANT],
        self.config[WEIGHT_SPARSITY],
        layer_name_suffix=DIALOGUE,
    )
    self._tf_layers[f"ffnn.{LABEL}"] = layers.Ffnn(
        self.config[HIDDEN_LAYERS_SIZES][LABEL],
        self.config[DROP_RATE_LABEL],
        self.config[REGULARIZATION_CONSTANT],
        self.config[WEIGHT_SPARSITY],
        layer_name_suffix=LABEL,
    )
    self._tf_layers["transformer"] = TransformerEncoder(
        self.config[NUM_TRANSFORMER_LAYERS],
        self.config[TRANSFORMER_SIZE],
        self.config[NUM_HEADS],
        self.config[TRANSFORMER_SIZE] * 4,
        self.config[REGULARIZATION_CONSTANT],
        dropout_rate=self.config[DROP_RATE_DIALOGUE],
        attention_dropout_rate=self.config[DROP_RATE_ATTENTION],
        sparsity=self.config[WEIGHT_SPARSITY],
        unidirectional=True,
        use_key_relative_position=self.config[KEY_RELATIVE_ATTENTION],
        use_value_relative_position=self.config[VALUE_RELATIVE_ATTENTION],
        max_relative_position=self.config[MAX_RELATIVE_POSITION],
        name=DIALOGUE + "_encoder",
    )
    self._tf_layers[f"embed.{DIALOGUE}"] = layers.Embed(
        self.config[EMBEDDING_DIMENSION],
        self.config[REGULARIZATION_CONSTANT],
        DIALOGUE,
        self.config[SIMILARITY_TYPE],
    )
    self._tf_layers[f"embed.{LABEL}"] = layers.Embed(
        self.config[EMBEDDING_DIMENSION],
        self.config[REGULARIZATION_CONSTANT],
        LABEL,
        self.config[SIMILARITY_TYPE],
    )
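
# Illustrative sketch (not part of the original class): one plausible way the layers
# prepared above could be chained, with dialogue features flowing through
# ffnn -> transformer -> embed and label features through ffnn -> embed, before the
# dot-product loss compares the two embeddings. The call signatures and the
# `self._training` flag used below are simplified assumptions, not the exact Rasa
# layer interfaces.
def _embed_dialogue_sketch(self, dialogue_in: tf.Tensor, mask: tf.Tensor) -> tf.Tensor:
    x = self._tf_layers[f"ffnn.{DIALOGUE}"](dialogue_in, self._training)
    x = self._tf_layers["transformer"](x, 1 - mask, self._training)
    return self._tf_layers[f"embed.{DIALOGUE}"](x)


def _embed_label_sketch(self, label_in: tf.Tensor) -> tf.Tensor:
    x = self._tf_layers[f"ffnn.{LABEL}"](label_in, self._training)
    return self._tf_layers[f"embed.{LABEL}"](x)
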
def _prepare_sequence_sentence_concat(
    self, attribute: Text, config: Dict[Text, Any]
) -> None:
    """Sets up combining sentence- and sequence-level features (if needed).

    This boils down to preparing for unifying the units of the sequence- and
    sentence-level features if they differ -- the same number of units is
    required for combining the features.
    """
    if (
        self._feature_types_present[SEQUENCE]
        and self._feature_types_present[SENTENCE]
    ):
        # The output units of this layer will be based on the output sizes of the
        # sparse+dense combining layers that are internally applied to all features.
        sequence_units = self._tf_layers[f"sparse_dense.{SEQUENCE}"].output_units
        sentence_units = self._tf_layers[f"sparse_dense.{SENTENCE}"].output_units

        # Last dimension needs to be unified if sequence- and sentence-level
        # features have different sizes, e.g. due to being produced by different
        # featurizers.
        if sequence_units != sentence_units:
            for feature_type in [SEQUENCE, SENTENCE]:
                self._tf_layers[
                    f"unify_dims_before_seq_sent_concat.{feature_type}"
                ] = layers.Ffnn(
                    layer_name_suffix=f"unify_dims.{attribute}_{feature_type}",
                    layer_sizes=[config[CONCAT_DIMENSION][attribute]],
                    dropout_rate=config[DROP_RATE],
                    reg_lambda=config[REGULARIZATION_CONSTANT],
                    density=config[CONNECTION_DENSITY],
                )
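
# Illustrative sketch (assumption, not the original implementation): how the
# dimension-unifying layers prepared above might be applied at call time, so that
# the sentence-level features can be appended to the sequence-level ones as one
# extra position along the sequence axis.
def _concat_seq_sent_sketch(
    self, sequence_x: tf.Tensor, sentence_x: tf.Tensor, training: bool
) -> tf.Tensor:
    key = "unify_dims_before_seq_sent_concat"
    if f"{key}.{SEQUENCE}" in self._tf_layers:
        # Only needed when sequence- and sentence-level features had different sizes.
        sequence_x = self._tf_layers[f"{key}.{SEQUENCE}"](sequence_x, training)
        sentence_x = self._tf_layers[f"{key}.{SENTENCE}"](sentence_x, training)
    return tf.concat([sequence_x, sentence_x], axis=1)
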
def _prepare_ffnn_layer(
    self,
    name: Text,
    layer_sizes: List[int],
    drop_rate: float,
    prefix: Text = "ffnn",
) -> None:
    """Prepares a feed-forward network and stores it under `{prefix}.{name}`."""
    self._tf_layers[f"{prefix}.{name}"] = layers.Ffnn(
        layer_sizes,
        drop_rate,
        self.config[REGULARIZATION_CONSTANT],
        self.config[WEIGHT_SPARSITY],
        layer_name_suffix=name,
    )
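
# Hypothetical usage (assumption): preparing the dialogue-level hidden layers with
# the helper above, reusing config keys already referenced in this module:
#
#     self._prepare_ffnn_layer(
#         DIALOGUE,
#         self.config[HIDDEN_LAYERS_SIZES][DIALOGUE],
#         self.config[DROP_RATE_DIALOGUE],
#     )
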
def __init__(
    self,
    attribute: Text,
    attribute_signature: Dict[Text, List[FeatureSignature]],
    config: Dict[Text, Any],
) -> None:
    """Creates a new `RasaSequenceLayer` object."""
    if not attribute_signature or not attribute_signature.get(SEQUENCE, []):
        raise TFLayerConfigException(
            "The attribute signature must contain some sequence-level feature "
            "signatures but none were found."
        )

    super().__init__(name=f"rasa_sequence_layer_{attribute}")

    self._tf_layers: Dict[Text, Any] = {
        self.FEATURE_COMBINING: RasaFeatureCombiningLayer(
            attribute, attribute_signature, config
        ),
        self.FFNN: layers.Ffnn(
            config[HIDDEN_LAYERS_SIZES][attribute],
            config[DROP_RATE],
            config[REGULARIZATION_CONSTANT],
            config[WEIGHT_SPARSITY],
            layer_name_suffix=attribute,
        ),
    }

    self._enables_mlm = False
    # Note: Within TED, masked language modeling becomes just input dropout,
    # since there is no loss term associated with predicting the masked tokens.
    self._prepare_masked_language_modeling(attribute, attribute_signature, config)

    transformer_layers, transformer_units = self._prepare_transformer(
        attribute, config
    )
    self._has_transformer = transformer_layers > 0

    self.output_units = self._calculate_output_units(
        attribute, transformer_layers, transformer_units, config
    )
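
# Illustrative sketch (assumption, not the original `call` implementation): the order
# in which the sub-layers prepared in `__init__` would typically be applied. The
# "transformer" key, the return signature of the feature-combining layer, and the
# omission of masked language modeling / input dropout are all simplifications here.
def _sequence_layer_forward_sketch(
    self, sequence_features, sentence_features, sequence_lengths, training: bool
):
    # Combine sparse/dense and sequence-/sentence-level features into one tensor
    # plus a padding mask.
    x, mask = self._tf_layers[self.FEATURE_COMBINING](
        (sequence_features, sentence_features, sequence_lengths), training
    )
    # Attribute-specific hidden layers.
    x = self._tf_layers[self.FFNN](x, training)
    # Apply the transformer only if any transformer layers were configured.
    if self._has_transformer:
        x = self._tf_layers["transformer"](x, 1 - mask, training)
    return x, mask
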