Example #1
    def inner(config: Dict[Text, Any],
              load: bool = False,
              finetune: bool = False) -> DIETClassifier:
        # Factory that either creates a fresh DIETClassifier or loads a
        # persisted one. `default_model_storage`, `default_execution_context`
        # and `default_diet_resource` are presumably pytest fixtures provided
        # by the enclosing test module.
        if load:
            constructor = DIETClassifier.load
        else:
            constructor = DIETClassifier.create

        default_execution_context.is_finetuning = finetune
        return constructor(
            # Merge the caller's overrides into DIET's default config.
            config=rasa.utils.common.override_defaults(
                DIETClassifier.get_default_config(), config),
            model_storage=default_model_storage,
            execution_context=default_execution_context,
            resource=default_diet_resource,
        )
Example #2
    def inner(config: Dict[Text, Any],
              load: bool = False,
              finetune: bool = False) -> DIETClassifier:
        if load:
            constructor = DIETClassifier.load
        else:
            constructor = DIETClassifier.create

        default_execution_context.is_finetuning = finetune
        return constructor(
            # Same factory as Example #1, but the caller's overrides are merged
            # with plain dict unpacking instead of `override_defaults`.
            config={
                **DIETClassifier.get_default_config(),
                **config
            },
            model_storage=default_model_storage,
            execution_context=default_execution_context,
            resource=default_diet_resource,
        )
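Examples #1 and #2 build the same DIETClassifier factory; they differ only in how the caller's overrides are merged into the component defaults. Example #1 goes through rasa.utils.common.override_defaults, Example #2 uses plain dict unpacking. The snippet below is a minimal, self-contained sketch of that difference; the override_defaults defined here is a stand-in written for illustration (the real Rasa helper may behave differently), and the config keys are made up.

# Stand-in sketch, not the real rasa.utils.common.override_defaults.
import copy

def override_defaults(defaults, custom):
    # Copy the defaults, then apply the overrides; dict-valued entries are
    # updated key by key instead of being replaced outright.
    config = copy.deepcopy(defaults)
    for key, value in (custom or {}).items():
        if isinstance(config.get(key), dict):
            config[key].update(value)
        else:
            config[key] = value
    return config

defaults = {"epochs": 300, "hidden_layers_sizes": {"text": [256, 128], "label": [256, 128]}}
custom = {"epochs": 5, "hidden_layers_sizes": {"text": [64]}}

print(override_defaults(defaults, custom)["hidden_layers_sizes"])
# {'text': [64], 'label': [256, 128]} -- nested defaults are kept
print({**defaults, **custom}["hidden_layers_sizes"])  # merge used in Example #2
# {'text': [64]} -- the whole nested dict is replaced

The practical consequence: with the dict-unpacking variant, passing a partial HIDDEN_LAYERS_SIZES or DENSE_DIMENSION dict drops the default entries for the keys you leave out, while a key-by-key merge like the one sketched above keeps them.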
Example #3
 @staticmethod
 def get_default_config() -> Dict[Text, Any]:
     """The component's default config (see parent class for full docstring)."""
     return {
         **DIETClassifier.get_default_config(),
         # ## Architecture of the used neural network
         # Hidden layer sizes for layers before the embedding layers for user message
         # and labels.
         # The number of hidden layers is equal to the length of the corresponding
         # list.
         HIDDEN_LAYERS_SIZES: {
             TEXT: [256, 128],
             LABEL: [256, 128]
         },
         # Whether to share the hidden layer weights between input words
         # and responses
         SHARE_HIDDEN_LAYERS: False,
         # Number of units in transformer
         TRANSFORMER_SIZE: None,
         # Number of transformer layers
         NUM_TRANSFORMER_LAYERS: 0,
         # Number of attention heads in transformer
         NUM_HEADS: 4,
         # If 'True' use key relative embeddings in attention
         KEY_RELATIVE_ATTENTION: False,
         # If 'True' use value relative embeddings in attention
         VALUE_RELATIVE_ATTENTION: False,
         # Max position for relative embeddings. Only in effect if key or
         # value relative attention is turned on.
         MAX_RELATIVE_POSITION: 5,
         # Use a unidirectional or bidirectional encoder.
         UNIDIRECTIONAL_ENCODER: False,
         # ## Training parameters
         # Initial and final batch sizes:
         # Batch size will be linearly increased for each epoch.
         BATCH_SIZES: [64, 256],
         # Strategy used when creating batches.
         # Can be either 'sequence' or 'balanced'.
         BATCH_STRATEGY: BALANCED,
         # Number of epochs to train
         EPOCHS: 300,
         # Set random seed to any 'int' to get reproducible results
         RANDOM_SEED: None,
         # Initial learning rate for the optimizer
         LEARNING_RATE: 0.001,
         # ## Parameters for embeddings
         # Dimension size of embedding vectors
         EMBEDDING_DIMENSION: 20,
         # Default dense dimension to use if no dense features are present.
         DENSE_DIMENSION: {
             TEXT: 512,
             LABEL: 512
         },
         # Default dimension to use for concatenating sequence and sentence features.
         CONCAT_DIMENSION: {
             TEXT: 512,
             LABEL: 512
         },
         # The number of incorrect labels. The algorithm will minimize
         # their similarity to the user input during training.
         NUM_NEG: 20,
         # Type of similarity measure to use, either 'auto' or 'cosine' or 'inner'.
         SIMILARITY_TYPE: AUTO,
         # The type of the loss function, either 'cross_entropy' or 'margin'.
         LOSS_TYPE: CROSS_ENTROPY,
         # Number of top intents for which confidences should be predicted.
         # Set to 0 if confidences for all intents should be reported.
         RANKING_LENGTH: 10,
         # Determines whether the confidences of the chosen top intents should be
         # renormalized so that they sum up to 1. By default, we do not renormalize
         # and return the confidences for the top intents as is.
         # Note that renormalization only makes sense if confidences are generated
         # via `softmax`.
         RENORMALIZE_CONFIDENCES: False,
         # Indicates how similar the algorithm should try to make embedding vectors
         # for correct labels.
         # Should be 0.0 < ... < 1.0 for 'cosine' similarity type.
         MAX_POS_SIM: 0.8,
         # Maximum negative similarity for incorrect labels.
         # Should be -1.0 < ... < 1.0 for 'cosine' similarity type.
         MAX_NEG_SIM: -0.4,
         # If 'True' the algorithm only minimizes maximum similarity over
         # incorrect intent labels, used only if 'loss_type' is set to 'margin'.
         USE_MAX_NEG_SIM: True,
         # Scale loss inversely proportional to the confidence of the correct prediction
         SCALE_LOSS: True,
         # ## Regularization parameters
         # The scale of regularization
         REGULARIZATION_CONSTANT: 0.002,
         # Fraction of trainable weights in internal layers.
         CONNECTION_DENSITY: 1.0,
         # The scale of how important it is to minimize the maximum similarity
         # between embeddings of different labels.
         NEGATIVE_MARGIN_SCALE: 0.8,
         # Dropout rate for encoder
         DROP_RATE: 0.2,
         # Dropout rate for attention
         DROP_RATE_ATTENTION: 0,
         # If 'True' apply dropout to sparse input tensors
         SPARSE_INPUT_DROPOUT: False,
         # If 'True' apply dropout to dense input tensors
         DENSE_INPUT_DROPOUT: False,
         # ## Evaluation parameters
         # How often to calculate validation accuracy.
         # Small values may hurt performance, e.g. model accuracy.
         EVAL_NUM_EPOCHS: 20,
         # How many examples to use for the hold-out validation set.
         # Large values may hurt performance, e.g. model accuracy.
         EVAL_NUM_EXAMPLES: 0,
         # ## Selector config
         # If 'True' random tokens of the input message will be masked and the model
         # should predict those tokens.
         MASKED_LM: False,
         # Name of the intent for which this response selector is to be trained
         RETRIEVAL_INTENT: None,
         # Boolean flag to check if actual text of the response
         # should be used as ground truth label for training the model.
         USE_TEXT_AS_LABEL: False,
         # If you want to use tensorboard to visualize training
         # and validation metrics,
         # set this option to a valid output directory.
         TENSORBOARD_LOG_DIR: None,
         # Define when training metrics for tensorboard should be logged.
         # Either after every epoch or for every training step.
         # Valid values: 'epoch' and 'batch'
         TENSORBOARD_LOG_LEVEL: "epoch",
         # Specify what features to use as sequence and sentence features
         # By default all features in the pipeline are used.
         FEATURIZERS: [],
         # Perform model checkpointing
         CHECKPOINT_MODEL: False,
         # If 'True' applies sigmoid on all similarity terms and adds it
         # to the loss function to ensure that similarity values are
         # approximately bounded. Used inside cross-entropy loss only.
         CONSTRAIN_SIMILARITIES: False,
         # Model confidence to be returned during inference. Currently, the only
         # possible value is `softmax`.
         MODEL_CONFIDENCE: SOFTMAX,
     }
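The config above spreads DIETClassifier.get_default_config() and then adds selector-specific keys such as RETRIEVAL_INTENT and USE_TEXT_AS_LABEL, which suggests it is ResponseSelector's get_default_config(). Assuming that, the short sketch below overrides a couple of the listed defaults with the same dict-unpacking pattern; EPOCHS and RANDOM_SEED are assumed to come from rasa.utils.tensorflow.constants, where Rasa defines these training keys.

# Hedged sketch: assumes the get_default_config() above belongs to ResponseSelector.
from rasa.nlu.selectors.response_selector import ResponseSelector
from rasa.utils.tensorflow.constants import EPOCHS, RANDOM_SEED

config = {
    **ResponseSelector.get_default_config(),  # the defaults listed in Example #3
    EPOCHS: 50,       # train for fewer epochs than the default 300
    RANDOM_SEED: 42,  # fix the seed for reproducible results
}

In a project these keys are normally set in the pipeline section of config.yml, using the plain string names the constants resolve to (e.g. epochs, random_seed).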