from typing import Any, Dict, Text

import rasa.utils.common
from rasa.nlu.classifiers.diet_classifier import DIETClassifier


def inner(
    config: Dict[Text, Any], load: bool = False, finetune: bool = False
) -> DIETClassifier:
    # `default_model_storage`, `default_execution_context` and
    # `default_diet_resource` are expected from the enclosing (fixture)
    # scope; they are not defined in this snippet.
    if load:
        constructor = DIETClassifier.load
    else:
        constructor = DIETClassifier.create

    default_execution_context.is_finetuning = finetune

    # `override_defaults` merges dict-valued keys one level deep instead of
    # replacing the nested dict wholesale.
    return constructor(
        config=rasa.utils.common.override_defaults(
            DIETClassifier.get_default_config(), config
        ),
        model_storage=default_model_storage,
        execution_context=default_execution_context,
        resource=default_diet_resource,
    )
def inner(
    config: Dict[Text, Any], load: bool = False, finetune: bool = False
) -> DIETClassifier:
    # Variant using plain dict unpacking: a shallow merge, so dict-valued
    # defaults are replaced wholesale by any user-supplied value.
    if load:
        constructor = DIETClassifier.load
    else:
        constructor = DIETClassifier.create

    default_execution_context.is_finetuning = finetune

    return constructor(
        config={**DIETClassifier.get_default_config(), **config},
        model_storage=default_model_storage,
        execution_context=default_execution_context,
        resource=default_diet_resource,
    )
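# Usage sketch (illustrative, not part of the original snippet): assumes the
# factory above is in scope as `inner` and that EPOCHS is imported from
# rasa.utils.tensorflow.constants.
from rasa.utils.tensorflow.constants import EPOCHS

freshly_created = inner({EPOCHS: 1})  # goes through DIETClassifier.create
loaded_for_finetuning = inner(
    {EPOCHS: 1}, load=True, finetune=True
)  # goes through DIETClassifier.load with finetuning enabled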
def get_default_config() -> Dict[Text, Any]:
    """The component's default config (see parent class for full docstring)."""
    return {
        **DIETClassifier.get_default_config(),
        # ## Architecture of the used neural network
        # Hidden layer sizes for layers before the embedding layers for user
        # message and labels.
        # The number of hidden layers is equal to the length of the
        # corresponding list.
        HIDDEN_LAYERS_SIZES: {TEXT: [256, 128], LABEL: [256, 128]},
        # Whether to share the hidden layer weights between input words
        # and responses.
        SHARE_HIDDEN_LAYERS: False,
        # Number of units in transformer
        TRANSFORMER_SIZE: None,
        # Number of transformer layers
        NUM_TRANSFORMER_LAYERS: 0,
        # Number of attention heads in transformer
        NUM_HEADS: 4,
        # If 'True' use key relative embeddings in attention
        KEY_RELATIVE_ATTENTION: False,
        # If 'True' use value relative embeddings in attention
        VALUE_RELATIVE_ATTENTION: False,
        # Max position for relative embeddings. Only in effect if key-
        # or value relative attention are turned on.
        MAX_RELATIVE_POSITION: 5,
        # Use a unidirectional or bidirectional encoder.
        UNIDIRECTIONAL_ENCODER: False,
        # ## Training parameters
        # Initial and final batch sizes:
        # Batch size will be linearly increased for each epoch.
        BATCH_SIZES: [64, 256],
        # Strategy used when creating batches.
        # Can be either 'sequence' or 'balanced'.
        BATCH_STRATEGY: BALANCED,
        # Number of epochs to train
        EPOCHS: 300,
        # Set random seed to any 'int' to get reproducible results
        RANDOM_SEED: None,
        # Initial learning rate for the optimizer
        LEARNING_RATE: 0.001,
        # ## Parameters for embeddings
        # Dimension size of embedding vectors
        EMBEDDING_DIMENSION: 20,
        # Default dense dimension to use if no dense features are present.
        DENSE_DIMENSION: {TEXT: 512, LABEL: 512},
        # Default dimension to use for concatenating sequence and sentence
        # features.
        CONCAT_DIMENSION: {TEXT: 512, LABEL: 512},
        # The number of incorrect labels. The algorithm will minimize
        # their similarity to the user input during training.
        NUM_NEG: 20,
        # Type of similarity measure to use, either 'auto', 'cosine'
        # or 'inner'.
        SIMILARITY_TYPE: AUTO,
        # The type of the loss function, either 'cross_entropy' or 'margin'.
        LOSS_TYPE: CROSS_ENTROPY,
        # Number of top actions for which confidences should be predicted.
        # Set to 0 if confidences for all intents should be reported.
        RANKING_LENGTH: 10,
        # Determines whether the confidences of the chosen top actions should
        # be renormalized so that they sum up to 1. By default, we do not
        # renormalize and return the confidences for the top actions as is.
        # Note that renormalization only makes sense if confidences are
        # generated via `softmax`.
        RENORMALIZE_CONFIDENCES: False,
        # Indicates how similar the algorithm should try to make embedding
        # vectors for correct labels.
        # Should be 0.0 < ... < 1.0 for 'cosine' similarity type.
        MAX_POS_SIM: 0.8,
        # Maximum negative similarity for incorrect labels.
        # Should be -1.0 < ... < 1.0 for 'cosine' similarity type.
        MAX_NEG_SIM: -0.4,
        # If 'True' the algorithm only minimizes maximum similarity over
        # incorrect intent labels, used only if 'loss_type' is set to 'margin'.
        USE_MAX_NEG_SIM: True,
        # Scale loss inversely proportional to the confidence of the correct
        # prediction.
        SCALE_LOSS: True,
        # ## Regularization parameters
        # The scale of regularization
        REGULARIZATION_CONSTANT: 0.002,
        # Fraction of trainable weights in internal layers.
        CONNECTION_DENSITY: 1.0,
        # The scale of how important it is to minimize the maximum similarity
        # between embeddings of different labels.
        NEGATIVE_MARGIN_SCALE: 0.8,
        # Dropout rate for encoder
        DROP_RATE: 0.2,
        # Dropout rate for attention
        DROP_RATE_ATTENTION: 0,
        # If 'True' apply dropout to sparse input tensors
        SPARSE_INPUT_DROPOUT: False,
        # If 'True' apply dropout to dense input tensors
        DENSE_INPUT_DROPOUT: False,
        # ## Evaluation parameters
        # How often to calculate validation accuracy.
        # Small values may hurt performance, e.g. model accuracy.
        EVAL_NUM_EPOCHS: 20,
        # How many examples to use for the hold-out validation set.
        # Large values may hurt performance, e.g. model accuracy.
        EVAL_NUM_EXAMPLES: 0,
        # ## Selector config
        # If 'True' random tokens of the input message will be masked and
        # the model should predict those tokens.
        MASKED_LM: False,
        # Name of the intent for which this response selector is to be trained
        RETRIEVAL_INTENT: None,
        # Boolean flag to check if the actual text of the response
        # should be used as the ground truth label for training the model.
        USE_TEXT_AS_LABEL: False,
        # If you want to use tensorboard to visualize training and validation
        # metrics, set this option to a valid output directory.
        TENSORBOARD_LOG_DIR: None,
        # Define when training metrics for tensorboard should be logged.
        # Either after every epoch or for every training step.
        # Valid values: 'epoch' and 'batch'
        TENSORBOARD_LOG_LEVEL: "epoch",
        # Specify what features to use as sequence and sentence features.
        # By default all features in the pipeline are used.
        FEATURIZERS: [],
        # Perform model checkpointing
        CHECKPOINT_MODEL: False,
        # If 'True' applies sigmoid on all similarity terms and adds it
        # to the loss function to ensure that similarity values are
        # approximately bounded. Used inside cross-entropy loss only.
        CONSTRAIN_SIMILARITIES: False,
        # Model confidence to be returned during inference. Currently, the
        # only possible value is `softmax`.
        MODEL_CONFIDENCE: SOFTMAX,
    }
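# Usage sketch (illustrative): because later entries in a dict unpacking win,
# overlaying a user config on these defaults replaces only the keys it names.
# Assumes `get_default_config` above is in scope and the constants are
# imported from rasa.utils.tensorflow.constants.
from rasa.utils.tensorflow.constants import EPOCHS, MASKED_LM, TRANSFORMER_SIZE

user_config = {EPOCHS: 100, TRANSFORMER_SIZE: 256}
effective = {**get_default_config(), **user_config}
assert effective[EPOCHS] == 100  # overridden by the user config
assert effective[MASKED_LM] is False  # default retained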