async def test_train_persist_load_with_composite_entities(
    crf_entity_extractor: Callable[[Dict[Text, Any]],
                                   CRFEntityExtractorGraphComponent],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    whitespace_tokenizer: WhitespaceTokenizerGraphComponent,
):
    importer = RasaFileImporter(
        training_data_paths=["data/test/demo-rasa-composite-entities.yml"])
    training_data = importer.get_nlu_data()

    whitespace_tokenizer.process_training_data(training_data)

    crf_extractor = crf_entity_extractor({})
    crf_extractor.train(training_data)

    message = Message(data={TEXT: "I am looking for an italian restaurant"})

    whitespace_tokenizer.process([message])
    # Keep an identical copy so the reloaded extractor sees the same input.
    message2 = copy.deepcopy(message)

    processed_message = crf_extractor.process([message])[0]

    # Training persisted the model via its Resource; load it back from storage.
    loaded_extractor = CRFEntityExtractorGraphComponent.load(
        CRFEntityExtractorGraphComponent.get_default_config(),
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )

    processed_message2 = loaded_extractor.process([message2])[0]

    assert processed_message2.fingerprint() == processed_message.fingerprint()
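
The `crf_entity_extractor` argument is a pytest factory fixture. A minimal sketch of how it could be defined, assuming the standard Rasa 3.x `GraphComponent.create` signature and the same `Resource` name that the load call above uses (the actual fixture is not part of this listing):

import pytest

@pytest.fixture()
def crf_entity_extractor(
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
) -> Callable[[Dict[Text, Any]], CRFEntityExtractorGraphComponent]:
    def inner(config: Dict[Text, Any]) -> CRFEntityExtractorGraphComponent:
        # Merge overrides into the component defaults, mirroring the tests above.
        return CRFEntityExtractorGraphComponent.create(
            {**CRFEntityExtractorGraphComponent.get_default_config(), **config},
            default_model_storage,
            Resource("CRFEntityExtractor"),  # must match the Resource used at load time
            default_execution_context,
        )

    return inner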
Example #2
    def inner(
        pipeline: List[Dict[Text, Any]], training_data: Union[Text, TrainingData],
    ) -> Tuple[TrainingData, List[GraphComponent]]:

        if isinstance(training_data, str):
            importer = RasaFileImporter(training_data_paths=[training_data])
            training_data: TrainingData = importer.get_nlu_data()

        def create_component(
            component_class: Type[GraphComponent], config: Dict[Text, Any], idx: int
        ) -> GraphComponent:
            node_name = f"{component_class.__name__}_{idx}"
            execution_context = ExecutionContext(GraphSchema({}), node_name=node_name)
            resource = Resource(node_name)
            return component_class.create(
                {**component_class.get_default_config(), **config},
                default_model_storage,
                resource,
                execution_context,
            )

    component_pipeline = [
        # `pop` removes the class; the remaining keys become that component's config.
        create_component(component.pop("component"), component, idx)
        for idx, component in enumerate(copy.deepcopy(pipeline))
    ]

        for component in component_pipeline:
            if hasattr(component, "train"):
                component.train(training_data)
            if hasattr(component, "process_training_data"):
                component.process_training_data(training_data)

        return training_data, component_pipeline
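
Note that `inner` closes over the `default_model_storage` fixture of its enclosing fixture function. Assuming the enclosing fixture is the `train_and_preprocess` callable that Example #3 requests, a call would look like this (the data path is illustrative):

training_data, loaded_pipeline = train_and_preprocess(
    pipeline=[
        {"component": WhitespaceTokenizer},
        {"component": CountVectorsFeaturizer},
    ],
    training_data="data/examples/rasa/demo-rasa.yml",
)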
Example #3
async def test_process_gives_diagnostic_data(
    default_execution_context: ExecutionContext,
    create_response_selector: Callable[[Dict[Text, Any]], ResponseSelector],
    train_and_preprocess: Callable[..., Tuple[TrainingData,
                                              List[GraphComponent]]],
    process_message: Callable[..., Message],
):
    """Tests if processing a message returns attention weights as numpy array."""
    pipeline = [
        {"component": WhitespaceTokenizer},
        {"component": CountVectorsFeaturizer},
    ]
    config_params = {EPOCHS: 1}

    importer = RasaFileImporter(
        config_file="data/test_response_selector_bot/config.yml",
        domain_path="data/test_response_selector_bot/domain.yml",
        training_data_paths=[
            "data/test_response_selector_bot/data/rules.yml",
            "data/test_response_selector_bot/data/stories.yml",
            "data/test_response_selector_bot/data/nlu.yml",
        ],
    )
    training_data = importer.get_nlu_data()

    training_data, loaded_pipeline = train_and_preprocess(
        pipeline, training_data)

    default_execution_context.should_add_diagnostic_data = True

    response_selector = create_response_selector(config_params)
    response_selector.train(training_data=training_data)

    message = Message(data={TEXT: "hello"})
    message = process_message(loaded_pipeline, message)

    classified_message = response_selector.process([message])[0]
    diagnostic_data = classified_message.get(DIAGNOSTIC_DATA)

    assert isinstance(diagnostic_data, dict)
    for _, values in diagnostic_data.items():
        assert "text_transformed" in values
        assert isinstance(values.get("text_transformed"), np.ndarray)
        # The `attention_weights` key should exist, regardless of there
        # being a transformer
        assert "attention_weights" in values
        # By default, ResponseSelector has `number_of_transformer_layers = 0`
        # in which case the attention weights should be None.
        assert values.get("attention_weights") is None
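
By contrast, enabling a transformer should turn the attention weights into an array. A hedged variant of the config, assuming the constant names in `rasa.utils.tensorflow.constants`:

from rasa.utils.tensorflow.constants import (
    EPOCHS,
    NUM_TRANSFORMER_LAYERS,
    TRANSFORMER_SIZE,
)

# With at least one transformer layer, `attention_weights` should be
# an np.ndarray rather than None.
config_params = {EPOCHS: 1, NUM_TRANSFORMER_LAYERS: 1, TRANSFORMER_SIZE: 256}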
Example #4
    def inner(
        diet: DIETClassifier,
        pipeline: Optional[List[Dict[Text, Any]]] = None,
        training_data: str = nlu_data_path,
        message_text: Text = "Rasa is great!",
        expect_intent: bool = True,
    ) -> Message:

        if not pipeline:
            pipeline = [
                {"name": "WhitespaceTokenizer"},
                {"name": "CountVectorsFeaturizer"},
            ]

        loaded_pipeline = [
            registry.get_component_class(component.pop("name"))(component)
            for component in copy.deepcopy(pipeline)
        ]

        importer = RasaFileImporter(training_data_paths=[training_data])
        training_data = importer.get_nlu_data()

        for component in loaded_pipeline:
            component.train(training_data)

        diet.train(training_data=training_data)

        message = Message(data={TEXT: message_text})
        for component in loaded_pipeline:
            component.process(message)

        message2 = copy.deepcopy(message)

        classified_message = diet.process([message])[0]

        if expect_intent:
            assert classified_message.data["intent"]["name"]

        loaded_diet = create_diet(diet.component_config, load=True)

        classified_message2 = loaded_diet.process([message2])[0]

        assert classified_message2.fingerprint() == classified_message.fingerprint()
        return classified_message
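
A call to this helper might look as follows; the helper's fixture name (`train_load_and_process_diet` here) and the `create_diet` factory are assumptions taken from context, while the config constants come from `rasa.utils.tensorflow.constants`:

from rasa.utils.tensorflow.constants import ENTITY_RECOGNITION, EPOCHS, RANDOM_SEED

diet = create_diet({ENTITY_RECOGNITION: False, RANDOM_SEED: 1, EPOCHS: 1})
classified_message = train_load_and_process_diet(
    diet, message_text="Rasa is great!", expect_intent=True
)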
Example #5
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[[Dict[Text, Any]],
                                   CRFEntityExtractorGraphComponent],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    spacy_tokenizer: SpacyTokenizerGraphComponent,
    spacy_nlp: Language,
):

    crf_extractor = crf_entity_extractor(config_params)

    importer = RasaFileImporter(training_data_paths=["data/examples/rasa"])
    training_data = importer.get_nlu_data()

    spacy_model = SpacyModel(model=spacy_nlp, model_name="en_core_web_md")
    training_data = SpacyPreprocessor({}).process_training_data(
        training_data, spacy_model)
    training_data = spacy_tokenizer.process_training_data(training_data)

    crf_extractor.train(training_data)

    message = Message(data={TEXT: "I am looking for an italian restaurant"})
    messages = SpacyPreprocessor({}).process([message], spacy_model)
    message = spacy_tokenizer.process(messages)[0]

    message2 = copy.deepcopy(message)

    processed_message = crf_extractor.process([message])[0]

    loaded_extractor = CRFEntityExtractorGraphComponent.load(
        {
            **CRFEntityExtractorGraphComponent.get_default_config(),
            **config_params
        },
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )

    processed_message2 = loaded_extractor.process([message2])[0]

    assert processed_message2.fingerprint() == processed_message.fingerprint()

    detected_entities = processed_message2.get(ENTITIES)

    assert len(detected_entities) == 1
    assert detected_entities[0]["entity"] == "cuisine"
    assert detected_entities[0]["value"] == "italian"
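
`config_params` is supplied via parametrization. A plausible parametrization using documented `CRFEntityExtractor` options (`features`, `max_iterations`, `L1_c`, `L2_c`); the stub target below only illustrates the decorator, which in the real suite sits on the test above:

import pytest

@pytest.mark.parametrize(
    "config_params",
    [
        {"features": [["low", "title", "upper"], ["suffix3", "suffix2", "bias"]]},
        {"max_iterations": 50, "L1_c": 0.1, "L2_c": 0.1},
    ],
)
def test_config_shapes(config_params: Dict[Text, Any]) -> None:
    # Stub body; each parametrized case is one extractor configuration.
    assert isinstance(config_params, dict)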
Example #6
def test_rasa_file_importer(project: Text):
    config_path = os.path.join(project, DEFAULT_CONFIG_PATH)
    domain_path = os.path.join(project, DEFAULT_DOMAIN_PATH)
    default_data_path = os.path.join(project, DEFAULT_DATA_PATH)

    importer = RasaFileImporter(config_path, domain_path, [default_data_path])

    domain = importer.get_domain()
    assert len(domain.intents) == 7 + len(DEFAULT_INTENTS)
    assert domain.slots == [AnySlot(SESSION_START_METADATA_SLOT)]
    assert domain.entities == []
    assert len(domain.action_names_or_texts) == 18
    assert len(domain.responses) == 6

    stories = importer.get_stories()
    assert len(stories.story_steps) == 5

    test_stories = importer.get_conversation_tests()
    assert len(test_stories.story_steps) == 0

    nlu_data = importer.get_nlu_data("en")
    assert len(nlu_data.intents) == 7
    assert len(nlu_data.intent_examples) == 69
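
The path constants resolve against the default project layout created by `rasa init`; building the importer for any such project is just:

import os

from rasa.shared.constants import (
    DEFAULT_CONFIG_PATH,  # "config.yml"
    DEFAULT_DATA_PATH,    # "data"
    DEFAULT_DOMAIN_PATH,  # "domain.yml"
)
from rasa.shared.importers.rasa import RasaFileImporter

project = "path/to/project"  # any directory with the default layout
importer = RasaFileImporter(
    config_file=os.path.join(project, DEFAULT_CONFIG_PATH),
    domain_path=os.path.join(project, DEFAULT_DOMAIN_PATH),
    training_data_paths=[os.path.join(project, DEFAULT_DATA_PATH)],
)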
Example #7
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[[Dict[Text, Any]],
                                   CRFEntityExtractorGraphComponent],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
):
    pipeline = [
        {"name": "SpacyNLP", "model": "en_core_web_md"},
        {"name": "SpacyTokenizer"},
    ]

    loaded_pipeline = [
        registry.get_component_class(component.pop("name")).create(
            component, RasaNLUModelConfig())
        for component in copy.deepcopy(pipeline)
    ]

    crf_extractor = crf_entity_extractor(config_params)

    importer = RasaFileImporter(training_data_paths=["data/examples/rasa"])
    training_data = importer.get_nlu_data()

    for component in loaded_pipeline:
        component.train(training_data)

    crf_extractor.train(training_data)

    message = Message(data={TEXT: "I am looking for an italian restaurant"})

    for component in loaded_pipeline:
        component.process(message)

    message2 = copy.deepcopy(message)

    processed_message = crf_extractor.process([message])[0]

    loaded_extractor = CRFEntityExtractorGraphComponent.load(
        {
            **CRFEntityExtractorGraphComponent.get_default_config(),
            **config_params
        },
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )

    processed_message2 = loaded_extractor.process([message2])[0]

    assert processed_message2.fingerprint() == processed_message.fingerprint()

    detected_entities = processed_message2.get(ENTITIES)

    assert len(detected_entities) == 1
    assert detected_entities[0]["entity"] == "cuisine"
    assert detected_entities[0]["value"] == "italian"
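
For reference, the asserted entity follows the usual extractor output shape; for this message it would look roughly like the sketch below (offsets computed from the test sentence; the confidence key name varies by Rasa version):

expected_entity = {
    "entity": "cuisine",
    "value": "italian",
    "start": 20,  # offset of "italian" in "I am looking for an italian restaurant"
    "end": 27,
    "extractor": "CRFEntityExtractor",
    "confidence_entity": 0.78,  # illustrative value; key name is version-dependent
}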