async def test_train_persist_load_with_composite_entities(
    crf_entity_extractor: Callable[
        [Dict[Text, Any]], CRFEntityExtractorGraphComponent
    ],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    whitespace_tokenizer: WhitespaceTokenizerGraphComponent,
):
    """A reloaded extractor reproduces the trained one on composite-entity data."""
    data_importer = RasaFileImporter(
        training_data_paths=["data/test/demo-rasa-composite-entities.yml"]
    )
    nlu_data = data_importer.get_nlu_data()
    whitespace_tokenizer.process_training_data(nlu_data)

    extractor = crf_entity_extractor({})
    extractor.train(nlu_data)

    original = Message(data={TEXT: "I am looking for an italian restaurant"})
    whitespace_tokenizer.process([original])
    # Keep an untouched copy so the reloaded extractor sees identical input.
    duplicate = copy.deepcopy(original)

    result_trained = extractor.process([original])[0]

    restored = CRFEntityExtractorGraphComponent.load(
        CRFEntityExtractorGraphComponent.get_default_config(),
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    result_restored = restored.process([duplicate])[0]

    assert result_restored.fingerprint() == result_trained.fingerprint()
def inner(
    pipeline: List[Dict[Text, Any]],
    training_data: Union[Text, TrainingData],
) -> Tuple[TrainingData, List[GraphComponent]]:
    """Instantiate every pipeline component, train/preprocess on the data, return both.

    `training_data` may be an already-loaded `TrainingData` or a path to an
    NLU data file, which is then loaded here.
    """
    if isinstance(training_data, str):
        file_importer = RasaFileImporter(training_data_paths=[training_data])
        training_data: TrainingData = file_importer.get_nlu_data()

    def build(
        component_class: Type[GraphComponent], config: Dict[Text, Any], idx: int
    ) -> GraphComponent:
        # Give each node a unique name so its Resource doesn't collide in storage.
        unique_name = f"{component_class.__name__}_{idx}"
        ctx = ExecutionContext(GraphSchema({}), node_name=unique_name)
        return component_class.create(
            {**component_class.get_default_config(), **config},
            default_model_storage,
            Resource(unique_name),
            ctx,
        )

    # Deep-copy so popping the "component" key doesn't mutate the caller's pipeline.
    components = []
    for position, entry in enumerate(copy.deepcopy(pipeline)):
        components.append(build(entry.pop("component"), entry, position))

    for step in components:
        if hasattr(step, "train"):
            step.train(training_data)
        if hasattr(step, "process_training_data"):
            step.process_training_data(training_data)

    return training_data, components
async def test_process_gives_diagnostic_data(
    default_execution_context: ExecutionContext,
    create_response_selector: Callable[[Dict[Text, Any]], ResponseSelector],
    train_and_preprocess: Callable[..., Tuple[TrainingData, List[GraphComponent]]],
    process_message: Callable[..., Message],
):
    """Tests if processing a message returns attention weights as numpy array."""
    preprocessing_pipeline = [
        {"component": WhitespaceTokenizer},
        {"component": CountVectorsFeaturizer},
    ]
    importer = RasaFileImporter(
        config_file="data/test_response_selector_bot/config.yml",
        domain_path="data/test_response_selector_bot/domain.yml",
        training_data_paths=[
            "data/test_response_selector_bot/data/rules.yml",
            "data/test_response_selector_bot/data/stories.yml",
            "data/test_response_selector_bot/data/nlu.yml",
        ],
    )
    nlu_data = importer.get_nlu_data()
    nlu_data, fitted_pipeline = train_and_preprocess(
        preprocessing_pipeline, nlu_data
    )

    # Diagnostic data is only attached when the execution context requests it.
    default_execution_context.should_add_diagnostic_data = True

    selector = create_response_selector({EPOCHS: 1})
    selector.train(training_data=nlu_data)

    msg = Message(data={TEXT: "hello"})
    msg = process_message(fitted_pipeline, msg)
    classified = selector.process([msg])[0]

    diagnostics = classified.get(DIAGNOSTIC_DATA)
    assert isinstance(diagnostics, dict)
    for values in diagnostics.values():
        assert "text_transformed" in values
        assert isinstance(values.get("text_transformed"), np.ndarray)
        # The `attention_weights` key should exist, regardless of there
        # being a transformer.
        assert "attention_weights" in values
        # By default, ResponseSelector has `number_of_transformer_layers = 0`
        # in which case the attention weights should be None.
        assert values.get("attention_weights") is None
def inner(
    diet: DIETClassifier,
    pipeline: Optional[List[Dict[Text, Any]]] = None,
    training_data: str = nlu_data_path,
    message_text: Text = "Rasa is great!",
    expect_intent: bool = True,
) -> Message:
    """Train `diet` on top of `pipeline`, classify a message, and check a reload agrees."""
    if not pipeline:
        pipeline = [
            {"name": "WhitespaceTokenizer"},
            {"name": "CountVectorsFeaturizer"},
        ]

    # Deep-copy before popping "name" so the caller's pipeline is untouched.
    components = []
    for entry in copy.deepcopy(pipeline):
        components.append(registry.get_component_class(entry.pop("name"))(entry))

    importer = RasaFileImporter(training_data_paths=[training_data])
    training_data = importer.get_nlu_data()

    for component in components:
        component.train(training_data)
    diet.train(training_data=training_data)

    msg = Message(data={TEXT: message_text})
    for component in components:
        component.process(msg)
    msg_copy = copy.deepcopy(msg)

    classified = diet.process([msg])[0]
    if expect_intent:
        assert classified.data["intent"]["name"]

    # A freshly loaded classifier must reproduce the in-memory result exactly.
    reloaded_diet = create_diet(diet.component_config, load=True)
    classified_copy = reloaded_diet.process([msg_copy])[0]
    assert classified_copy.fingerprint() == classified.fingerprint()

    return classified
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[
        [Dict[Text, Any]], CRFEntityExtractorGraphComponent
    ],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
    spacy_tokenizer: SpacyTokenizerGraphComponent,
    spacy_nlp: Language,
):
    """Training with a custom config, persisting, and reloading yields identical entities."""
    extractor = crf_entity_extractor(config_params)

    importer = RasaFileImporter(training_data_paths=["data/examples/rasa"])
    nlu_data = importer.get_nlu_data()

    model = SpacyModel(model=spacy_nlp, model_name="en_core_web_md")
    nlu_data = SpacyPreprocessor({}).process_training_data(nlu_data, model)
    nlu_data = spacy_tokenizer.process_training_data(nlu_data)
    extractor.train(nlu_data)

    msg = Message(data={TEXT: "I am looking for an italian restaurant"})
    preprocessed = SpacyPreprocessor({}).process([msg], model)
    msg = spacy_tokenizer.process(preprocessed)[0]
    # Untouched copy for the reloaded extractor.
    msg_copy = copy.deepcopy(msg)

    result = extractor.process([msg])[0]

    reloaded = CRFEntityExtractorGraphComponent.load(
        {**CRFEntityExtractorGraphComponent.get_default_config(), **config_params},
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    result_copy = reloaded.process([msg_copy])[0]

    assert result_copy.fingerprint() == result.fingerprint()

    entities = result_copy.get(ENTITIES)
    assert len(entities) == 1
    assert entities[0]["entity"] == "cuisine"
    assert entities[0]["value"] == "italian"
def test_rasa_file_importer(project: Text):
    """RasaFileImporter loads domain, stories, tests and NLU data of a default project."""
    importer = RasaFileImporter(
        os.path.join(project, DEFAULT_CONFIG_PATH),
        os.path.join(project, DEFAULT_DOMAIN_PATH),
        [os.path.join(project, DEFAULT_DATA_PATH)],
    )

    domain = importer.get_domain()
    assert len(domain.intents) == 7 + len(DEFAULT_INTENTS)
    assert domain.slots == [AnySlot(SESSION_START_METADATA_SLOT)]
    assert domain.entities == []
    assert len(domain.action_names_or_texts) == 18
    assert len(domain.responses) == 6

    assert len(importer.get_stories().story_steps) == 5
    assert len(importer.get_conversation_tests().story_steps) == 0

    nlu_data = importer.get_nlu_data("en")
    assert len(nlu_data.intents) == 7
    assert len(nlu_data.intent_examples) == 69
async def test_train_persist_with_different_configurations(
    crf_entity_extractor: Callable[
        [Dict[Text, Any]], CRFEntityExtractorGraphComponent
    ],
    config_params: Dict[Text, Any],
    default_model_storage: ModelStorage,
    default_execution_context: ExecutionContext,
):
    """Legacy-pipeline variant: train, persist, reload, and compare extraction results."""
    legacy_pipeline = [
        {"name": "SpacyNLP", "model": "en_core_web_md"},
        {"name": "SpacyTokenizer"},
    ]
    # Deep-copy before popping "name" so the pipeline spec stays intact.
    components = [
        registry.get_component_class(entry.pop("name")).create(
            entry, RasaNLUModelConfig()
        )
        for entry in copy.deepcopy(legacy_pipeline)
    ]
    extractor = crf_entity_extractor(config_params)

    importer = RasaFileImporter(training_data_paths=["data/examples/rasa"])
    nlu_data = importer.get_nlu_data()
    for component in components:
        component.train(nlu_data)
    extractor.train(nlu_data)

    msg = Message(data={TEXT: "I am looking for an italian restaurant"})
    for component in components:
        component.process(msg)
    # Untouched copy for the reloaded extractor.
    msg_copy = copy.deepcopy(msg)

    result = extractor.process([msg])[0]

    reloaded = CRFEntityExtractorGraphComponent.load(
        {**CRFEntityExtractorGraphComponent.get_default_config(), **config_params},
        default_model_storage,
        Resource("CRFEntityExtractor"),
        default_execution_context,
    )
    result_copy = reloaded.process([msg_copy])[0]

    assert result_copy.fingerprint() == result.fingerprint()

    entities = result_copy.get(ENTITIES)
    assert len(entities) == 1
    assert entities[0]["entity"] == "cuisine"
    assert entities[0]["value"] == "italian"