def __prepare_nlu(intent: str, training_examples: list):
    from rasa.shared.nlu.training_data.message import Message
    from kairon.shared.data.constant import TRAINING_EXAMPLE
    from rasa.shared.nlu.constants import TEXT
    from kairon import Utility

    if training_examples:
        test_data_threshold = Utility.environment['model']['test'].get('dataset_threshold') or 10
        if len(training_examples) >= 100:
            test_data_threshold = Utility.environment['model']['test'].get('dataset_percentage') or 10
            test_data_threshold = test_data_threshold / 100
            num_samples = int(len(training_examples) * test_data_threshold)
            training_examples = random.sample(training_examples, num_samples)
        elif len(training_examples) > test_data_threshold:
            training_examples = random.sample(training_examples, test_data_threshold)
    else:
        return []
    augmented_examples = TestDataGenerator.augment_sentences(training_examples)
    for example in augmented_examples:
        message = Message()
        plain_text, entities = DataUtility.extract_text_and_entities(example)
        message.data = {TRAINING_EXAMPLE.INTENT.value: intent, TEXT: plain_text}
        if entities:
            message.data[TRAINING_EXAMPLE.ENTITIES.value] = entities
        yield message
    if not augmented_examples:
        return []

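# A hypothetical usage sketch (not part of the original module): collect the Message
# objects yielded by __prepare_nlu into a rasa TrainingData set. The intent name and
# example sentences are invented, and the call assumes a loaded kairon environment with
# TestDataGenerator and DataUtility importable; in the repository this generator is a
# private static method of TestDataGenerator rather than a free function.
def _example_prepare_nlu_usage():
    from rasa.shared.nlu.training_data.training_data import TrainingData

    examples = ["book a flight to [delhi](location)", "show my bookings", "cancel my ticket"]
    # each yielded message carries the intent, the plain text and any annotated entities
    messages = list(__prepare_nlu("book_flight", examples))
    return TrainingData(training_examples=messages)
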
def validate_rasa_config(config: Dict):
    """
    validates bot config.yml content for invalid entries

    :param config: configuration
    :return: list of errors found in the pipeline and policy configuration
    """
    config_errors = []
    from rasa.nlu.registry import registered_components as nlu_components
    if config.get('pipeline'):
        for item in config['pipeline']:
            component_cfg = item['name']
            if not (component_cfg in nlu_components or
                    component_cfg in ["custom.ner.SpacyPatternNER", "custom.fallback.FallbackIntentFilter"]):
                config_errors.append("Invalid component " + component_cfg)
    else:
        config_errors.append("You didn't define any pipeline")

    if config.get('policies'):
        core_policies = DataUtility.get_rasa_core_policies()
        for policy in config['policies']:
            if policy['name'] not in core_policies:
                config_errors.append("Invalid policy " + policy['name'])
    else:
        config_errors.append("You didn't define any policies")
    return config_errors

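# A minimal sketch of how validate_rasa_config might be exercised; the config below is
# invented for illustration and assumes rasa and kairon are installed. Unknown pipeline
# components or policies should be reported as error strings rather than raising.
def _example_validate_rasa_config():
    sample_config = {
        "pipeline": [{"name": "WhitespaceTokenizer"}, {"name": "MyImaginaryComponent"}],
        "policies": [{"name": "MemoizationPolicy"}, {"name": "MyImaginaryPolicy"}],
    }
    # expected to contain entries such as "Invalid component MyImaginaryComponent"
    # and "Invalid policy MyImaginaryPolicy"
    return validate_rasa_config(sample_config)
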
def validate(self, clean=True):
    from kairon.shared.data.utils import DataUtility

    Utility.validate_channel_config(self.connector_type, self.config, ValidationError)
    if self.connector_type == "telegram":
        webhook_url = DataUtility.get_channel_endpoint({
            'bot': self.bot, 'user': self.user, 'connector_type': self.connector_type
        })
        Utility.register_telegram_webhook(Utility.decrypt_message(self.config['access_token']), webhook_url)

def verify_utterances_in_stories(self, raise_exception: bool = True):
    """
    Validates utterances in stories.

    @param raise_exception: Set this flag to false to prevent raising exceptions.
    @return:
    """
    utterance_mismatch_summary = []
    story_utterance_not_found_in_domain = []
    self.verify_utterances(raise_exception)
    utterance_actions = self.validator._gather_utterance_actions()
    fallback_action = DataUtility.parse_fallback_action(self.config)
    system_triggered_actions = DEFAULT_ACTIONS.union(SYSTEM_TRIGGERED_UTTERANCES)
    stories_utterances = set()

    for story in self.story_graph.story_steps:
        for event in story.events:
            if not isinstance(event, ActionExecuted):
                continue
            if not event.action_name.startswith(UTTER_PREFIX):
                # we are only interested in utter actions
                continue

            if event.action_name in stories_utterances:
                # we already processed this one before, we only want to warn once
                continue

            if event.action_name not in utterance_actions and event.action_name not in system_triggered_actions:
                msg = f"The action '{event.action_name}' is used in the stories, " \
                      f"but is not a valid utterance action. Please make sure " \
                      f"the action is listed in your domain and there is a " \
                      f"template defined with its name."
                if raise_exception:
                    raise AppException(msg)
                story_utterance_not_found_in_domain.append(msg)
            stories_utterances.add(event.action_name)

    for utterance in utterance_actions:
        if utterance not in stories_utterances and utterance not in system_triggered_actions.union(fallback_action):
            msg = f"The utterance '{utterance}' is not used in any story."
            if raise_exception:
                raise AppException(msg)
            utterance_mismatch_summary.append(msg)

    if not self.summary.get('utterances'):
        self.summary['utterances'] = []
    self.summary['utterances'] = self.summary['utterances'] + utterance_mismatch_summary

    if not self.summary.get('stories'):
        self.summary['stories'] = []
    self.summary['stories'] = self.summary['stories'] + story_utterance_not_found_in_domain

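# Illustrative note (not part of the original code): with raise_exception=False the
# findings are accumulated on the validator's summary instead of raising, e.g.
#   self.summary == {
#       'utterances': ["The utterance 'utter_unused' is not used in any story."],
#       'stories': ["The action 'utter_missing' is used in the stories, but is not a valid utterance action. ..."],
#   }
# The utterance and action names above are invented for illustration.
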
async def paraphrases(request_data: ParaphrasesRequest,
                      current_user: User = Depends(Authentication.get_current_user)):
    """
    Generates other similar text by augmenting original text
    """
    plain_text_data = [
        DataUtility.extract_text_and_entities(data)[0] for data in request_data.data
    ]
    response = requests.post(
        Utility.environment["augmentation"]["paraphrase_url"],
        json=plain_text_data
    )
    return response.json()

def __augment_entities(input_text: list, stopwords: list, entity_names: list):
    final_augmented_text = []
    if input_text and stopwords:
        for txt in input_text:
            for i, word in enumerate(stopwords):
                if word in txt:
                    final_augmented_text.append(txt.replace(word, f'[{word}]({entity_names[i]})'))
                    final_augmented_text.extend(list(
                        map(
                            lambda synonym: txt.replace(word, f'[{synonym}]({entity_names[i]})'),
                            DataUtility.generate_synonym(word))
                    ))
    else:
        final_augmented_text = input_text
    return final_augmented_text

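# A hypothetical illustration (all names invented) of what __augment_entities produces:
# each stopword occurrence is rewritten into rasa's markdown entity syntax, once with the
# original word and once per synonym returned by DataUtility.generate_synonym. Assumes
# the kairon environment is available for synonym generation.
def _example_augment_entities():
    text = ["book a cab to the airport"]
    stopwords = ["cab"]
    entity_names = ["vehicle"]
    # with a synonym such as "taxi", the result would contain
    # "book a [cab](vehicle) to the airport" and "book a [taxi](vehicle) to the airport"
    return __augment_entities(text, stopwords, entity_names)
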
async def visitor_hit_fallback(month: int = Query(default=1, ge=1, le=6),
                               current_user: User = Security(Authentication.get_current_user_and_bot,
                                                             scopes=TESTER_ACCESS)):
    """
    Fetches the number of times the agent hit a fallback (i.e. was not able to answer) for user queries
    """
    fallback_action, nlu_fallback_action = DataUtility.load_fallback_actions(current_user.get_bot())
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/metrics/fallback',
        {'month': month, 'action_fallback': fallback_action, 'nlu_fallback': nlu_fallback_action}
    )

async def fallback_dropoff(month: int = Query(default=1, ge=1, le=6),
                           current_user: User = Security(Authentication.get_current_user_and_bot,
                                                         scopes=TESTER_ACCESS)):
    """
    Fetches the list of users that dropped off after encountering fallback
    """
    fallback_action, nlu_fallback_action = DataUtility.load_fallback_actions(current_user.get_bot())
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/metrics/fallback/dropoff',
        {'month': month, 'action_fallback': fallback_action, 'nlu_fallback': nlu_fallback_action}
    )

async def fallback_trend(month: int = Query(default=6, ge=1, le=6),
                         current_user: User = Security(Authentication.get_current_user_and_bot,
                                                       scopes=TESTER_ACCESS)):
    """
    Fetches the fallback count of the bot for previous months
    """
    fallback_action, nlu_fallback_action = DataUtility.load_fallback_actions(current_user.get_bot())
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/trends/fallback',
        {'month': month, 'action_fallback': fallback_action, 'nlu_fallback': nlu_fallback_action}
    )

async def unsuccessful_session_count(
        month: int = Query(default=1, ge=1, le=6),
        current_user: User = Security(Authentication.get_current_user_and_bot, scopes=TESTER_ACCESS)):
    """
    Fetches the count of sessions that encountered a fallback for a particular user.
    """
    fallback_action, nlu_fallback_action = DataUtility.load_fallback_actions(current_user.get_bot())
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/metrics/sessions/unsuccessful',
        {'month': month, 'action_fallback': fallback_action, 'nlu_fallback': nlu_fallback_action}
    )

async def complete_conversations(month: int = Query(default=1, ge=1, le=6),
                                 current_user: User = Security(Authentication.get_current_user_and_bot,
                                                               scopes=TESTER_ACCESS)):
    """
    Fetches the number of successful conversations of the bot, i.e. conversations that never hit a fallback
    """
    fallback_action, nlu_fallback_action = DataUtility.load_fallback_actions(current_user.get_bot())
    return Utility.trigger_history_server_request(
        current_user.get_bot(),
        f'/api/history/{current_user.get_bot()}/metrics/conversation/success',
        {'month': month, 'action_fallback': fallback_action, 'nlu_fallback': nlu_fallback_action}
    )

def test_prepare_nlu_text_with_entities(self):
    expected = "n=[8](n), p=1[8](n), k=2[8](n) ec=[14](ec), ph=[3](p)"
    text, entities = DataUtility.extract_text_and_entities(expected)
    actual = DataUtility.prepare_nlu_text(text, entities)
    assert expected == actual

def test_validate_path_not_found(self):
    with pytest.raises(AppException):
        DataUtility.validate_and_get_requirements('/tests/path_not_found')

def test_get_interpreter_with_no_model(self):
    actual = DataUtility.get_interpreter("test.tar.gz")
    assert actual is None

def test_prepare_nlu_text(self):
    expected = "India is beautiful"
    text, entities = DataUtility.extract_text_and_entities(expected)
    actual = DataUtility.prepare_nlu_text(text, entities)
    assert expected == actual

def __augment_sentences_with_mistakes_and_entities(input_text: str, stopwords, entity_names):
    augmented_text = list(DataUtility.augment_sentences([input_text], stopwords))
    augmented_text = TestDataGenerator.__augment_entities(augmented_text, stopwords, entity_names)
    return augmented_text

def test_validate_files(self, resource_validate_files):
    requirements = DataUtility.validate_and_get_requirements(pytest.bot_data_home_dir)
    assert not requirements

def test_validate_only_config(self, resource_validate_only_config):
    requirements = DataUtility.validate_and_get_requirements(pytest.bot_data_home_dir, True)
    assert {'rules', 'actions', 'domain', 'stories', 'nlu'} == requirements

def test_validate_only_stories_and_nlu(self, resource_validate_only_stories_and_nlu):
    requirements = DataUtility.validate_and_get_requirements(pytest.bot_data_home_dir, True)
    assert {'actions', 'config', 'domain'} == requirements

def test_validate_no_files_delete_dir(self, resource_validate_no_training_files_delete_dir):
    with pytest.raises(AppException):
        DataUtility.validate_and_get_requirements(pytest.bot_data_home_dir, True)
    assert not os.path.exists(pytest.bot_data_home_dir)
