def load_fallback_actions(bot: Text):
    """
    Fetch the configured fallback action names for a bot.

    :param bot: bot id
    :return: tuple of (core fallback action name, nlu fallback action name)
    """
    from kairon.data_processor.processor import MongoProcessor

    mongo_processor = MongoProcessor()
    config = mongo_processor.load_config(bot)
    # RulePolicy may be absent from the bot's policy pipeline; guard against None
    # before reading core_fallback_action_name (original raised AttributeError here).
    rule_policy = next((comp for comp in config['policies'] if comp["name"] == "RulePolicy"), None)
    fallback_action = rule_policy.get("core_fallback_action_name") if rule_policy else None
    fallback_action = fallback_action if fallback_action else "action_default_fallback"
    nlu_fallback_action = MongoProcessor.fetch_nlu_fallback_action(bot)
    return fallback_action, nlu_fallback_action
def train_model_for_bot(bot: str):
    """
    Loads bot data from mongo into individual files for training.

    :param bot: bot id
    :return: model path
    :raises AppException: if the bot has no training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")
    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)
    rules = processor.get_rules_for_training(bot)

    directory = Utility.write_training_data(nlu, domain, config, stories, rules)
    output = os.path.join(DEFAULT_MODELS_PATH, bot)
    try:
        model = train(
            domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
            config=os.path.join(directory, DEFAULT_CONFIG_PATH),
            training_files=os.path.join(directory, DEFAULT_DATA_PATH),
            output=output,
        )
    finally:
        # always remove the temporary training directory, even when training fails
        # (original leaked the directory on exception)
        Utility.delete_directory(directory)
    # release large training artifacts eagerly
    del processor
    del nlu
    del domain
    del stories
    del config
    return model
async def test_import_data_validation_failed(self):
    """Validation reports training-example issues and a subsequent import saves nothing."""
    bot = 'test_data_import_bot'
    user = '******'
    source = 'tests/testing_data/validator/common_training_examples'
    test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    shutil.copytree(source, test_data_path)

    importer = DataImporter(test_data_path, bot, user, REQUIREMENTS.copy(), True)
    summary, component_count = await importer.validate()
    # only the training_examples section should carry errors
    for section in ('intents', 'stories', 'utterances', 'http_actions', 'domain', 'config', 'exception'):
        assert not summary.get(section)
    assert summary.get('training_examples')

    importer.validator.intents = []
    importer.import_data()
    processor = MongoProcessor()
    assert len(processor.fetch_intents(bot)) == 0
    assert len(processor.fetch_stories(bot)) == 0
    assert len(list(processor.fetch_training_examples(bot))) == 0
    assert len(list(processor.fetch_responses(bot))) == 0
    assert len(processor.fetch_actions(bot)) == 0
def test_write_training_data_with_rules(self):
    """Training data that includes rules can be written to disk."""
    from kairon.data_processor.processor import MongoProcessor
    bot = "test_load_from_path_yml_training_files"
    processor = MongoProcessor()
    training_data = processor.load_nlu(bot)
    story_graph = processor.load_stories(bot)
    domain = processor.load_domain(bot)
    config = processor.load_config(bot)
    http_action = processor.load_http_action(bot)
    rules = processor.get_rules_for_training(bot)
    training_data_path = Utility.write_training_data(
        training_data, domain, config, story_graph, rules, http_action
    )
    assert os.path.exists(training_data_path)
def is_data_import_allowed(summary: dict, bot: Text, user: Text):
    """
    Decide whether validated training data may be imported for a bot.

    :param summary: validation summary keyed by component name
    :param bot: bot id
    :param user: user id
    :return: True when import is permitted
    """
    from kairon.data_processor.processor import MongoProcessor

    bot_settings = MongoProcessor.get_bot_settings(bot, user)
    # force_import bypasses validation entirely
    if bot_settings.force_import:
        return True
    # optionally skip the utterances section when checking for errors
    ignored = {'utterances'} if bot_settings.ignore_utterances else set()
    return all(not summary[key] for key in summary.keys() if key not in ignored)
async def test_write_training_data(self):
    """Training data loaded from a yml project can be written back to disk."""
    from kairon.data_processor.processor import MongoProcessor
    bot = "test_load_from_path_yml_training_files"
    processor = MongoProcessor()
    await processor.save_from_path(
        "./tests/testing_data/yml_training_files",
        bot=bot,
        user="******"
    )
    training_data = processor.load_nlu(bot)
    story_graph = processor.load_stories(bot)
    domain = processor.load_domain(bot)
    config = processor.load_config(bot)
    http_action = processor.load_http_action(bot)
    # rules are intentionally omitted (None) for this case
    training_data_path = Utility.write_training_data(
        training_data, domain, config, story_graph, None, http_action
    )
    assert os.path.exists(training_data_path)
async def test_import_data(self):
    """Valid training data imports successfully when http_actions are excluded."""
    bot = 'test_data_import'
    user = '******'
    test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    shutil.copytree('tests/testing_data/validator/valid', test_data_path)

    importer = DataImporter(test_data_path, bot, user, REQUIREMENTS - {"http_actions"}, True, True)
    await importer.validate()
    importer.import_data()

    processor = MongoProcessor()
    intents = processor.fetch_intents(bot)
    assert 'greet' in intents
    assert 'deny' in intents
    assert len(processor.fetch_stories(bot)) == 2
    assert len(list(processor.fetch_training_examples(bot))) == 7
    assert len(list(processor.fetch_responses(bot))) == 4
    assert len(processor.fetch_actions(bot)) == 2
    assert len(processor.fetch_rule_block_names(bot)) == 4
async def test_trigger_data_importer_validate_and_save_overwrite_same_user(
        self, monkeypatch):
    """Re-running validate-and-save for the same user overwrites data and logs success."""
    bot = 'test_events'
    user = '******'
    test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    shutil.copytree('tests/testing_data/validator/valid', test_data_path)

    monkeypatch.setattr(Utility, "get_latest_file", lambda *args, **kwargs: test_data_path)

    DataImporterLogProcessor.add_log(bot, user, files_received=REQUIREMENTS - {"http_actions"})
    await EventsTrigger.trigger_data_importer(bot, user, True, True)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 7
    latest = logs[0]
    # no component should report validation errors
    for section in ('intents', 'stories', 'utterances', 'http_actions',
                    'training_examples', 'domain', 'config'):
        assert not latest.get(section).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Success'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value

    processor = MongoProcessor()
    assert 'greet' in processor.fetch_intents(bot)
    assert 'deny' in processor.fetch_intents(bot)
    assert len(processor.fetch_stories(bot)) == 2
    assert len(list(processor.fetch_training_examples(bot))) == 7
    assert len(list(processor.fetch_responses(bot))) == 4
    assert len(processor.fetch_actions(bot)) == 2
    assert len(processor.fetch_rule_block_names(bot)) == 4
def test_visitor_hit_fallback_nlu_fallback_configured(
        self, mock_fallback_user_data):
    """Fallback counts are computed when an nlu_fallback rule is configured."""
    bot = "5b029887-bed2-4bbb-aa25-bd12fda26244"
    rule = {
        'name': 'fallback_rule',
        'steps': [
            {"name": "nlu_fallback", "type": "INTENT"},
            {"name": "utter_please_rephrase", "type": "BOT"},
        ],
        'type': 'RULE',
    }
    MongoProcessor().add_complex_story(rule, bot, 'test')
    hit_fall_back, message = ChatHistory.visitor_hit_fallback(bot)
    assert hit_fall_back["fallback_count"] == 2
    assert hit_fall_back["total_count"] == 4
    assert message is None
async def test_trigger_data_importer_import_with_intent_issues(
        self, monkeypatch):
    """Import fails and saves nothing when intent names mismatch the domain."""
    bot = 'test_trigger_data_importer_import_with_intent_issues'
    user = '******'
    test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    shutil.copytree('tests/testing_data/validator/intent_name_mismatch', test_data_path)

    monkeypatch.setattr(Utility, "get_latest_file", lambda *args, **kwargs: test_data_path)
    BotSettings(ignore_utterances=True, bot=bot, user=user).save()

    DataImporterLogProcessor.add_log(
        bot, user, files_received=['nlu', 'stories', 'domain', 'config'])
    await EventsTrigger.trigger_data_importer(bot, user, True, True)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 1
    latest = logs[0]
    # only the intents section should report issues
    assert latest.get('intents').get('data')
    for section in ('stories', 'utterances', 'http_actions',
                    'training_examples', 'domain', 'config'):
        assert not latest.get(section).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Failure'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value

    mongo_processor = MongoProcessor()
    assert len(mongo_processor.fetch_stories(bot)) == 0
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 0
    assert len(list(mongo_processor.fetch_responses(bot))) == 0
    assert len(mongo_processor.fetch_actions(bot)) == 0
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 0
def add_bot(name: str, account: int, user: str, is_new_account: bool = False):
    """
    Add a bot to an account.

    :param name: bot name
    :param account: account id
    :param user: user id
    :param is_new_account: True if it is a new account
    :return: saved bot document as a dict
    :raises AppException: when name/user is blank or the bot already exists
    """
    if Utility.check_empty_string(name):
        raise AppException("Bot Name cannot be empty or blank spaces")
    if Utility.check_empty_string(user):
        raise AppException("user cannot be empty or blank spaces")
    # reject case-insensitive duplicates within the same account
    Utility.is_exist(
        Bot,
        exp_message="Bot already exists!",
        name__iexact=name,
        account=account,
        status=True,
    )
    bot = Bot(name=name, account=account, user=user).save().to_mongo().to_dict()
    bot_id = str(bot['_id'])
    # for a brand-new account the bot/user mapping is created elsewhere
    if not is_new_account:
        AccountProcessor.add_bot_for_user(bot_id, user)
    BotSettings(bot=bot_id, user=user).save()
    # seed the new bot with default config and fallback data
    processor = MongoProcessor()
    config = processor.load_config(bot_id)
    processor.add_or_overwrite_config(config, bot_id, user)
    processor.add_default_fallback_data(bot_id, user, True, True)
    return bot
async def test_trigger_data_importer_validate_existing_data(
        self, monkeypatch, get_training_data):
    """Validation against an empty upload directory succeeds using existing bot data."""
    bot = 'test_trigger_data_importer_domain_only'
    user = '******'
    test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    Utility.make_dirs(test_data_path)

    monkeypatch.setattr(Utility, "get_latest_file", lambda *args, **kwargs: test_data_path)

    DataImporterLogProcessor.add_log(bot, user)
    await EventsTrigger.trigger_data_importer(bot, user, True, False)

    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 2
    latest = logs[0]
    # no component should report validation errors
    for section in ('intents', 'stories', 'utterances', 'http_actions',
                    'training_examples', 'domain', 'config'):
        assert not latest.get(section).get('data')
    assert not latest.get('exception')
    assert latest['is_data_uploaded']
    assert latest['start_timestamp']
    assert latest['end_timestamp']
    assert latest['status'] == 'Success'
    assert latest['event_status'] == EVENT_STATUS.COMPLETED.value

    mongo_processor = MongoProcessor()
    assert len(mongo_processor.fetch_stories(bot)) == 2
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 7
    assert len(list(mongo_processor.fetch_responses(bot))) == 2
    assert len(mongo_processor.fetch_actions(bot)) == 2
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 3
def train_model_for_bot(bot: str):
    """
    Loads bot data from mongo into individual files for training.

    :param bot: bot id
    :return: model path
    :raises AppException: if the bot has no training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")
    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)

    # serialize each artifact and write to a temporary training directory
    directory = Utility.save_files(
        nlu.nlu_as_markdown().encode(),
        domain.as_yaml().encode(),
        stories.as_story_string().encode(),
        yaml.dump(config).encode(),
    )
    output = os.path.join(DEFAULT_MODELS_PATH, bot)
    try:
        model = train(
            domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
            config=os.path.join(directory, DEFAULT_CONFIG_PATH),
            training_files=os.path.join(directory, DEFAULT_DATA_PATH),
            output=output,
        )
    finally:
        # always remove the temporary training directory, even when training fails
        # (original leaked the directory on exception)
        Utility.delete_directory(directory)
    # release large training artifacts eagerly
    del processor
    del nlu
    del domain
    del stories
    del config
    return model
from kairon.data_processor.constant import MODEL_TRAINING_STATUS, TRAINING_DATA_GENERATOR_STATUS
from kairon.data_processor.data_objects import TrainingExamples
from kairon.data_processor.processor import (
    MongoProcessor,
    AgentProcessor,
    ModelProcessor,
    TrainingDataGenerationProcessor,
)
from kairon.exceptions import AppException
from kairon.train import start_training
from kairon.utils import Utility
from urllib.parse import urljoin

# Module-level singletons shared by every route handler in this router.
router = APIRouter()
auth = Authentication()
mongo_processor = MongoProcessor()


@router.get("/intents", response_model=Response)
async def get_intents(current_user: User = Depends(auth.get_current_user)):
    """
    Fetches list of existing intents for particular bot
    """
    # bot id is resolved from the authenticated user, not a path parameter
    return Response(
        data=mongo_processor.get_intents(current_user.get_bot())).dict()


@router.get("/intents/all", response_model=Response)
async def get_intents_with_training_examples(current_user: User = Depends(
        auth.get_current_user)):
    """
async def test_trigger_data_importer_domain_only(self, monkeypatch, get_training_data):
    """Import only a domain file; existing stories/nlu/rules must stay untouched."""
    bot = 'test_trigger_data_importer_domain_only'
    user = '******'
    # upload directory contains nothing but domain.yml
    test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    Utility.make_dirs(test_data_path)
    shutil.copy2('tests/testing_data/validator/valid/domain.yml', test_data_path)
    # pre-populate the bot with full training data so we can verify it survives
    nlu, story_graph, domain, config, http_actions = await get_training_data(
        'tests/testing_data/validator/valid')
    mongo_processor = MongoProcessor()
    mongo_processor.save_stories(story_graph.story_steps, bot, user)
    mongo_processor.save_nlu(nlu, bot, user)
    config["bot"] = bot
    config["user"] = user
    # persist config directly via the document layer (bypasses processor validation)
    config_obj = Configs._from_son(config)
    config_obj.save()
    mongo_processor.save_rules(story_graph.story_steps, bot, user)
    mongo_processor.save_http_action(http_actions, bot, user)

    def _path(*args, **kwargs):
        return test_data_path

    monkeypatch.setattr(Utility, "get_latest_file", _path)

    # importer is told only a domain file was received
    DataImporterLogProcessor.add_log(bot, user, files_received=["domain"])
    await EventsTrigger.trigger_data_importer(bot, user, True, False)
    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 1
    # no component should report validation errors
    assert not logs[0].get('intents').get('data')
    assert not logs[0].get('stories').get('data')
    assert not logs[0].get('utterances').get('data')
    assert not logs[0].get('http_actions').get('data')
    assert not logs[0].get('training_examples').get('data')
    assert not logs[0].get('domain').get('data')
    assert not logs[0].get('config').get('data')
    assert not logs[0].get('exception')
    assert logs[0]['is_data_uploaded']
    assert logs[0]['start_timestamp']
    assert logs[0]['end_timestamp']
    assert logs[0]['status'] == 'Success'
    assert logs[0]['event_status'] == EVENT_STATUS.COMPLETED.value
    # pre-existing training data must be unchanged by the domain-only import
    assert len(mongo_processor.fetch_stories(bot)) == 2
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 7
    assert len(list(mongo_processor.fetch_responses(bot))) == 2
    assert len(mongo_processor.fetch_actions(bot)) == 2
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 3
async def test_import_data_dont_save(self):
    """With save disabled, import leaves existing data intact and writes nothing new."""
    bot = 'test_data_import'
    bot_2 = 'test_data_import_bot'
    user = '******'
    source = 'tests/testing_data/validator/common_training_examples'
    test_data_path = os.path.join(pytest.tmp_dir, str(datetime.utcnow()))
    shutil.copytree(source, test_data_path)

    importer = DataImporter(test_data_path, bot, user, set(), False)
    await importer.validate()
    importer.import_data()

    processor = MongoProcessor()
    intents = processor.fetch_intents(bot)
    for intent in ('greet', 'deny', 'location', 'affirm'):
        assert intent in intents
    assert len(processor.fetch_stories(bot)) == 4
    assert len(list(processor.fetch_training_examples(bot))) == 13
    assert len(list(processor.fetch_responses(bot))) == 6
    assert len(processor.fetch_actions(bot)) == 4
    assert len(processor.fetch_rule_block_names(bot)) == 4
    # the other bot must remain completely empty
    assert len(processor.fetch_intents(bot_2)) == 0
    assert len(processor.fetch_stories(bot_2)) == 0
    assert len(list(processor.fetch_training_examples(bot_2))) == 0
    assert len(list(processor.fetch_responses(bot_2))) == 0
    assert len(processor.fetch_actions(bot_2)) == 0
    assert len(processor.fetch_rule_block_names(bot_2)) == 0