def create(bot: str, use_test_stories: bool = False):
    """Dump a bot's NLU data and stories to YAML files for test-data generation.

    :param bot: bot id
    :param use_test_stories: when True, stories are written as test stories
    :return: tuple of (nlu file path, stories file path)
    :raises AppException: when either stories or NLU data is empty
    """
    from kairon import Utility
    from itertools import chain
    from rasa.shared.nlu.training_data.training_data import TrainingData

    bot_home = os.path.join('testing_data', bot)
    Utility.make_dirs(bot_home)
    processor = MongoProcessor()
    intents_and_training_examples = processor.get_intents_and_training_examples(bot)
    # Augment every (intent, examples) pair, then flatten into a single message list.
    augmented = (
        TestDataGenerator.__prepare_nlu(intent, examples)
        for intent, examples in intents_and_training_examples.items()
    )
    nlu_data = TrainingData(training_examples=list(chain.from_iterable(augmented)))
    stories = processor.load_stories(bot).merge(processor.get_rules_for_training(bot))
    if stories.is_empty() or nlu_data.is_empty():
        raise AppException('Not enough training data exists. Please add some training data.')
    nlu_path = os.path.join(bot_home, "nlu.yml")
    Utility.write_to_file(nlu_path, nlu_data.nlu_as_yaml().encode())
    stories_file = "test_stories.yml" if use_test_stories else "stories.yml"
    stories_path = os.path.join(bot_home, stories_file)
    YAMLStoryWriter().dump(stories_path, stories.story_steps, is_test_story=use_test_stories)
    return nlu_path, stories_path
def trigger_history_server_request(bot: Text, endpoint: Text, request_body: dict,
                                   request_method: str = 'GET', return_json: bool = True):
    """Fire a request at the history server configured for a bot.

    :param bot: bot id
    :param endpoint: path appended to the history server base url
    :param request_body: json payload sent with the request
    :param request_method: http verb, defaults to GET
    :param return_json: return the parsed json body when True, raw response otherwise
    :raises AppException: when the history server cannot be reached
    """
    from kairon.shared.data.processor import MongoProcessor

    history_server = MongoProcessor().get_history_server_endpoint(bot)
    headers = {}
    if not Utility.check_empty_string(history_server.get('token')):
        # Endpoint is token-protected; forward the bearer token.
        headers = {'Authorization': f'Bearer {history_server["token"]}'}
    url = urljoin(history_server['url'], endpoint)
    try:
        logger.info(f"url : {url} {request_body}")
        response = requests.request(request_method, url, headers=headers, json=request_body)
        logger.info(f"url : {response.url} {response.request.body}")
    except requests.exceptions.ConnectionError as e:
        logger.error(str(e))
        raise AppException(
            f'Unable to connect to history server: {str(e)}')
    return response.json() if return_json else response
def load_fallback_actions(bot: Text):
    """Fetch the configured fallback actions for a bot.

    :param bot: bot id
    :return: tuple of (core fallback action, nlu fallback action)
    """
    from .processor import MongoProcessor

    processor = MongoProcessor()
    # Core fallback is derived from the bot's pipeline/policy config.
    fallback_action = DataUtility.parse_fallback_action(processor.load_config(bot))
    return fallback_action, MongoProcessor.fetch_nlu_fallback_action(bot)
async def _read_and_get_data(config_path: str, domain_path: str, nlu_path: str, stories_path: str, bot: str, user: str):
    """Stage training files in a temp dir, import them via rasa and persist to mongo."""
    data_path = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
    os.mkdir(data_path)
    for training_file in (nlu_path, stories_path):
        shutil.copy2(training_file, data_path)
    importer = RasaFileImporter.load_from_config(
        config_path=config_path, domain_path=domain_path, training_data_paths=data_path)
    domain = await importer.get_domain()
    story_graph = await importer.get_stories()
    config = await importer.get_config()
    nlu = await importer.get_nlu_data(config.get('language'))
    MongoProcessor().save_training_data(bot, user, config, domain, story_graph, nlu, overwrite=True)
def unique_user_input(month, current_user_bot):
    """Return user queries from chat history that are absent from training examples."""
    from ...shared.data.processor import MongoProcessor

    response = Utility.trigger_history_server_request(
        current_user_bot,
        f'/api/history/{current_user_bot}/metrics/users/input',
        {'month': month})
    user_input = response['data']
    # First element of the tuple holds the example texts to compare against.
    known_examples = MongoProcessor().get_all_training_examples(bot=current_user_bot)[0]
    return [query for query in user_input if query['_id'] not in known_examples]
def validate_history_endpoint(bot: Text):
    """
    Checks if the history endpoint is managed by kairon or client user

    :param bot: bot id
    :return: none
    :raises AppException: when the history server is client-managed
    """
    from kairon.shared.data.processor import MongoProcessor

    endpoint = MongoProcessor().get_history_server_endpoint(bot)
    endpoint_type = endpoint.get('type')
    if endpoint_type and endpoint_type != 'kairon':
        raise AppException(
            f'History server not managed by Kairon!. Manually delete the collection:{bot}'
        )
async def test_import_data_validation_failed(self):
    """Import with failed validation and cleared intents must persist nothing."""
    fixture = 'tests/testing_data/validator/common_training_examples'
    bot = 'test_data_import_bot'
    user = '******'
    workdir = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
    shutil.copytree(fixture, workdir)
    importer = DataImporter(workdir, bot, user, REQUIREMENTS.copy(), True)
    summary, component_count = await importer.validate()
    # Only training examples should carry validation failures.
    for clean_section in ('intents', 'stories', 'utterances', 'http_actions',
                          'domain', 'config', 'exception'):
        assert not summary.get(clean_section)
    assert summary.get('training_examples')
    importer.validator.intents = []
    importer.import_data()
    mp = MongoProcessor()
    assert len(mp.fetch_intents(bot)) == 0
    assert len(mp.fetch_stories(bot)) == 0
    assert len(list(mp.fetch_training_examples(bot))) == 0
    assert len(list(mp.fetch_responses(bot))) == 0
    assert len(mp.fetch_actions(bot)) == 0
async def test_trigger_data_importer_validate_existing_data(self, monkeypatch, get_training_data):
    """Validation-only importer run over data already saved for the bot.

    Triggers the data importer against an empty upload directory and checks
    the run succeeds, logs no validation failures, and the bot's previously
    saved data (stories, examples, responses, actions, rules) is unchanged.
    """
    bot = 'test_trigger_data_importer_domain_only'
    user = '******'
    test_data_path = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
    Utility.make_dirs(test_data_path)

    def _path(*args, **kwargs):
        # Force the importer to read from the temp directory created above.
        return test_data_path

    monkeypatch.setattr(Utility, "get_latest_file", _path)
    DataImporterLogProcessor.add_log(bot, user)
    # NOTE(review): assuming the trailing flags are (validate, save) — confirm
    # against EventsTrigger.trigger_data_importer's signature.
    await EventsTrigger.trigger_data_importer(bot, user, True, False)
    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 2
    # Latest log entry must carry no validation-failure data for any section.
    assert not logs[0].get('intents').get('data')
    assert not logs[0].get('stories').get('data')
    assert not logs[0].get('utterances').get('data')
    assert [action.get('data') for action in logs[0].get('actions') if action.get('type') == 'http_actions'] == [[]]
    assert not logs[0].get('training_examples').get('data')
    assert not logs[0].get('domain').get('data')
    assert not logs[0].get('config').get('data')
    assert not logs[0].get('exception')
    assert logs[0]['is_data_uploaded']
    assert logs[0]['start_timestamp']
    assert logs[0]['end_timestamp']
    assert logs[0]['status'] == 'Success'
    assert logs[0]['event_status'] == EVENT_STATUS.COMPLETED.value
    # Existing data saved for this bot remains intact.
    mongo_processor = MongoProcessor()
    assert len(mongo_processor.fetch_stories(bot)) == 2
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 7
    assert len(list(mongo_processor.fetch_responses(bot))) == 3
    assert len(mongo_processor.fetch_actions(bot)) == 2
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 3
async def test_import_data(self):
    """A valid training data set (minus http actions) imports end to end."""
    fixture = 'tests/testing_data/validator/valid'
    bot = 'test_data_import'
    user = '******'
    workdir = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
    shutil.copytree(fixture, workdir)
    importer = DataImporter(workdir, bot, user, REQUIREMENTS - {"http_actions"}, True, True)
    await importer.validate()
    importer.import_data()
    mp = MongoProcessor()
    intents = mp.fetch_intents(bot)
    assert 'greet' in intents
    assert 'deny' in intents
    assert len(mp.fetch_stories(bot)) == 2
    assert len(list(mp.fetch_training_examples(bot))) == 7
    assert len(list(mp.fetch_responses(bot))) == 4
    assert len(mp.fetch_actions(bot)) == 2
    assert len(mp.fetch_rule_block_names(bot)) == 4
def add_bot(name: str, account: int, user: str, is_new_account: bool = False):
    """
    add a bot to account

    :param name: bot name
    :param account: account id
    :param user: user id
    :param is_new_account: True if it is a new account
    :return: bot id
    """
    from kairon.shared.data.processor import MongoProcessor
    from kairon.shared.data.data_objects import BotSettings

    if Utility.check_empty_string(name):
        raise AppException("Bot Name cannot be empty or blank spaces")
    if Utility.check_empty_string(user):
        raise AppException("user cannot be empty or blank spaces")
    Utility.is_exist(
        Bot,
        exp_message="Bot already exists!",
        name__iexact=name,
        account=account,
        status=True,
    )
    bot = Bot(name=name, account=account, user=user).save().to_mongo().to_dict()
    bot_id = str(bot['_id'])
    if not is_new_account:
        # Existing accounts need explicit owner access granted here.
        AccountProcessor.__allow_access_to_bot(
            bot_id, user, user, account,
            ACCESS_ROLES.OWNER.value, ACTIVITY_STATUS.ACTIVE.value)
    BotSettings(bot=bot_id, user=user).save()
    # Seed the new bot with default config, fallback data and required slots.
    processor = MongoProcessor()
    processor.add_or_overwrite_config(processor.load_config(bot_id), bot_id, user)
    processor.add_default_fallback_data(bot_id, user, True, True)
    processor.add_system_required_slots(bot_id, user)
    return bot
class DataImporter:
    """
    Class to import training data into kairon. A validation is run over
    training data before initiating the import process.
    """
    processor = MongoProcessor()

    def __init__(self, path: Text, bot: Text, user: Text, files_to_save: set,
                 save_data: bool = True, overwrite: bool = True):
        """Initialize data importer"""
        self.path = path
        self.bot = bot
        self.user = user
        self.save_data = save_data
        self.overwrite = overwrite
        self.files_to_save = files_to_save

    async def validate(self):
        """
        Validates domain and data files to check for possible mistakes and
        logs them into collection.

        :return: tuple of (validation summary, component counts)
        """
        DataImporter.processor.prepare_training_data_for_validation(
            self.bot, self.path, REQUIREMENTS - self.files_to_save)
        data_path = os.path.join(self.path, DEFAULT_DATA_PATH)
        config_path = os.path.join(self.path, DEFAULT_CONFIG_PATH)
        domain_path = os.path.join(self.path, DEFAULT_DOMAIN_PATH)
        TrainingDataValidator.validate_domain(domain_path)
        self.validator = await TrainingDataValidator.from_training_files(
            data_path, domain_path, config_path, self.path)
        self.validator.validate_training_data(False)
        return self.validator.summary, self.validator.component_count

    def import_data(self):
        """
        Saves training data into database.
        """
        if not (self.save_data and self.files_to_save):
            # Nothing requested for saving; validation-only run.
            return
        validator = self.validator
        if validator.config and validator.domain and validator.story_graph and validator.intents:
            DataImporter.processor.save_training_data(
                self.bot, self.user, validator.config, validator.domain,
                validator.story_graph, validator.intents, validator.actions,
                self.overwrite, self.files_to_save)
def test_write_training_data_with_rules(self):
    """Training data including rules can be written out to disk."""
    from kairon.shared.data.processor import MongoProcessor

    bot = "test_load_from_path_yml_training_files"
    processor = MongoProcessor()
    training_data = processor.load_nlu(bot)
    story_graph = processor.load_stories(bot)
    domain = processor.load_domain(bot)
    config = processor.load_config(bot)
    http_action = processor.load_http_action(bot)
    rules = processor.get_rules_for_training(bot)
    training_data_path = Utility.write_training_data(
        training_data, domain, config, story_graph, rules, http_action)
    assert os.path.exists(training_data_path)
def test_visitor_hit_fallback_with_kairon_client(mock_auth, mock_mongo_processor):
    """Fallback metrics endpoint proxies the kairon history server response.

    Mocks the history server's fallback metrics API (pinning the exact
    payload kairon is expected to forward) and verifies the counts come
    back unchanged through kairon's own endpoint.
    """
    # Stub the downstream history server; the json_params_matcher asserts
    # kairon sends month plus the configured fallback/rephrase actions.
    responses.add(
        responses.GET,
        f"https://localhost:8083/api/history/{pytest.bot}/metrics/fallback",
        status=200,
        json={"data": {
            'fallback_count': 10,
            'total_count': 90
        }},
        match=[
            responses.json_params_matcher({
                'month': 1,
                'action_fallback': 'action_default_fallback',
                'nlu_fallback': 'utter_please_rephrase'
            })
        ],
    )
    # Add a fallback rule for the bot before hitting the endpoint.
    steps = [{
        "name": "nlu_fallback",
        "type": "INTENT"
    }, {
        "name": "utter_please_rephrase",
        "type": "BOT"
    }]
    rule = {'name': 'fallback_rule', 'steps': steps, 'type': 'RULE'}
    MongoProcessor().add_complex_story(rule, pytest.bot, 'test')
    response = client.get(
        f"/api/history/{pytest.bot}/metrics/fallback",
        headers={
            "Authorization": pytest.token_type + " " + pytest.access_token
        },
    )
    actual = response.json()
    assert actual["error_code"] == 0
    # Counts are passed through from the mocked history server verbatim.
    assert actual["data"]["fallback_count"] == 10
    assert actual["data"]["total_count"] == 90
    assert actual["message"] is None
    assert actual["success"]
async def test_trigger_data_importer_validate_and_save_overwrite_same_user(self, monkeypatch):
    """Re-running the importer as the same user succeeds and saves the data.

    Triggers the importer over the valid fixture set and checks a clean log
    entry plus the expected persisted counts for the bot.
    """
    bot = 'test_events'
    user = '******'
    test_data_path = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
    shutil.copytree('tests/testing_data/validator/valid', test_data_path)

    def _path(*args, **kwargs):
        # Point the importer at the copied fixture directory.
        return test_data_path

    monkeypatch.setattr(Utility, "get_latest_file", _path)
    DataImporterLogProcessor.add_log(bot, user, files_received=REQUIREMENTS - {"http_actions"})
    # NOTE(review): assuming the trailing flags are (validate, save/overwrite)
    # — confirm against EventsTrigger.trigger_data_importer's signature.
    await EventsTrigger.trigger_data_importer(bot, user, True, True)
    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 8
    # Latest log entry reports a clean validation run.
    assert not logs[0].get('intents').get('data')
    assert not logs[0].get('stories').get('data')
    assert not logs[0].get('utterances').get('data')
    assert [action.get('data') for action in logs[0].get('actions') if action.get('type') == 'http_actions']
    assert not logs[0].get('training_examples').get('data')
    assert not logs[0].get('domain').get('data')
    assert not logs[0].get('config').get('data')
    assert not logs[0].get('exception')
    assert logs[0]['is_data_uploaded']
    assert logs[0]['start_timestamp']
    assert logs[0]['end_timestamp']
    assert logs[0]['status'] == 'Success'
    assert logs[0]['event_status'] == EVENT_STATUS.COMPLETED.value
    # Saved data reflects the valid fixture contents.
    processor = MongoProcessor()
    assert 'greet' in processor.fetch_intents(bot)
    assert 'deny' in processor.fetch_intents(bot)
    assert len(processor.fetch_stories(bot)) == 2
    assert len(list(processor.fetch_training_examples(bot))) == 7
    assert len(list(processor.fetch_responses(bot))) == 4
    assert len(processor.fetch_actions(bot)) == 2
    assert len(processor.fetch_rule_block_names(bot)) == 4
async def test_write_training_data(self):
    """Training data saved from a path can be written back to disk (no rules)."""
    from kairon.shared.data.processor import MongoProcessor

    bot = "test_load_from_path_yml_training_files"
    processor = MongoProcessor()
    await processor.save_from_path(
        "./tests/testing_data/yml_training_files", bot=bot, user="******")
    training_data = processor.load_nlu(bot)
    story_graph = processor.load_stories(bot)
    domain = processor.load_domain(bot)
    config = processor.load_config(bot)
    http_action = processor.load_http_action(bot)
    # Rules argument is None: only nlu/stories/domain/config/http actions are written.
    training_data_path = Utility.write_training_data(
        training_data, domain, config, story_graph, None, http_action)
    assert os.path.exists(training_data_path)
class AgentProcessor:
    """
    Class contains logic for loading bot agents
    """
    mongo_processor = MongoProcessor()
    cache_provider: AgentCache = InMemoryAgentCache()

    @staticmethod
    def get_agent(bot: Text) -> Agent:
        """
        fetch the bot agent from cache if exist otherwise load it into the cache

        :param bot: bot id
        :return: Agent Object
        """
        cache = AgentProcessor.cache_provider
        if not cache.is_exists(bot):
            AgentProcessor.reload(bot)
        return cache.get(bot)

    @staticmethod
    def reload(bot: Text):
        """
        reload bot agent

        :param bot: bot id
        :return: None
        :raises AppException: when the agent cannot be loaded
        """
        try:
            endpoint = AgentProcessor.mongo_processor.get_endpoints(
                bot, raise_exception=False)
            action_endpoint = Utility.get_action_url(endpoint)
            model_path = Utility.get_latest_model(bot)
            domain = AgentProcessor.mongo_processor.load_domain(bot)
            mongo_store = Utility.get_local_mongo_store(bot, domain)
            agent = Agent.load(
                model_path,
                action_endpoint=action_endpoint,
                tracker_store=mongo_store)
            AgentProcessor.cache_provider.set(bot, agent)
        except Exception as e:
            # Any failure while loading is surfaced as an untrained-bot error;
            # the underlying cause is preserved in the log.
            logging.exception(e)
            raise AppException("Bot has not been trained yet!")
def train_model_for_bot(bot: str):
    """
    loads bot data from mongo into individual files for training

    :param bot: bot id
    :return: model path
    :raises AppException: when the bot has no training examples
    """
    processor = MongoProcessor()
    nlu = processor.load_nlu(bot)
    if not nlu.training_examples:
        raise AppException("Training data does not exists!")
    domain = processor.load_domain(bot)
    stories = processor.load_stories(bot)
    config = processor.load_config(bot)
    rules = processor.get_rules_for_training(bot)
    directory = Utility.write_training_data(nlu, domain, config, stories, rules)

    output = os.path.join(DEFAULT_MODELS_PATH, bot)
    if not os.path.exists(output):
        os.mkdir(output)
    model = train(
        domain=os.path.join(directory, DEFAULT_DOMAIN_PATH),
        config=os.path.join(directory, DEFAULT_CONFIG_PATH),
        training_files=os.path.join(directory, DEFAULT_DATA_PATH),
        output=output,
        core_additional_arguments={"augmentation_factor": 100},
        force_training=True).model
    Utility.delete_directory(directory)
    # Release the large training artefacts before shuffling model files around.
    del processor, nlu, domain, stories, config
    Utility.move_old_models(output, model)
    return model
async def test_trigger_data_importer_forced_import(self, monkeypatch):
    """Forced import persists data even when validation reports failures.

    The bot's settings enable force_import; the orphan-utterances fixture
    produces utterance validation failures, yet the run still completes
    with status Success and the data is saved.
    """
    bot = 'forced_import'
    user = '******'
    test_data_path = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
    shutil.copytree('tests/testing_data/validator/orphan_utterances', test_data_path)

    def _path(*args, **kwargs):
        # Point the importer at the copied fixture directory.
        return test_data_path

    monkeypatch.setattr(Utility, "get_latest_file", _path)
    # Enable forced import for this bot before triggering the event.
    BotSettings(force_import=True, bot=bot, user=user).save()
    DataImporterLogProcessor.add_log(bot, user, files_received=['nlu', 'stories', 'domain', 'config'])
    await EventsTrigger.trigger_data_importer(bot, user, True, True)
    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 1
    assert not logs[0].get('intents').get('data')
    assert not logs[0].get('stories').get('data')
    # Utterance validation failures are recorded, but the run still succeeds.
    assert logs[0].get('utterances').get('data')
    assert [action.get('data') for action in logs[0].get('actions') if action.get('type') == 'http_actions']
    assert not logs[0].get('training_examples').get('data')
    assert not logs[0].get('domain').get('data')
    assert not logs[0].get('config').get('data')
    assert not logs[0].get('exception')
    assert logs[0]['is_data_uploaded']
    assert logs[0]['start_timestamp']
    assert logs[0]['end_timestamp']
    assert logs[0]['status'] == 'Success'
    assert logs[0]['event_status'] == EVENT_STATUS.COMPLETED.value
    # Data was persisted despite the validation failures.
    mongo_processor = MongoProcessor()
    assert len(mongo_processor.fetch_stories(bot)) == 2
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 8
    assert len(list(mongo_processor.fetch_responses(bot))) == 8
    assert len(mongo_processor.fetch_actions(bot)) == 0
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 1
async def test_trigger_data_importer_domain_only(self, monkeypatch, get_training_data):
    """Importing only the domain file leaves other training data intact.

    Pre-populates the bot with the full valid fixture set, then triggers an
    import that receives only domain.yml and verifies stories, examples,
    responses, actions and rules are unchanged.
    """
    bot = 'test_trigger_data_importer_domain_only'
    user = '******'
    test_data_path = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
    Utility.make_dirs(test_data_path)
    shutil.copy2('tests/testing_data/validator/valid/domain.yml', test_data_path)
    # Seed the bot with the complete fixture data before the import.
    nlu, story_graph, domain, config, http_actions = await get_training_data('tests/testing_data/validator/valid')
    mongo_processor = MongoProcessor()
    mongo_processor.save_stories(story_graph.story_steps, bot, user)
    mongo_processor.save_nlu(nlu, bot, user)
    config["bot"] = bot
    config["user"] = user
    config_obj = Configs._from_son(config)
    config_obj.save()
    mongo_processor.save_rules(story_graph.story_steps, bot, user)
    mongo_processor.save_integrated_actions(http_actions, bot, user)

    def _path(*args, **kwargs):
        # Point the importer at the directory holding only domain.yml.
        return test_data_path

    monkeypatch.setattr(Utility, "get_latest_file", _path)
    DataImporterLogProcessor.add_log(bot, user, files_received=["domain"])
    await EventsTrigger.trigger_data_importer(bot, user, True, False)
    logs = list(DataImporterLogProcessor.get_logs(bot))
    assert len(logs) == 1
    # Latest log entry must carry no validation failures.
    assert not logs[0].get('intents').get('data')
    assert not logs[0].get('stories').get('data')
    assert not logs[0].get('utterances').get('data')
    assert [action.get('data') for action in logs[0].get('actions') if action.get('type') == 'http_actions'] == [[]]
    assert not logs[0].get('training_examples').get('data')
    assert not logs[0].get('domain').get('data')
    assert not logs[0].get('config').get('data')
    assert not logs[0].get('exception')
    assert logs[0]['is_data_uploaded']
    assert logs[0]['start_timestamp']
    assert logs[0]['end_timestamp']
    assert logs[0]['status'] == 'Success'
    assert logs[0]['event_status'] == EVENT_STATUS.COMPLETED.value
    # Previously saved data is untouched by the domain-only import.
    assert len(mongo_processor.fetch_stories(bot)) == 2
    assert len(list(mongo_processor.fetch_training_examples(bot))) == 7
    assert len(list(mongo_processor.fetch_responses(bot))) == 3
    assert len(mongo_processor.fetch_actions(bot)) == 2
    assert len(mongo_processor.fetch_rule_block_names(bot)) == 3
async def test_import_data_dont_save(self):
    """With save disabled, import_data leaves both bots' data untouched."""
    fixture = 'tests/testing_data/validator/common_training_examples'
    bot = 'test_data_import'
    bot_2 = 'test_data_import_bot'
    user = '******'
    workdir = os.path.join(pytest.tmp_dir, str(uuid.uuid4()))
    shutil.copytree(fixture, workdir)
    importer = DataImporter(workdir, bot, user, set(), False)
    await importer.validate()
    importer.import_data()
    mp = MongoProcessor()
    # Data previously imported for the first bot is still present...
    intents = mp.fetch_intents(bot)
    for intent in ('greet', 'deny', 'location', 'affirm'):
        assert intent in intents
    assert len(mp.fetch_stories(bot)) == 4
    assert len(list(mp.fetch_training_examples(bot))) == 13
    assert len(list(mp.fetch_responses(bot))) == 6
    assert len(mp.fetch_actions(bot)) == 4
    assert len(mp.fetch_rule_block_names(bot)) == 4
    # ...and nothing was written for the second bot.
    assert len(mp.fetch_intents(bot_2)) == 0
    assert len(mp.fetch_stories(bot_2)) == 0
    assert len(list(mp.fetch_training_examples(bot_2))) == 0
    assert len(list(mp.fetch_responses(bot_2))) == 0
    assert len(mp.fetch_actions(bot_2)) == 0
    assert len(mp.fetch_rule_block_names(bot_2)) == 0
"email": "*****@*****.**", "first_name": "Test", "last_name": "Chat", "password": "******", "confirm_password": "******", "account": "ChatTesting" }).dict())) token = Authentication.authenticate("*****@*****.**", "testChat@12") token_type = "Bearer" user = AccountProcessor.get_complete_user_details("*****@*****.**") bot = user['bots']['account_owned'][0]['_id'] start_training(bot, "*****@*****.**") bot2 = AccountProcessor.add_bot("testChat2", user['account'], "*****@*****.**")['_id'].__str__() loop.run_until_complete(MongoProcessor().save_from_path( "template/use-cases/Hi-Hello", bot2, user="******")) start_training(bot2, "*****@*****.**") bot3 = AccountProcessor.add_bot("testChat3", user['account'], "*****@*****.**")['_id'].__str__() ChatDataProcessor.save_channel_config( { "connector_type": "slack", "config": { "bot_user_oAuth_token": "xoxb-801939352912-801478018484-v3zq6MYNu62oSs8vammWOY8K", "slack_signing_secret": "79f036b9894eef17c064213b90d1042b" } }, bot, user="******") responses.start()