async def test_trigger_model_testing_event_run_tests_on_model(self, load_data, create_model, monkeypatch):
    import rasa.utils.common

    bot = 'test_events_bot'
    user = '******'
    config_path = 'tests/testing_data/model_tester/config.yml'
    domain_path = 'tests/testing_data/model_tester/domain.yml'
    nlu_path = 'tests/testing_data/model_tester/nlu_success/nlu.yml'
    stories_path = 'tests/testing_data/model_tester/training_stories_success/stories.yml'
    await load_data(config_path, domain_path, nlu_path, stories_path, bot, user)
    create_model(pytest.model_path, bot)

    def _mock_stories_output(*args, **kwargs):
        # Canned story-test metrics returned in place of the real evaluation loop.
        return {
            "precision": 0.91,
            "f1": 0.98,
            "accuracy": 0.99,
            "failed_stories": [],
        }

    # Patch rasa's run_in_loop so story testing returns the mocked metrics, and
    # stub the paraphrase endpoint so no external HTTP call is made during the test.
    monkeypatch.setattr(rasa.utils.common, 'run_in_loop', _mock_stories_output)
    responses.add('POST', Utility.environment["augmentation"]["paraphrase_url"],
                  json={'data': {'paraphrases': ['common training example']}})
    responses.start()

    EventsTrigger.trigger_model_testing(bot, user, False)

    logs = list(ModelTestingLogProcessor.get_logs(bot))
    assert len(logs) == 2
    assert not logs[0].get('exception')
    assert logs[0]['start_timestamp']
    assert logs[0].get('data')
    assert logs[0].get('end_timestamp')
    assert not Utility.check_empty_string(logs[0].get('status'))
    assert logs[0]['event_status'] == EVENT_STATUS.COMPLETED.value
    assert not os.path.exists(os.path.join('./testing_data', bot))
def get_logs(bot: str, log_type: str = None, reference_id: str = None, start_idx: int = 0, page_size: int = 10):
    """
    Get logs for the model testing event.

    @param bot: bot id.
    @param log_type: log data type: 'stories', 'nlu'
    @param reference_id: test reference_id
    @param start_idx: start index in list field
    @param page_size: number of rows from start index
    @return: list of logs.
    """
    from kairon import Utility

    # Fetch a paginated slice when a log type or reference id is supplied,
    # otherwise return all test logs for the bot.
    if not (Utility.check_empty_string(log_type) and Utility.check_empty_string(reference_id)):
        logs = ModelTestingLogProcessor.get_by_id_and_type(
            reference_id, bot, log_type, start_idx, page_size
        )
    else:
        logs = ModelTestingLogProcessor.get_all(bot)
    return logs
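# Usage sketch (illustrative, not from the source): fetch the second page of NLU
# test logs for a bot. `some_reference_id` is a hypothetical id from a previously
# triggered model testing event.
nlu_logs = ModelTestingLogProcessor.get_logs(
    bot='test_events_bot',
    log_type='nlu',
    reference_id=some_reference_id,
    start_idx=10,
    page_size=10,
)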
def run_test_on_nlu(nlu_path: str, model_path: str):
    """
    Run tests on NLU data.

    Args:
        nlu_path: path where NLU test data is present as YAML.
        model_path: path of the model on which the test has to be run.

    Returns:
        dictionary with evaluation results
    """
    import os
    from typing import Dict, Optional, Text

    from rasa.model import get_model
    import rasa.shared.nlu.training_data.loading
    from rasa.nlu.model import Interpreter
    from rasa.nlu.test import (
        remove_pretrained_extractors,
        get_eval_data,
        evaluate_intents,
        evaluate_response_selections,
        get_entity_extractors,
    )
    from kairon import Utility

    unpacked_model = get_model(model_path)
    nlu_model = os.path.join(unpacked_model, "nlu")
    interpreter = Interpreter.load(nlu_model)
    interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
    test_data = rasa.shared.nlu.training_data.loading.load_data(
        nlu_path, interpreter.model_metadata.language
    )
    result: Dict[Text, Optional[Dict]] = {
        "intent_evaluation": None,
        "entity_evaluation": None,
        "response_selection_evaluation": None,
    }

    (intent_results, response_selection_results, entity_results) = get_eval_data(
        interpreter, test_data
    )

    if intent_results:
        # Only failed predictions are retained in detail; successes are merely counted
        # and 'predictions'/'report' are dropped to keep the evaluation payload small.
        successes = []
        success_count = 0
        errors = []
        result["intent_evaluation"] = evaluate_intents(intent_results, None, False, False, True)
        if result["intent_evaluation"].get('predictions'):
            del result["intent_evaluation"]['predictions']
            del result["intent_evaluation"]['report']
        for r in intent_results:
            if r.intent_target == r.intent_prediction:
                success_count += 1
            else:
                errors.append({
                    "text": r.message,
                    "intent": r.intent_target,
                    "intent_prediction": {
                        'name': r.intent_prediction,
                        "confidence": r.confidence,
                    },
                })
        result["intent_evaluation"]['total_count'] = success_count + len(errors)
        result["intent_evaluation"]['success_count'] = success_count
        result["intent_evaluation"]['failure_count'] = len(errors)
        result["intent_evaluation"]['successes'] = successes
        result["intent_evaluation"]['errors'] = errors

    if response_selection_results:
        successes = []
        success_count = 0
        errors = []
        result["response_selection_evaluation"] = evaluate_response_selections(
            response_selection_results, None, False, False, True
        )
        if result["response_selection_evaluation"].get('predictions'):
            del result["response_selection_evaluation"]['predictions']
            del result["response_selection_evaluation"]['report']
        for r in response_selection_results:
            if r.intent_response_key_prediction == r.intent_response_key_target:
                success_count += 1
            else:
                if not Utility.check_empty_string(r.intent_response_key_target):
                    errors.append(
                        {
                            "text": r.message,
                            "intent_response_key_target": r.intent_response_key_target,
                            "intent_response_key_prediction": {
                                "name": r.intent_response_key_prediction,
                                "confidence": r.confidence,
                            },
                        }
                    )
        result["response_selection_evaluation"]['total_count'] = success_count + len(errors)
        result["response_selection_evaluation"]['success_count'] = success_count
        result["response_selection_evaluation"]['failure_count'] = len(errors)
        result["response_selection_evaluation"]['successes'] = successes
        result["response_selection_evaluation"]['errors'] = errors

    if any(entity_results):
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = ModelTester.__evaluate_entities(entity_results, extractors)

    return result
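# Usage sketch (illustrative, not from the source): run the NLU evaluation directly
# against a trained model archive. This assumes run_test_on_nlu is exposed as a static
# method on ModelTester (as the ModelTester.__evaluate_entities call above suggests);
# the paths below are hypothetical placeholders.
nlu_result = ModelTester.run_test_on_nlu(
    nlu_path='tests/testing_data/model_tester/nlu_success/nlu.yml',
    model_path='path/to/trained/model',
)
intent_eval = nlu_result.get('intent_evaluation') or {}
print(intent_eval.get('success_count'), intent_eval.get('failure_count'), intent_eval.get('errors'))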