def test_intent_evaluation_report_large(tmpdir_factory: TempdirFactory):
    """The intent report aggregates per-label metrics and confusions over many labels."""
    base_dir = tmpdir_factory.mktemp("evaluation")
    report_dir = base_dir / "reports"
    report_file = report_dir / "intent_report.json"
    rasa.utils.io.create_directory(str(report_dir))

    def hit(label: Text) -> IntentEvaluationResult:
        # A prediction that matches its target label.
        return IntentEvaluationResult(label, label, "", 1.0)

    def miss(target: Text, predicted: Text) -> IntentEvaluationResult:
        # A prediction that differs from its target label.
        return IntentEvaluationResult(target, predicted, "", 1.0)

    intent_results = (
        [hit("A")] * 10
        + [hit("B")] * 7
        + [miss("B", "C")] * 3
        + [hit("C")] * 3
        + [miss("C", "D")]
        + [miss("C", "E")]
        + [hit("D")] * 29
        + [miss("D", "B")] * 3
        + [miss("E", "C")] * 5
        + [miss("E", "")] * 5
    )

    evaluate_intents(
        intent_results,
        report_dir,
        successes=False,
        errors=False,
        confmat_filename=None,
        intent_hist_filename=None,
        disable_plotting=False,
    )

    report = json.loads(rasa.utils.io.read_file(str(report_file)))

    expected_a = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 10,
        "confused_with": {},
    }
    expected_e = {
        "precision": 0.0,
        "recall": 0.0,
        "f1-score": 0.0,
        "support": 10,
        "confused_with": {"C": 5, "": 5},
    }
    expected_c_confusions = {"D": 1, "E": 1}

    assert len(report.keys()) == 8
    assert report["A"] == expected_a
    assert report["E"] == expected_e
    assert report["C"]["confused_with"] == expected_c_confusions
def test_intent_evaluation_report(tmpdir_factory):
    """The on-disk intent report and the returned predictions reflect the evaluation."""
    base_path = tmpdir_factory.mktemp("evaluation").strpath
    report_dir = os.path.join(base_path, "reports")
    report_file = os.path.join(report_dir, "intent_report.json")
    utils.create_dir(report_dir)

    intent_results = [
        IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet", "hello", 0.98765),
    ]

    result = evaluate_intents(
        intent_results,
        report_dir,
        successes_filename=None,
        errors_filename=None,
        confmat_filename=None,
        intent_hist_filename=None,
    )

    report = json.loads(utils.read_file(report_file))

    expected_greet = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 1,
    }
    expected_prediction = {
        'text': 'hello',
        'intent': 'greet',
        'predicted': 'greet',
        'confidence': 0.98765,
    }

    assert len(report.keys()) == 4
    assert report["greet"] == expected_greet
    assert result["predictions"][0] == expected_prediction
def test_intent_evaluation_report(tmp_path: Path):
    """Evaluation writes the report, plots and success file; the error file is absent here."""
    eval_dir = tmp_path / "evaluation"
    eval_dir.mkdir()
    report_dir = str(eval_dir / "reports")
    report_file = os.path.join(report_dir, "intent_report.json")
    rasa.shared.utils.io.create_directory(report_dir)

    intent_results = [
        IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet", "hello", 0.98765),
    ]

    result = evaluate_intents(
        intent_results,
        report_dir,
        successes=True,
        errors=True,
        disable_plotting=False,
    )

    report = json.loads(rasa.shared.utils.io.read_file(report_file))

    expected_greet = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 1,
        "confused_with": {},
    }
    expected_prediction = {
        "text": "hello",
        "intent": "greet",
        "predicted": "greet",
        "confidence": 0.98765,
    }

    assert len(report.keys()) == 4
    assert report["greet"] == expected_greet
    assert result["predictions"][0] == expected_prediction

    # Plot and success artifacts are produced; no error file is written for this data.
    assert os.path.exists(os.path.join(report_dir, "intent_confusion_matrix.png"))
    assert os.path.exists(os.path.join(report_dir, "intent_histogram.png"))
    assert not os.path.exists(os.path.join(report_dir, "intent_errors.json"))
    assert os.path.exists(os.path.join(report_dir, "intent_successes.json"))
def test_intent_evaluation_report(tmpdir_factory):
    """The on-disk intent report and the returned predictions reflect the evaluation."""
    base_path = tmpdir_factory.mktemp("evaluation").strpath
    report_dir = os.path.join(base_path, "reports")
    report_file = os.path.join(report_dir, "intent_report.json")
    rasa.utils.io.create_directory(report_dir)

    intent_results = [
        IntentEvaluationResult("", "restaurant_search", "I am hungry", 0.12345),
        IntentEvaluationResult("greet", "greet", "hello", 0.98765),
    ]

    result = evaluate_intents(
        intent_results,
        report_dir,
        successes=False,
        errors=False,
        confmat_filename=None,
        intent_hist_filename=None,
        disable_plotting=False,
    )

    report = json.loads(rasa.utils.io.read_file(report_file))

    expected_greet = {
        "precision": 1.0,
        "recall": 1.0,
        "f1-score": 1.0,
        "support": 1,
        "confused_with": {},
    }
    expected_prediction = {
        "text": "hello",
        "intent": "greet",
        "predicted": "greet",
        "confidence": 0.98765,
    }

    assert len(report.keys()) == 4
    assert report["greet"] == expected_greet
    assert result["predictions"][0] == expected_prediction
def run_test_on_nlu(nlu_path: str, model_path: str):
    """
    Run NLU evaluation on the given test data against a trained model.

    Args:
        nlu_path: path where nlu test data is present as YAML.
        model_path: path of the trained model on which the test has to be run.

    Returns:
        dictionary with intent, entity and response-selection evaluation results
    """
    from rasa.model import get_model
    import rasa.shared.nlu.training_data.loading
    from rasa.nlu.model import Interpreter
    from rasa.nlu.test import (
        remove_pretrained_extractors,
        get_eval_data,
        evaluate_intents,
        evaluate_response_selections,
        get_entity_extractors,
    )
    from kairon import Utility

    unpacked_model = get_model(model_path)
    nlu_model = os.path.join(unpacked_model, "nlu")
    interpreter = Interpreter.load(nlu_model)
    # Pretrained extractors cannot be evaluated meaningfully, so drop them.
    interpreter.pipeline = remove_pretrained_extractors(interpreter.pipeline)
    test_data = rasa.shared.nlu.training_data.loading.load_data(
        nlu_path, interpreter.model_metadata.language
    )
    result: Dict[Text, Optional[Dict]] = {
        "intent_evaluation": None,
        "entity_evaluation": None,
        "response_selection_evaluation": None,
    }

    (intent_results, response_selection_results, entity_results) = get_eval_data(
        interpreter, test_data
    )

    if intent_results:
        # Success payloads are deliberately not stored (to keep the result small);
        # only the count is kept.
        successes = []
        errors = []
        success_count = 0
        result["intent_evaluation"] = evaluate_intents(
            intent_results, None, False, False, True
        )
        # Drop the bulky per-message predictions/report from rasa's result.
        if result["intent_evaluation"].get('predictions'):
            del result["intent_evaluation"]['predictions']
            del result["intent_evaluation"]['report']
        for r in intent_results:
            if r.intent_target == r.intent_prediction:
                # BUG FIX: previously this branch was a bare `pass` (the append was
                # commented out), so success_count was always 0 and total_count
                # equalled failure_count. Count the success without storing it.
                success_count += 1
            else:
                errors.append({
                    "text": r.message,
                    "intent": r.intent_target,
                    "intent_prediction": {
                        'name': r.intent_prediction,
                        "confidence": r.confidence,
                    },
                })
        result["intent_evaluation"]['total_count'] = success_count + len(errors)
        result["intent_evaluation"]['success_count'] = success_count
        result["intent_evaluation"]['failure_count'] = len(errors)
        result["intent_evaluation"]['successes'] = successes
        result["intent_evaluation"]['errors'] = errors

    if response_selection_results:
        # Same scheme as intents: count successes, store only error payloads.
        successes = []
        errors = []
        success_count = 0
        result["response_selection_evaluation"] = evaluate_response_selections(
            response_selection_results, None, False, False, True
        )
        if result["response_selection_evaluation"].get('predictions'):
            del result["response_selection_evaluation"]['predictions']
            del result["response_selection_evaluation"]['report']
        for r in response_selection_results:
            if r.intent_response_key_prediction == r.intent_response_key_target:
                # BUG FIX: successes were silently dropped here as well.
                success_count += 1
            elif not Utility.check_empty_string(r.intent_response_key_target):
                # Misses with an empty target are excluded from the error list
                # (and therefore from the counts), as in the original logic.
                errors.append(
                    {
                        "text": r.message,
                        "intent_response_key_target": r.intent_response_key_target,
                        "intent_response_key_prediction": {
                            "name": r.intent_response_key_prediction,
                            "confidence": r.confidence,
                        },
                    }
                )
        result["response_selection_evaluation"]['total_count'] = success_count + len(errors)
        result["response_selection_evaluation"]['success_count'] = success_count
        result["response_selection_evaluation"]['failure_count'] = len(errors)
        result["response_selection_evaluation"]['successes'] = successes
        result["response_selection_evaluation"]['errors'] = errors

    if any(entity_results):
        extractors = get_entity_extractors(interpreter)
        result["entity_evaluation"] = ModelTester.__evaluate_entities(
            entity_results, extractors
        )

    return result