def evaluate(self, data, project=None, model=None):
    # type: (Text, Optional[Text], Optional[Text]) -> Dict[Text, Any]
    """Perform a model evaluation."""

    project = project or RasaNLUModelConfig.DEFAULT_PROJECT_NAME
    model = model or None

    # Write the raw payload to a temporary file so it can be loaded
    # through the regular training-data loader.
    file_name = utils.create_temporary_file(data, "_training_data")
    test_data = load_data(file_name)

    if project not in self.project_store:
        raise InvalidProjectError("Project {} could not "
                                  "be found".format(project))

    # Run every intent example through the selected project/model.
    preds_json = self.parse_training_examples(test_data.intent_examples,
                                              project,
                                              model)

    # Pair each example with its prediction for the per-example report.
    predictions = [
        {"text": e.text,
         "intent": e.data.get("intent"),
         "predicted": p.get("intent", {}).get("name"),
         "confidence": p.get("intent", {}).get("confidence")}
        for e, p in zip(test_data.intent_examples, preds_json)
    ]

    # Gold and predicted labels, normalized before scoring.
    y_true = [e.data.get("intent") for e in test_data.intent_examples]
    y_true = clean_intent_labels(y_true)

    y_pred = [p.get("intent", {}).get("name") for p in preds_json]
    y_pred = clean_intent_labels(y_pred)

    report, precision, f1, accuracy = get_evaluation_metrics(y_true,
                                                             y_pred)

    return {
        "intent_evaluation": {
            "report": report,
            "predictions": predictions,
            "precision": precision,
            "f1_score": f1,
            "accuracy": accuracy}
    }
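
# Usage sketch (illustrative only; the DataRouter construction, file path,
# and project name below are assumptions, not taken from this module):
#
#     router = DataRouter("projects/")
#     with io.open("data/test_data.md", encoding="utf-8") as f:
#         result = router.evaluate(f.read(), project="default")
#     result["intent_evaluation"]["f1_score"]  # overall F1 score
#
# The returned dict nests all metrics under "intent_evaluation", so callers
# can serve it directly as a JSON response.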