def _write_nlu_to_file(
    export_nlu_path: Text,
    evts: List[Dict[Text, Any]]
) -> None:
    """Write the nlu data of the sender_id to the file paths."""
    collected = _collect_messages(evts)

    # noinspection PyBroadException
    try:
        previous_examples = load_data(export_nlu_path)
    except Exception as e:
        logger.exception("An exception occurred while trying to load the "
                         "NLU data.")
        # The existing file could not be parsed -- ask the user for a
        # fresh target path and fall back to an empty training-data set.
        export_nlu_path = questionary.text(
            message="Could not load existing NLU data, please "
                    "specify where to store NLU data learned in "
                    "this session (this will overwrite any "
                    "existing file). {}".format(str(e)),
            default=PATHS["backup"]).ask()
        if export_nlu_path is None:
            # Prompt was cancelled; nothing to write.
            return
        previous_examples = TrainingData()

    merged = previous_examples.merge(TrainingData(collected))
    use_markdown = _guess_format(export_nlu_path) in {"md", "unk"}

    with io.open(export_nlu_path, 'w', encoding="utf-8") as nlu_file:
        nlu_file.write(
            merged.as_markdown() if use_markdown else merged.as_json())
def _write_nlu_to_file(
    export_nlu_path: Text,
    evts: List[Dict[Text, Any]]
) -> None:
    """Write the nlu data of the sender_id to the file paths."""
    collected = _collect_messages(evts)

    # noinspection PyBroadException
    try:
        previous_examples = load_data(export_nlu_path)
    except Exception:
        # The existing file could not be parsed -- prompt for a new
        # target path and start from an empty training-data set.
        questions = [{"name": "export nlu",
                      "type": "input",
                      "message": "Could not load existing NLU data, please "
                                 "specify where to store NLU data learned in "
                                 "this session (this will overwrite any "
                                 "existing file)",
                      "default": PATHS["backup"]}]
        export_nlu_path = prompt(questions)["export nlu"]
        previous_examples = TrainingData()

    merged = previous_examples.merge(TrainingData(collected))
    use_markdown = _guess_format(export_nlu_path) in {"md", "unk"}

    with io.open(export_nlu_path, 'w', encoding="utf-8") as nlu_file:
        nlu_file.write(
            merged.as_markdown() if use_markdown else merged.as_json())
def split_nlu_data(args):
    """Split the NLU data into a training set and a test set on disk."""
    from rasa_nlu.training_data.loading import load_data

    nlu_path = get_validated_path(args.nlu, "nlu", DEFAULT_DATA_PATH)
    nlu_directory = data.get_nlu_directory(nlu_path)

    # Split according to the requested training fraction.
    train, test = load_data(nlu_directory).train_test_split(
        args.training_fraction)

    train.persist(args.out, filename="training_data.json")
    test.persist(args.out, filename="test_data.json")
def train_nlu():
    """Train the sample NLU model and persist it.

    Loads the pipeline config and training data from the ``sample/``
    directory next to ``current_dir`` and writes the trained model to
    ``sample/models/current/nlu`` under the fixed name ``default``.
    """
    # Fix: dropped the unused `utils` and `RasaNLUModelConfig` imports
    # that the original function never referenced.
    from rasa_nlu import config
    from rasa_nlu.training_data.loading import load_data
    from rasa_nlu.model import Trainer

    nlu_config = config.load(
        os.path.join(current_dir, "sample/nlu_config.yml"))
    nlu_trainer = Trainer(nlu_config)

    nlu_training_data = load_data(os.path.join(current_dir, "sample/nlu"))
    nlu_trainer.train(nlu_training_data)

    nlu_trainer.persist(
        os.path.join(current_dir, "sample/models/current/nlu"),
        fixed_model_name="default")
def train_nlu():
    """Train the "news" NLU model and return the persisted model path."""
    from rasa_nlu.training_data.loading import load_data
    from rasa_nlu import config
    from rasa_nlu.model import Trainer

    examples = load_data("data/train_file_news.json")
    nlu_trainer = Trainer(config.load("data/nlu_model_config.yml"))
    nlu_trainer.train(examples)

    # Persist under models/news/demo and hand back the directory.
    return nlu_trainer.persist("models/",
                               project_name="news",
                               fixed_model_name="demo")
def train_nlu():
    """Train the NER/regex NLU model and return the persisted model path.

    The new loading API merges every training file found under the
    ``nlu_data/train_data`` directory before training.
    """
    # Fix: removed the unused `RasaNLUModelConfig` import and the
    # commented-out persist call; translated the comments to English.
    from rasa_nlu.training_data.loading import load_data
    from rasa_nlu.model import Trainer
    from rasa_nlu.config import load

    # Per the original author: ~46634 intent samples, ~22003 entity samples.
    training_data = load_data("nlu_data/train_data")

    # `load` returns a RasaNLUModelConfig; it is built from the parsed
    # contents of the config file (a dict), not from the file name itself.
    trainer = Trainer(load("pipeline_config.yaml"))
    trainer.train(training_data)

    model_directory = trainer.persist("models/",
                                      project_name="nlu",
                                      fixed_model_name="model_ner_reg_all")
    return model_directory
def evaluate(self, data, project=None, model=None):
    # type: (Text, Optional[Text], Optional[Text]) -> Dict[Text, Any]
    """Perform a model evaluation.

    Args:
        data: raw training-data payload to evaluate the model against.
        project: project whose model is evaluated; defaults to the
            default project name from ``RasaNLUModelConfig``.
        model: specific model name within the project, or None to use
            the project's default model.

    Returns:
        Dict with an ``"intent_evaluation"`` entry containing the
        classification report, per-example predictions, precision,
        f1_score and accuracy.

    Raises:
        InvalidProjectError: if ``project`` is not in the project store.
    """
    project = project or RasaNLUModelConfig.DEFAULT_PROJECT_NAME
    # Fix: dropped the no-op `model = model or None` statement.

    # Fix: validate the project before writing/parsing a temporary data
    # file, so invalid projects no longer leak a temp file on disk.
    if project not in self.project_store:
        raise InvalidProjectError("Project {} could not "
                                  "be found".format(project))

    file_name = utils.create_temporary_file(data, "_training_data")
    test_data = load_data(file_name)

    preds_json = self.parse_training_examples(test_data.intent_examples,
                                              project,
                                              model)

    predictions = [
        {"text": e.text,
         "intent": e.data.get("intent"),
         "predicted": p.get("intent", {}).get("name"),
         "confidence": p.get("intent", {}).get("confidence")}
        for e, p in zip(test_data.intent_examples, preds_json)
    ]

    y_true = clean_intent_labels(
        [e.data.get("intent") for e in test_data.intent_examples])
    y_pred = clean_intent_labels(
        [p.get("intent", {}).get("name") for p in preds_json])

    report, precision, f1, accuracy = get_evaluation_metrics(y_true,
                                                             y_pred)

    return {
        "intent_evaluation": {
            "report": report,
            "predictions": predictions,
            "precision": precision,
            "f1_score": f1,
            "accuracy": accuracy}
    }