async def _write_domain_to_file(
    domain_path: Text, events: List[Dict[Text, Any]], endpoint: EndpointConfig
) -> None:
    """Write an updated domain file to the file path."""
    create_path(domain_path)

    # Current domain as known to the server, merged below with everything
    # newly observed in the tracker events.
    server_domain = Domain.from_dict(await retrieve_domain(endpoint))

    messages = _collect_messages(events)

    # TODO for now there is no way to distinguish between action and form
    seen_action_names = {
        event["name"]
        for event in _collect_actions(events)
        if event["name"] not in default_action_names()
    }

    domain_from_events = Domain(
        intents=_intents_from_messages(messages),
        entities=_entities_from_messages(messages),
        slots=[],
        templates=NEW_TEMPLATES,
        action_names=list(seen_action_names),
        form_names=[],
    )

    server_domain.merge(domain_from_events).persist_clean(domain_path)
def write_text_to_file(filename: Union[Text, Path], content: Text) -> None:
    """Writes text to a file.

    Args:
        filename: Path of the target file. Missing parent directories
            are created.
        content: Text written to the file, UTF-8 encoded.
    """
    path = Path(filename)
    # Create parent directories so the write never fails on a fresh tree.
    # (pathlib covers this directly; no project helper import needed.)
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(content, encoding="utf-8")
def compare_nlu_models(
    configs: List[Text],
    nlu: Text,
    output: Text,
    runs: int,
    exclusion_percentages: List[int],
):
    """Trains multiple models, compares them and saves the results."""
    from rasa.nlu.test import drop_intents_below_freq
    from rasa.nlu.training_data import load_data
    from rasa.nlu.utils import write_json_to_file
    from rasa.utils.io import create_path
    from rasa.nlu.test import compare_nlu
    from rasa.core.test import plot_nlu_results

    # Rare intents would make the per-run train/test splits unstable.
    data = drop_intents_below_freq(load_data(nlu), cutoff=5)

    create_path(output)

    # One model name per config file: the file name without its extension.
    model_names = [
        os.path.splitext(os.path.basename(config_file))[0] for config_file in configs
    ]

    # model name -> one f1-score list per comparison run, filled by compare_nlu.
    f1_score_results = {name: [[] for _ in range(runs)] for name in model_names}

    training_examples_per_run = compare_nlu(
        configs,
        data,
        exclusion_percentages,
        f1_score_results,
        model_names,
        output,
        runs,
    )

    write_json_to_file(os.path.join(output, RESULTS_FILE), f1_score_results)
    plot_nlu_results(output, training_examples_per_run)
async def _write_stories_to_file(
    export_story_path: Text, events: List[Dict[Text, Any]]
) -> None:
    """Write the conversation of the sender_id to the file paths.

    Args:
        export_story_path: Target story file. Created (including parent
            directories) if missing, appended to otherwise.
        events: Serialized tracker events of one conversation.
    """
    sub_conversations = _split_conversation_at_restarts(events)

    create_path(export_story_path)

    # Mode "a" appends to an existing file and creates a missing one,
    # which is exactly what the previous os.path.exists() branch chose
    # between — minus the race between the check and the open.
    with open(export_story_path, "a", encoding="utf-8") as f:
        for conversation in sub_conversations:
            parsed_events = rasa.core.events.deserialise_events(conversation)
            s = Story.from_events(parsed_events)
            f.write("\n" + s.as_story_string(flat=True))
def compare_nlu(
    configs: List[Text],
    data: TrainingData,
    exclusion_percentages: List[int],
    f_score_results: Dict[Text, Any],
    model_names: List[Text],
    output: Text,
    runs: int,
) -> List[int]:
    """
    Trains and compares multiple NLU models.
    For each run and exclusion percentage a model per config file is trained.
    Thereby, the model is trained only on the current percentage of training data.
    Afterwards, the model is tested on the complete test data of that run.
    All results are stored in the provided output directory.

    Args:
        configs: config files needed for training
        data: training data
        exclusion_percentages: percentages of training data to exclude during comparison
        f_score_results: dictionary of model name to f-score results per run
        model_names: names of the models to train
        output: the output directory
        runs: number of comparison runs

    Returns: training examples per run
    """
    training_examples_per_run = []

    for run in range(runs):
        logger.info("Beginning comparison run {}/{}".format(run + 1, runs))

        run_path = os.path.join(output, "run_{}".format(run + 1))
        create_path(run_path)

        test_path = os.path.join(run_path, TEST_DATA_FILE)
        create_path(test_path)

        # Fresh train/test split per run; the same held-out test set is
        # used for every percentage and every config in this run.
        train, test = data.train_test_split()
        write_to_file(test_path, test.as_markdown())
        # Reset per run: only the counts of the final run are returned.
        training_examples_per_run = []

        for percentage in exclusion_percentages:
            percent_string = "{}%_exclusion".format(percentage)

            # NOTE(review): `train` shrinks cumulatively — each percentage
            # splits the remainder from the previous iteration, not the
            # full training set.
            _, train = train.train_test_split(percentage / 100)
            training_examples_per_run.append(len(train.training_examples))

            model_output_path = os.path.join(run_path, percent_string)
            train_split_path = os.path.join(model_output_path, TRAIN_DATA_FILE)
            create_path(train_split_path)
            write_to_file(train_split_path, train.as_markdown())

            for nlu_config, model_name in zip(configs, model_names):
                logger.info(
                    "Evaluating configuration '{}' with {} training data.".format(
                        model_name, percent_string
                    )
                )

                try:
                    model_path = train_nlu(
                        nlu_config,
                        train_split_path,
                        model_output_path,
                        fixed_model_name=model_name,
                    )
                except Exception as e:
                    # A failed training is scored 0.0 so result lists stay
                    # aligned across configurations and percentages.
                    logger.warning(
                        "Training model '{}' failed. Error: {}".format(
                            model_name, str(e)
                        )
                    )
                    f_score_results[model_name][run].append(0.0)
                    continue

                # Evaluate the unpacked model's NLU sub-directory.
                model_path = os.path.join(get_model(model_path), "nlu")

                report_path = os.path.join(
                    model_output_path, "{}_report".format(model_name)
                )
                errors_path = os.path.join(report_path, "errors.json")

                result = run_evaluation(
                    test_path, model_path, report=report_path, errors=errors_path
                )

                f1 = result["intent_evaluation"]["f1_score"]
                f_score_results[model_name][run].append(f1)

    return training_examples_per_run