def convert_training_data(
    data_file: Union[list, Text], out_file: Text, output_format: Text, language: Text
) -> None:
    """Convert training data.

    Args:
        data_file (Union[list, Text]): Path to the file or directory containing Rasa data.
        out_file (Text): File or existing path where to save training data in Rasa format.
        output_format (Text): Output format the training data should be converted into.
        language (Text): Language of the data.
    """
    if isinstance(data_file, list):
        data_file = data_file[0]

    if not os.path.exists(str(data_file)):
        print_error(
            "Data file '{}' does not exist. Provide a valid NLU data file using "
            "the '--data' argument.".format(data_file)
        )
        return

    td = rasa.shared.nlu.training_data.loading.load_data(data_file, language)
    if output_format == "json":
        output = td.nlu_as_json(indent=2)
    else:
        output = RasaYAMLWriter().dumps(td)

    write_to_file(out_file, output)
def convert_training_data(data_file, out_file, output_format, language):
    td = training_data.load_data(data_file, language)

    if output_format == "md":
        output = td.as_markdown()
    else:
        output = td.as_json(indent=2)

    write_to_file(out_file, output)
def test_remove_model_invalid(empty_model_dir):
    test_file = "something.else"
    test_content = "Some other stuff"
    test_file_path = os.path.join(empty_model_dir, test_file)
    write_to_file(test_file_path, test_content)

    with pytest.raises(ValueError):
        remove_model(empty_model_dir)

    os.remove(test_file_path)
def test_remove_model_invalid(empty_model_dir):
    test_file = "something.else"
    test_content = "Some other stuff"
    test_file_path = os.path.join(empty_model_dir, test_file)
    utils.write_to_file(test_file_path, test_content)

    with pytest.raises(RasaException):
        utils.remove_model(empty_model_dir)

    os.remove(test_file_path)
def dump_stories_to_file_system(self, original_story_filename: Text) -> None:
    """Dump Rasa Core stories in database to file."""
    if not original_story_filename:
        logger.error("Failed to dump stories to the file: original file not found")
        return

    logger.debug(f"Dumping stories to file '{original_story_filename}'.")
    stories = self.fetch_stories(None, filename=original_story_filename)
    markdown = self.get_stories_markdown(stories)
    write_to_file(original_story_filename, markdown)
def persist(
    self, dir_name: Text, filename: Text = DEFAULT_TRAINING_DATA_OUTPUT_PATH
) -> Dict[Text, Any]:
    """Persists this training data to disk and returns necessary
    information to load it again."""
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

    data_file = os.path.join(dir_name, filename)
    write_to_file(data_file, self.as_json(indent=2))

    return {"training_data": DEFAULT_TRAINING_DATA_OUTPUT_PATH}
def convert_training_data(
    data_file: Text, out_file: Text, output_format: Text, language: Text
):
    if not os.path.exists(data_file):
        print_error("Data file '{}' does not exist.".format(data_file))
        return

    if output_format == "json":
        td = training_data.load_data(data_file, language)
        output = td.as_json(indent=2)
    elif output_format == "md":
        td = training_data.load_data(data_file, language)
        output = td.as_markdown()
    else:
        print_error(
            "Did not recognize data format. Supported data formats: 'json' and 'md'."
        )
        return

    write_to_file(out_file, output)
def convert_training_data(
    data_file: Text, out_file: Text, output_format: Text, language: Text
):
    if not os.path.exists(data_file):
        print_error(
            "Data file '{}' does not exist. Provide a valid NLU data file using "
            "the '--data' argument.".format(data_file)
        )
        return

    if output_format == "json":
        td = training_data.load_data(data_file, language)
        output = td.as_json(indent=2)
    elif output_format == "md":
        td = training_data.load_data(data_file, language)
        output = td.as_markdown()
    else:
        print_error(
            "Did not recognize output format. Supported output formats: 'json' and "
            "'md'. Specify the desired output format with '--format'."
        )
        return

    write_to_file(out_file, output)
def persist(
    self, dir_name: Text, filename: Text = DEFAULT_TRAINING_DATA_OUTPUT_PATH
) -> Dict[Text, Any]:
    """Persists this training data to disk and returns necessary
    information to load it again."""
    if not os.path.exists(dir_name):
        os.makedirs(dir_name)

    data_file = os.path.join(dir_name, filename)
    if data_file.endswith("json"):
        write_to_file(data_file, self.as_json(indent=2))
    elif data_file.endswith("md"):
        write_to_file(data_file, self.as_markdown())
    else:
        raise ValueError(
            "Unsupported file format detected. Supported file formats are 'json' "
            "and 'md'."
        )

    return {"training_data": relpath(data_file, dir_name)}
def convert_training_data(
    data_file: Union[list, Text], out_file: Text, output_format: Text, language: Text
) -> None:
    """Convert training data.

    Args:
        data_file (Union[list, Text]): Path to the file or directory containing Rasa data.
        out_file (Text): File or existing path where to save training data in Rasa format.
        output_format (Text): Output format the training data should be converted into.
        language (Text): Language of the data.
    """
    if isinstance(data_file, list):
        data_file = data_file[0]

    if not os.path.exists(str(data_file)):
        print_error(
            "Data file '{}' does not exist. Provide a valid NLU data file using "
            "the '--data' argument.".format(data_file)
        )
        return

    if output_format == "json":
        td = rasa.shared.nlu.training_data.loading.load_data(data_file, language)
        output = td.nlu_as_json(indent=2)
    elif output_format == "md":
        td = rasa.shared.nlu.training_data.loading.load_data(data_file, language)
        output = td.nlu_as_markdown()
    else:
        print_error(
            "Did not recognize output format. Supported output formats: 'json' and "
            "'md'. Specify the desired output format with '--format'."
        )
        return

    write_to_file(out_file, output)
def compare_nlu(
    configs: List[Text],
    data: TrainingData,
    exclusion_percentages: List[int],
    f_score_results: Dict[Text, Any],
    model_names: List[Text],
    output: Text,
    runs: int,
) -> List[int]:
    """Trains and compares multiple NLU models.

    For each run and exclusion percentage a model per config file is trained.
    Thereby, the model is trained only on the current percentage of training data.
    Afterwards, the model is tested on the complete test data of that run.
    All results are stored in the provided output directory.

    Args:
        configs: config files needed for training
        data: training data
        exclusion_percentages: percentages of training data to exclude during comparison
        f_score_results: dictionary of model name to f-score results per run
        model_names: names of the models to train
        output: the output directory
        runs: number of comparison runs

    Returns:
        training examples per run
    """
    training_examples_per_run = []

    for run in range(runs):
        logger.info("Beginning comparison run {}/{}".format(run + 1, runs))

        run_path = os.path.join(output, "run_{}".format(run + 1))
        create_path(run_path)

        test_path = os.path.join(run_path, TEST_DATA_FILE)
        create_path(test_path)

        train, test = data.train_test_split()
        write_to_file(test_path, test.as_markdown())

        training_examples_per_run = []

        for percentage in exclusion_percentages:
            percent_string = "{}%_exclusion".format(percentage)

            _, train = train.train_test_split(percentage / 100)
            training_examples_per_run.append(len(train.training_examples))

            model_output_path = os.path.join(run_path, percent_string)
            train_split_path = os.path.join(model_output_path, TRAIN_DATA_FILE)
            create_path(train_split_path)
            write_to_file(train_split_path, train.as_markdown())

            for nlu_config, model_name in zip(configs, model_names):
                logger.info(
                    "Evaluating configuration '{}' with {} training data.".format(
                        model_name, percent_string
                    )
                )

                try:
                    model_path = train_nlu(
                        nlu_config,
                        train_split_path,
                        model_output_path,
                        fixed_model_name=model_name,
                    )
                except Exception as e:
                    logger.warning(
                        "Training model '{}' failed. Error: {}".format(
                            model_name, str(e)
                        )
                    )
                    f_score_results[model_name][run].append(0.0)
                    continue

                model_path = os.path.join(get_model(model_path), "nlu")

                report_path = os.path.join(
                    model_output_path, "{}_report".format(model_name)
                )
                errors_path = os.path.join(report_path, "errors.json")
                result = run_evaluation(
                    test_path, model_path, report=report_path, errors=errors_path
                )

                f1 = result["intent_evaluation"]["f1_score"]
                f_score_results[model_name][run].append(f1)

    return training_examples_per_run
def save_json(data, filename):
    """Write out nlu classification to a file."""
    utils.write_to_file(filename, json.dumps(data, indent=4, ensure_ascii=False))
def dump(self, filename, training_data):
    """Writes a TrainingData object in markdown format to a file."""
    s = self.dumps(training_data)
    utils.write_to_file(filename, s)
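Every example above delegates the actual disk write to a write_to_file(filename, text) helper, called either directly or as utils.write_to_file. As a minimal sketch, assuming the helper simply coerces its input to a string and writes it out as UTF-8 (not necessarily the exact Rasa implementation), it could look like this:

from typing import Any, Text


def write_to_file(filename: Text, text: Any) -> None:
    """Write the given text to `filename`, creating or overwriting the file.

    Minimal sketch under the assumption above: the content is coerced to
    `str` and written with UTF-8 encoding.
    """
    with open(filename, "w", encoding="utf-8") as f:
        f.write(str(text))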