Example 1
def convert_training_data(data_file: Union[list, Text], out_file: Text,
                          output_format: Text, language: Text) -> None:
    """Convert training data.

    Args:
        data_file (Union[list, Text]): Path to the file or directory
            containing Rasa data.
        out_file (Text): File or existing path where to save
            training data in Rasa format.
        output_format (Text): Output format the training data
            should be converted into.
        language (Text): Language of the data.
    """
    if isinstance(data_file, list):
        data_file = data_file[0]

    if not os.path.exists(str(data_file)):
        print_error(
            "Data file '{}' does not exist. Provide a valid NLU data file using "
            "the '--data' argument.".format(data_file))
        return

    td = rasa.shared.nlu.training_data.loading.load_data(data_file, language)
    if output_format == "json":
        output = td.nlu_as_json(indent=2)
    else:
        output = RasaYAMLWriter().dumps(td)

    write_to_file(out_file, output)
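
Every example here delegates the actual disk write to a write_to_file helper.
A minimal sketch of such a helper, assuming it simply writes UTF-8 text to the
given path (the real Rasa utility may differ), could look like this:

def write_to_file(filename, text):
    """Write text content to a file, creating or overwriting it (sketch)."""
    with open(filename, "w", encoding="utf-8") as f:
        f.write(str(text))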
Example 2
def convert_training_data(data_file, out_file, output_format, language):
    td = training_data.load_data(data_file, language)

    if output_format == 'md':
        output = td.as_markdown()
    else:
        output = td.as_json(indent=2)

    write_to_file(out_file, output)
Example 3
def test_remove_model_invalid(empty_model_dir):
    test_file = "something.else"
    test_content = "Some other stuff"
    test_file_path = os.path.join(empty_model_dir, test_file)
    write_to_file(test_file_path, test_content)

    with pytest.raises(ValueError) as e:
        remove_model(empty_model_dir)

    os.remove(test_file_path)
Example 4
def test_remove_model_invalid(empty_model_dir):
    test_file = "something.else"
    test_content = "Some other stuff"
    test_file_path = os.path.join(empty_model_dir, test_file)
    utils.write_to_file(test_file_path, test_content)

    with pytest.raises(RasaException):
        utils.remove_model(empty_model_dir)

    os.remove(test_file_path)
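
Both test examples rely on an empty_model_dir fixture that is not shown here.
A minimal stand-in, assuming the fixture only needs to provide an empty
temporary directory, could be:

import pytest

@pytest.fixture
def empty_model_dir(tmp_path):
    # tmp_path is pytest's built-in temporary directory fixture; returning it
    # as a string gives the tests an empty model directory to work with.
    return str(tmp_path)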
Example 5
    def dump_stories_to_file_system(self,
                                    original_story_filename: Text) -> None:
        """Dump Rasa Core stories in database to file."""
        if not original_story_filename:
            logger.error(
                "Failed to dump stories to the file: original file not found")
            return

        logger.debug(f"Dumping stories to file '{original_story_filename}'.")
        stories = self.fetch_stories(None, filename=original_story_filename)
        markdown = self.get_stories_markdown(stories)
        write_to_file(original_story_filename, markdown)
Example 6
    def persist(
            self,
            dir_name: Text,
            filename: Text = DEFAULT_TRAINING_DATA_OUTPUT_PATH
    ) -> Dict[Text, Any]:
        """Persists this training data to disk and returns necessary
        information to load it again."""

        if not os.path.exists(dir_name):
            os.makedirs(dir_name)

        data_file = os.path.join(dir_name, filename)
        write_to_file(data_file, self.as_json(indent=2))

        return {"training_data": DEFAULT_TRAINING_DATA_OUTPUT_PATH}
Example 7
def convert_training_data(
    data_file: Text, out_file: Text, output_format: Text, language: Text
):
    if not os.path.exists(data_file):
        print_error("Data file '{}' does not exist.".format(data_file))
        return

    if output_format == "json":
        td = training_data.load_data(data_file, language)
        output = td.as_json(indent=2)
    elif output_format == "md":
        td = training_data.load_data(data_file, language)
        output = td.as_markdown()
    else:
        print_error(
            "Did not recognize data format. Supported data formats: 'json' and 'md'."
        )
        return

    write_to_file(out_file, output)
Example 8
def convert_training_data(data_file: Text, out_file: Text, output_format: Text,
                          language: Text):
    if not os.path.exists(data_file):
        print_error(
            "Data file '{}' does not exist. Provide a valid NLU data file using "
            "the '--data' argument.".format(data_file))
        return

    if output_format == "json":
        td = training_data.load_data(data_file, language)
        output = td.as_json(indent=2)
    elif output_format == "md":
        td = training_data.load_data(data_file, language)
        output = td.as_markdown()
    else:
        print_error(
            "Did not recognize output format. Supported output formats: 'json' and "
            "'md'. Specify the desired output format with '--format'.")
        return

    write_to_file(out_file, output)
Example 9
    def persist(
        self, dir_name: Text, filename: Text = DEFAULT_TRAINING_DATA_OUTPUT_PATH
    ) -> Dict[Text, Any]:
        """Persists this training data to disk and returns necessary
        information to load it again."""

        if not os.path.exists(dir_name):
            os.makedirs(dir_name)

        data_file = os.path.join(dir_name, filename)

        if data_file.endswith("json"):
            write_to_file(data_file, self.as_json(indent=2))
        elif data_file.endswith("md"):
            write_to_file(data_file, self.as_markdown())
        else:
            raise ValueError(
                "Unsupported file format detected. Supported file formats are 'json' "
                "and 'md'."
            )

        return {"training_data": relpath(data_file, dir_name)}
Example 10
def convert_training_data(
    data_file: Union[list, Text], out_file: Text, output_format: Text, language: Text
) -> None:
    """Convert training data.

    Args:
        data_file (Union[list, Text]): Path to the file or directory
            containing Rasa data.
        out_file (Text): File or existing path where to save
            training data in Rasa format.
        output_format (Text): Output format the training data
            should be converted into.
        language (Text): Language of the data.
    """
    if isinstance(data_file, list):
        data_file = data_file[0]

    if not os.path.exists(str(data_file)):
        print_error(
            "Data file '{}' does not exist. Provide a valid NLU data file using "
            "the '--data' argument.".format(data_file)
        )
        return

    if output_format == "json":
        td = rasa.shared.nlu.training_data.loading.load_data(data_file, language)
        output = td.nlu_as_json(indent=2)
    elif output_format == "md":
        td = rasa.shared.nlu.training_data.loading.load_data(data_file, language)
        output = td.nlu_as_markdown()
    else:
        print_error(
            "Did not recognize output format. Supported output formats: 'json' and "
            "'md'. Specify the desired output format with '--format'."
        )
        return

    write_to_file(out_file, output)
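
For illustration, a call to the Example 10 variant could look like the sketch
below; the file paths and language code are made-up placeholders:

convert_training_data(
    data_file=["data/nlu.json"],
    out_file="data/nlu_converted.md",
    output_format="md",
    language="en",
)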
Example 11
def compare_nlu(
    configs: List[Text],
    data: TrainingData,
    exclusion_percentages: List[int],
    f_score_results: Dict[Text, Any],
    model_names: List[Text],
    output: Text,
    runs: int,
) -> List[int]:
    """
    Trains and compares multiple NLU models.
    For each run and exclusion percentage a model per config file is trained.
    Thereby, the model is trained only on the current percentage of training data.
    Afterwards, the model is tested on the complete test data of that run.
    All results are stored in the provided output directory.

    Args:
        configs: config files needed for training
        data: training data
        exclusion_percentages: percentages of training data to exclude during comparison
        f_score_results: dictionary of model name to f-score results per run
        model_names: names of the models to train
        output: the output directory
        runs: number of comparison runs

    Returns: training examples per run
    """

    training_examples_per_run = []

    for run in range(runs):

        logger.info("Beginning comparison run {}/{}".format(run + 1, runs))

        run_path = os.path.join(output, "run_{}".format(run + 1))
        create_path(run_path)

        test_path = os.path.join(run_path, TEST_DATA_FILE)
        create_path(test_path)

        train, test = data.train_test_split()
        write_to_file(test_path, test.as_markdown())

        # Reset for each run; only the counts from the last run are returned.
        training_examples_per_run = []

        for percentage in exclusion_percentages:
            percent_string = "{}%_exclusion".format(percentage)

            _, train = train.train_test_split(percentage / 100)
            training_examples_per_run.append(len(train.training_examples))

            model_output_path = os.path.join(run_path, percent_string)
            train_split_path = os.path.join(model_output_path, TRAIN_DATA_FILE)
            create_path(train_split_path)
            write_to_file(train_split_path, train.as_markdown())

            for nlu_config, model_name in zip(configs, model_names):
                logger.info(
                    "Evaluating configuration '{}' with {} training data.".format(
                        model_name, percent_string
                    )
                )

                try:
                    model_path = train_nlu(
                        nlu_config,
                        train_split_path,
                        model_output_path,
                        fixed_model_name=model_name,
                    )
                except Exception as e:
                    logger.warning(
                        "Training model '{}' failed. Error: {}".format(
                            model_name, str(e)))
                    f_score_results[model_name][run].append(0.0)
                    continue

                model_path = os.path.join(get_model(model_path), "nlu")

                report_path = os.path.join(model_output_path,
                                           "{}_report".format(model_name))
                errors_path = os.path.join(report_path, "errors.json")
                result = run_evaluation(test_path,
                                        model_path,
                                        report=report_path,
                                        errors=errors_path)

                f1 = result["intent_evaluation"]["f1_score"]
                f_score_results[model_name][run].append(f1)

    return training_examples_per_run
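
One way a caller might prepare the f_score_results structure that compare_nlu
fills in, inferred from the f_score_results[model_name][run].append(f1) access
pattern above; the config names and run count are placeholders:

model_names = ["config_a", "config_b"]
runs = 3
# One empty list of f1 scores per model and per run.
f_score_results = {name: [[] for _ in range(runs)] for name in model_names}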
Example 12
def save_json(data, filename):
    """Write out nlu classification to a file."""

    utils.write_to_file(filename, json.dumps(data,
                                             indent=4,
                                             ensure_ascii=False))

    def dump(self, filename, training_data):
        """Writes a TrainingData object in markdown format to a file."""
        s = self.dumps(training_data)
        utils.write_to_file(filename, s)
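
A small usage sketch for save_json() above, with an invented classification
payload:

save_json({"intent": {"name": "greet", "confidence": 0.98}}, "result.json")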