Example 1
0
def _write_nlu_to_file(
    export_nlu_path: Text,
    evts: List[Dict[Text, Any]]
) -> None:
    """Write the nlu data of the sender_id to the file paths."""

    collected = _collect_messages(evts)

    # noinspection PyBroadException
    try:
        existing = load_data(export_nlu_path)
    except Exception as e:
        logger.exception("An exception occurred while trying to load the "
                         "NLU data.")

        # The given path could not be loaded; ask the user where the data
        # gathered in this session should be stored instead.
        export_nlu_path = questionary.text(
            message="Could not load existing NLU data, please "
                    "specify where to store NLU data learned in "
                    "this session (this will overwrite any "
                    "existing file). {}".format(str(e)),
            default=PATHS["backup"]).ask()

        # The prompt yields None when the user aborts — nothing to write.
        if export_nlu_path is None:
            return

        existing = TrainingData()

    merged = existing.merge(TrainingData(collected))

    with io.open(export_nlu_path, 'w', encoding="utf-8") as out:
        serialized = (merged.as_markdown()
                      if _guess_format(export_nlu_path) in {"md", "unk"}
                      else merged.as_json())
        out.write(serialized)
Example 2
0
    def get_data(self, language):
        """Collect lookup tables and composite entities from the train files.

        Scans every file given on the command line: Dialogflow entity files
        contribute individual lookup tables / composite entities, while a
        Rasa NLU file supplies both lists wholesale.

        :param language: language code used to pick the Dialogflow
            examples file (forwarded to ``_read_examples_js``).
        :return: tuple ``(lookup_tables, composite_entities)``.
        """
        lookup_tables = []
        composite_entities = []

        cmdline_args = create_argument_parser().parse_args()
        files = utils.list_files(cmdline_args.data)

        for file_name in files:
            fformat = _guess_format(file_name)
            file_content = utils.read_json_file(file_name)
            if fformat == DIALOGFLOW_ENTITIES:
                entity = file_content['name']
                reader = DialogflowReader()
                examples_js = reader._read_examples_js(
                    fn=file_name, language=language, fformat=fformat)
                lookup_table = self._extract_lookup_tables(entity, examples_js)
                if lookup_table:
                    lookup_tables.append(lookup_table)
                composite_entity = self._extract_composite_entities(
                    entity, examples_js)
                if composite_entity:
                    composite_entities.append(composite_entity)
            elif fformat == RASA_NLU:
                rasa_nlu_data = file_content['rasa_nlu_data']
                # Plain Rasa NLU files need not carry these extension keys;
                # default to empty lists instead of raising a KeyError.
                # NOTE(review): a Rasa NLU file replaces (does not extend) any
                # previously collected entries — presumably only one such file
                # is expected per run; confirm against the training pipeline.
                composite_entities = rasa_nlu_data.get('composite_entities', [])
                lookup_tables = rasa_nlu_data.get('lookup_tables', [])

        return lookup_tables, composite_entities
Example 3
0
def _write_nlu_to_file(
    export_nlu_path: Text,
    evts: List[Dict[Text, Any]]
) -> None:
    """Write the nlu data of the sender_id to the file paths."""

    msgs = _collect_messages(evts)

    # noinspection PyBroadException
    try:
        previous_examples = load_data(export_nlu_path)

    except Exception:
        # Log the original failure instead of silently swallowing it.
        logger.exception("An exception occurred while trying to load the "
                         "NLU data.")

        questions = [{"name": "export nlu",
                      "type": "input",
                      "message": "Could not load existing NLU data, please "
                                 "specify where to store NLU data learned in "
                                 "this session (this will overwrite any "
                                 "existing file)",
                      "default": PATHS["backup"]}]

        answers = prompt(questions)
        # `prompt` returns an empty dict when the user aborts (e.g. Ctrl-C);
        # bail out instead of raising a KeyError or writing to a bogus path.
        export_nlu_path = answers.get("export nlu")
        if not export_nlu_path:
            return

        previous_examples = TrainingData()

    nlu_data = previous_examples.merge(TrainingData(msgs))

    with io.open(export_nlu_path, 'w', encoding="utf-8") as f:
        if _guess_format(export_nlu_path) in {"md", "unk"}:
            f.write(nlu_data.as_markdown())
        else:
            f.write(nlu_data.as_json())
Example 4
0
 def _get_train_file_cmd():
     """Get the raw train data by fetching the train file given in the
     command line arguments to the train script.

     :return: path of the single Rasa-NLU-format train file.
     :raises AssertionError: if the command line data does not contain
         exactly one Rasa-NLU-format file.
     """
     cmdline_args = create_argument_parser().parse_args()
     files = utils.list_files(cmdline_args.data)
     is_rasa_format = [_guess_format(file) == RASA_NLU for file in files]
     n_rasa_format = sum(is_rasa_format)
     # TODO: Support multiple training files
     if n_rasa_format != 1:
         # Raise explicitly instead of using `assert`: this validates user
         # input and must not be stripped when running under `python -O`.
         raise AssertionError('Composite entities currently '
                              'only work with exactly one train file.')
     # Exactly one entry is True here, so its first index is the file wanted.
     return files[is_rasa_format.index(True)]