Ejemplo n.º 1
0
    def sample_sentences(intent_name: str, intent_fst_path: str):
        """Sample random sentences from the FST file at ``intent_fst_path``.

        Runs the ``fstrandgen`` command-line tool with ``--npath`` set to the
        enclosing scope's ``num_samples``, loads its output as an FST, and
        converts every printed path into an intent via ``symbols2intent``.

        Args:
            intent_name: Name of the intent; not used by the body itself.
            intent_fst_path: Path of the FST file handed to ``fstrandgen``.

        Returns:
            A list with one ``symbols2intent`` result per sampled path.

        Raises:
            subprocess.CalledProcessError: if ``fstrandgen`` exits non-zero.
        """
        command = ["fstrandgen", f"--npath={num_samples}", intent_fst_path]
        sampled_fst_data = subprocess.check_output(command)
        sampled_fst = fst.Fst.read_from_string(sampled_fst_data)

        # Meta symbols are kept (exclude_meta=False) when printing the paths.
        sampled: List[Dict[str, Any]] = [
            symbols2intent(path_symbols)
            for path_symbols in fstprintall(sampled_fst, exclude_meta=False)
        ]
        return sampled
Ejemplo n.º 2
0
def make_sentences_by_intent(intent_fst: fst.Fst) -> Dict[str, Any]:
    """Group every sentence of ``intent_fst`` under its intent name.

    Each path printed by ``fstprintall`` is turned into an intent with
    ``symbols2intent`` and filed under ``intent["intent"]["name"]``.

    Args:
        intent_fst: FST whose paths are enumerated.

    Returns:
        A ``defaultdict(list)`` mapping intent name to the list of intents
        produced for that name, in enumeration order.
    """
    from rhasspy.train.jsgf2fst import fstprintall, symbols2intent

    grouped: Dict[str, Any] = defaultdict(list)

    # map() is lazy, so paths are still converted one at a time.
    all_paths = fstprintall(intent_fst, exclude_meta=False)
    for parsed in map(symbols2intent, all_paths):
        grouped[parsed["intent"]["name"]].append(parsed)

    return grouped
Ejemplo n.º 3
0
    def train(self, intent_fst: fst.Fst):
        """Feed the FST's sentences, grouped by intent, to ``self.command``.

        The sentences are serialized as a JSON object of the form
        ``{intent_name: [intent, ...]}`` and written to the standard input of
        the external command. Any failure (including a non-zero exit status)
        is logged with its traceback and not re-raised.

        Args:
            intent_fst: FST whose paths are enumerated with ``fstprintall``.
        """
        self._logger.debug(self.command)

        try:
            grouped: Dict[str, Any] = defaultdict(list)

            all_paths = fstprintall(intent_fst, exclude_meta=False)
            for parsed in map(symbols2intent, all_paths):
                grouped[parsed["intent"]["name"]].append(parsed)

            # The external program receives the JSON document on STDIN.
            payload = json.dumps(grouped).encode()
            subprocess.run(self.command, input=payload, check=True)
        except Exception:
            # Best effort: record the failure instead of propagating it.
            self._logger.exception("train")
Ejemplo n.º 4
0
    def train(self, intent_fst) -> None:
        """Convert examples to Markdown and POST to RasaNLU server.

        Every path of ``intent_fst`` is rendered as one example in Rasa's
        Markdown training format, with entities written as ``[text](tag)``,
        and grouped under its intent. The examples are written to the
        profile's examples file and sent, together with a minimal pipeline
        configuration, to the ``model/train`` endpoint of the configured
        server.

        Args:
            intent_fst: FST whose paths are enumerated with ``fstprintall``.

        Raises:
            Exception: if the server answers with an HTTP error status. The
                message is ``"<reason>: <server message>"``; the original
                HTTP error is attached as the cause.
        """
        from rhasspy.train.jsgf2fst import fstprintall
        import requests

        # Load settings
        language = self.profile.get("language", "en")
        rasa_config = self.profile.get("intent.rasa", {})

        url = rasa_config.get("url", "http://localhost:5005")
        project_name = rasa_config.get("project_name", "rhasspy")

        examples_md_path = self.profile.write_path(
            rasa_config.get("examples_markdown", "intent_examples.md"))

        # Build Markdown sentences: { intent: [ "turn [on](state)", ... ] }
        sentences_by_intent: Dict[str, Any] = defaultdict(list)
        for symbols in fstprintall(intent_fst, exclude_meta=False):
            intent_name = ""
            strings = []
            for sym in symbols:
                if sym.startswith("<"):
                    continue  # <eps>

                if sym.startswith("__label__"):
                    intent_name = sym[len("__label__"):]
                elif sym.startswith("__begin__"):
                    strings.append("[")
                elif sym.startswith("__end__"):
                    # Drop the space after the last word so the entity text
                    # closes as "[word](tag)" rather than "[word ](tag)".
                    strings[-1] = strings[-1].strip()
                    tag = sym[len("__end__"):]
                    strings.append(f"]({tag})")
                    strings.append(" ")
                else:
                    strings.append(sym)
                    strings.append(" ")

            sentence = "".join(strings).strip()
            sentences_by_intent[intent_name].append(sentence)

        # Rasa Markdown training format; a blank line follows each example.
        markdown_lines = []
        for intent_name, intent_sents in sentences_by_intent.items():
            markdown_lines.append(f"## intent:{intent_name}")
            for intent_sent in intent_sents:
                markdown_lines.append(f"- {intent_sent}")
                markdown_lines.append("")

        # Explicit UTF-8 so non-ASCII sentences do not depend on the locale.
        with open(examples_md_path, "w", encoding="utf-8") as examples_md_file:
            examples_md_file.writelines(
                f"{line}\n" for line in markdown_lines)

        # Minimal training configuration (YAML text).
        training_config = (
            'language: "%s"\n'
            'pipeline: "pretrained_embeddings_spacy"\n'
        ) % language

        # The Markdown is embedded in the request indented by two spaces,
        # with runs of blank lines collapsed to one and trailing ones dropped.
        # It is built in memory, so no temporary file is left behind.
        nlu_lines = []
        blank_line = False
        for line in markdown_lines:
            line = line.strip()
            if line:
                if blank_line:
                    nlu_lines.append("")
                    blank_line = False

                nlu_lines.append("  %s" % line)
            else:
                blank_line = True

        training_body = {
            "config": training_config,
            "nlu": "".join(f"{line}\n" for line in nlu_lines),
        }

        # Do training via HTTP API
        training_url = urljoin(url, "model/train")
        response = requests.post(
            training_url,
            data=json.dumps(training_body),
            # A mapping, so requests encodes it as "?project=<name>".
            params={"project": project_name},
            headers={"Content-Type": "application/json"},
        )

        self._logger.debug("POSTed training data to %s", training_url)

        try:
            response.raise_for_status()
        except Exception as err:
            # Rasa gives quite helpful error messages, so extract them from
            # the response; fall back to the raw body if it is not the
            # expected JSON object.
            try:
                message = json.loads(response.content)["message"]
            except (ValueError, KeyError, TypeError):
                message = response.text

            raise Exception(
                "{0}: {1}".format(response.reason, message)) from err