Beispiel #1
0
    def test_slots(self):
        """Check slot FSTs on their own and spliced into a grammar.

        Loads the slots under ``test/slots``, verifies the values printed
        for the ``$colors`` slot, folds the slot FSTs into the
        ChangeLightColor grammar and recognizes one sentence with it.
        """
        slot_fsts = slots_to_fsts(Path("test/slots"))
        self.assertIn("$colors", slot_fsts)

        # Compare the first element of every printed path with the colors
        actual_colors = {path[0] for path in fstprintall(slot_fsts["$colors"])}
        expected_colors = {"yellow", "purple", "orange", "blue", "red", "green"}
        self.assertSetEqual(actual_colors, expected_colors)

        # Splice the slot FSTs into the grammar
        grammar_text = Path("test/ChangeLightColor.gram").read_text()
        fsts_result = grammar_to_fsts(grammar_text, replace_fsts=slot_fsts)
        grammar_fst = fsts_result.grammar_fst
        state_count = len(list(grammar_fst.states()))
        self.assertGreater(state_count, 0)

        recognized = fstaccept(
            grammar_fst, "set color to orange", intent_name="ChangeLightColor"
        )
        top = recognized[0]
        logging.debug(top)

        # Intent name and confidence
        self.assertEqual(top["intent"]["name"], "ChangeLightColor")
        self.assertEqual(top["intent"]["confidence"], 1)

        # Exactly one entity: color = orange
        self.assertEqual(len(top["entities"]), 1)
        color_entity = top["entities"][0]
        self.assertEqual(color_entity["entity"], "color")
        self.assertEqual(color_entity["value"], "orange")
Beispiel #2
0
def make_sentences_by_intent(intent_fst: fst.Fst) -> Dict[str, Any]:
    """Group every sentence printed from *intent_fst* by its intent name.

    Each symbol sequence yielded by ``fstprintall`` (meta symbols included)
    is converted with ``symbols2intent`` and filed under the value found at
    ``["intent"]["name"]`` of the converted result.

    Returns:
        A ``defaultdict(list)`` mapping intent name to the list of converted
        intents, in the order they were printed.
    """
    grouped: Dict[str, Any] = defaultdict(list)

    for path_symbols in fstprintall(intent_fst, exclude_meta=False):
        parsed = symbols2intent(path_symbols)
        grouped[parsed["intent"]["name"]].append(parsed)

    return grouped
Beispiel #3
0
    def test_end_disjunction(self):
        """GetGarageState must print exactly the "open" and "closed" sentences."""
        garage_fst = jsgf2fst(Path("test/GetGarageState.gram"))
        state_count = len(list(garage_fst.states()))
        self.assertGreater(state_count, 0)

        symbol_lists = fstprintall(garage_fst, exclude_meta=False)
        self.assertEqual(len(symbol_lists), 2)

        # Turn each symbol sequence into a space-separated sentence
        texts = [" ".join(symbols) for symbols in symbol_lists]

        for wanted in ("is the garage door open", "is the garage door closed"):
            self.assertIn(wanted, texts)
Beispiel #4
0
    def test_printall(self):
        """All 12 ChangeLightColor sentences must carry the color markers."""
        color_slots = read_slots("test/slots")
        color_fst = jsgf2fst(Path("test/ChangeLightColor.gram"), slots=color_slots)
        state_count = len(list(color_fst.states()))
        self.assertGreater(state_count, 0)

        all_sentences = fstprintall(color_fst, exclude_meta=False)
        self.assertEqual(len(all_sentences), 12)

        # Meta symbols were kept, so both entity markers must be present
        for symbols in all_sentences:
            for marker in ("__begin__color", "__end__color"):
                self.assertIn(marker, symbols)
Beispiel #5
0
    def sample_sentences(intent_name: str, intent_fst_path: str):
        """Draw random sentences from the FST stored at *intent_fst_path*.

        Runs the ``fstrandgen`` command-line tool with ``--npath`` set to
        ``num_samples`` (a name defined outside this function), parses the
        tool's output back into an FST and converts every printed path
        (meta symbols included) with ``symbols2intent``.

        Args:
            intent_name: Not used by the body; kept so existing callers
                keep working.
            intent_fst_path: Path handed to ``fstrandgen``.

        Returns:
            A list with one converted intent per printed path.

        Raises:
            subprocess.CalledProcessError: If ``fstrandgen`` exits with a
                non-zero status.
        """
        fstrandgen_cmd = ["fstrandgen", f"--npath={num_samples}", intent_fst_path]
        rand_fst = fst.Fst.read_from_string(subprocess.check_output(fstrandgen_cmd))

        # The original loop also rebound ``intent_name`` on every iteration
        # without ever reading it; that dead assignment is gone.
        sentences: List[Dict[str, Any]] = [
            symbols2intent(symbols)
            for symbols in fstprintall(rand_fst, exclude_meta=False)
        ]

        return sentences
Beispiel #6
0
    def test_printall(self):
        """All 12 ChangeLightColor sentences must carry the color markers.

        Builds the grammar FST from the grammar text with the slot FSTs
        from ``test/slots`` substituted in.
        """
        slot_fsts = slots_to_fsts(Path("test/slots"))
        grammar_text = Path("test/ChangeLightColor.gram").read_text()
        fsts_result = grammar_to_fsts(grammar_text, replace_fsts=slot_fsts)
        grammar_fst = fsts_result.grammar_fst
        state_count = len(list(grammar_fst.states()))
        self.assertGreater(state_count, 0)

        all_sentences = fstprintall(grammar_fst, exclude_meta=False)
        self.assertEqual(len(all_sentences), 12)

        # Meta symbols were kept, so both entity markers must be present
        for symbols in all_sentences:
            for marker in ("__begin__color", "__end__color"):
                self.assertIn(marker, symbols)
Beispiel #7
0
    def test_timer(self):
        """Recognize one SetTimer sentence and check text, entities and path count.

        Also writes the compiled FST to ``timer.fst`` in the current
        working directory.
        """
        grammar_text = Path("test/SetTimer.gram").read_text()
        timer_fst = grammar_to_fsts(grammar_text).grammar_fst
        state_count = len(list(timer_fst.states()))
        self.assertGreater(state_count, 0)
        timer_fst.write("timer.fst")

        recognized = fstaccept(
            timer_fst,
            "set a timer for ten minutes and forty two seconds",
            intent_name="SetTimer",
        )
        top = recognized[0]
        logging.debug(top)

        self.assertEqual(top["intent"]["name"], "SetTimer")
        self.assertEqual(top["intent"]["confidence"], 1)
        self.assertEqual(len(top["entities"]), 2)

        # Text after replacements have been applied
        substituted = top["text"]
        self.assertEqual(substituted, "set a timer for 10 minutes and 40 2 seconds")

        # Text exactly as given (no replacements)
        self.assertEqual(
            top["raw_text"], "set a timer for ten minutes and forty two seconds"
        )

        # Entities still waiting to be matched; entries are removed as found
        pending = {"minutes": "10", "seconds": "40 2"}
        pending_raw = {"minutes": "ten", "seconds": "forty two"}

        for found in top["entities"]:
            name = found["entity"]

            if (name in pending) and (found["value"] == pending[name]):
                # The reported span must cut the value out of the text
                span = slice(found["start"], found["end"])
                self.assertEqual(substituted[span], found["value"])
                del pending[name]

            if (name in pending_raw) and (found["raw_value"] == pending_raw[name]):
                del pending_raw[name]

        # Both dictionaries are empty only if every entity was matched
        self.assertDictEqual(pending, {})
        self.assertDictEqual(pending_raw, {})

        # Count every sentence the grammar can produce (takes a long time)
        logging.debug("Counting all possible test sentences...")
        all_paths = fstprintall(timer_fst, exclude_meta=False)
        expected_count = 2 * (59 * (1 + (2 * 59)))
        self.assertEqual(len(all_paths), expected_count)
Beispiel #8
0
    def test_end_disjunction(self):
        """GetGarageState must print exactly the "open" and "closed" sentences.

        Also checks that the compiled FST has states and carries both an
        input and an output symbol table.
        """
        grammar_text = Path("test/GetGarageState.gram").read_text()
        garage_fst = grammar_to_fsts(grammar_text).grammar_fst

        state_count = len(list(garage_fst.states()))
        self.assertGreater(state_count, 0)
        self.assertIsNotNone(garage_fst.input_symbols())
        self.assertIsNotNone(garage_fst.output_symbols())

        symbol_lists = fstprintall(garage_fst, exclude_meta=False)
        self.assertEqual(len(symbol_lists), 2)

        # Turn each symbol sequence into a space-separated sentence
        texts = [" ".join(symbols) for symbols in symbol_lists]

        for wanted in ("is the garage door open", "is the garage door closed"):
            self.assertIn(wanted, texts)
Beispiel #9
0
    def train(self, sentences_by_intent):
        """Send training sentences as JSON to the external training command.

        The sentences are serialized with ``json.dumps`` and written to the
        standard input of ``self.command``. Any failure is logged through
        ``self._logger.exception`` and not re-raised.

        Args:
            sentences_by_intent: Either a dict of the form
                ``{ intent: [ { 'text': ..., 'entities': { ... } }, ... ] }``,
                which is sent as-is, or anything else, which is treated as
                an intent FST and expanded with ``fstprintall`` and
                ``symbols2intent`` first.

        NOTE(review): the previous body discarded this argument and read an
        undefined ``intent_fst``, so every call ended in a logged NameError.
        Callers are not visible from here; confirm which of the two forms
        they pass.
        """
        self._logger.debug(self.command)

        try:
            if isinstance(sentences_by_intent, dict):
                grouped = sentences_by_intent
            else:
                # Imported lazily: only the FST form needs jsgf2fst
                from jsgf2fst import fstprintall

                grouped = defaultdict(list)
                for symbols in fstprintall(sentences_by_intent, exclude_meta=False):
                    intent = symbols2intent(symbols)
                    grouped[intent["intent"]["name"]].append(intent)

            # JSON -> STDIN
            payload = json.dumps(grouped).encode()

            subprocess.run(self.command, input=payload, check=True)
        except Exception:
            # Best effort: report the failure but do not propagate it
            self._logger.exception("train")
Beispiel #10
0
    def train(self, intent_fst) -> None:
        """Train a Rasa NLU model from every sentence in *intent_fst*.

        Steps:

        1. Read ``language`` and the ``intent.rasa`` settings (``url``,
           ``project_name``, ``examples_markdown``) from ``self.profile``.
        2. Print every path of *intent_fst* with meta symbols and turn it
           into a Rasa Markdown example, wrapping entity spans as
           ``[text](entity)`` and grouping examples by intent name.
        3. Write the examples to the profile's examples Markdown file.
        4. POST the pipeline configuration and the indented examples as
           JSON to ``<url>/model/train``.

        Raises:
            Exception: If the server answers with an HTTP error status. The
                message is the HTTP reason followed by the ``message``
                field of the JSON response, or by the raw response text
                when that field cannot be extracted.
        """
        from jsgf2fst import fstprintall
        import requests

        # Load settings
        language = self.profile.get("language", "en")
        rasa_config = self.profile.get("intent.rasa", {})

        url = rasa_config.get("url", "http://localhost:5005")
        project_name = rasa_config.get("project_name", "rhasspy")

        # Where the Markdown examples are written
        examples_md_path = self.profile.write_path(
            rasa_config.get("examples_markdown", "intent_examples.md")
        )

        # Build Markdown sentences
        sentences_by_intent: Dict[str, Any] = defaultdict(list)
        for symbols in fstprintall(intent_fst, exclude_meta=False):
            intent_name = ""
            strings = []
            for sym in symbols:
                if sym.startswith("<"):
                    continue  # <eps>
                elif sym.startswith("__label__"):
                    intent_name = sym[len("__label__"):]
                elif sym.startswith("__begin__"):
                    strings.append("[")
                elif sym.startswith("__end__"):
                    # Drop the space after the last word so the entity
                    # closes as "[word](tag)" rather than "[word ](tag)"
                    strings[-1] = strings[-1].strip()
                    tag = sym[len("__end__"):]
                    strings.append(f"]({tag})")
                    strings.append(" ")
                else:
                    strings.append(sym)
                    strings.append(" ")

            sentence = "".join(strings).strip()
            sentences_by_intent[intent_name].append(sentence)

        # Write Markdown file
        with open(examples_md_path, "w") as examples_md_file:
            for intent_name, intent_sents in sentences_by_intent.items():
                # Rasa Markdown training format
                print(f"## intent:{intent_name}", file=examples_md_file)
                for intent_sent in intent_sents:
                    print("-", intent_sent, file=examples_md_file)

                    print("", file=examples_md_file)

        # Training configuration (YAML)
        training_config = (
            f'language: "{language}"\n'
            'pipeline: "pretrained_embeddings_spacy"\n'
        )

        # Re-read the Markdown and indent every non-empty line by two
        # spaces, collapsing runs of blank lines into one. This is built in
        # memory: the earlier temporary file was never deleted and was
        # written in the locale encoding but decoded as UTF-8.
        nlu_lines = []
        with open(examples_md_path, "r") as examples_md_file:
            blank_line = False
            for line in examples_md_file:
                line = line.strip()
                if not line:
                    blank_line = True
                    continue

                if blank_line:
                    nlu_lines.append("\n")
                    blank_line = False

                nlu_lines.append(f"  {line}\n")

        # Do training via HTTP API
        training_url = urljoin(url, "model/train")
        training_body = {"config": training_config, "nlu": "".join(nlu_lines)}

        response = requests.post(
            training_url,
            data=json.dumps(training_body),
            # A dict is encoded as "?project=<name>"; a JSON string would be
            # sent verbatim as the whole query string.
            params={"project": project_name},
            headers={"Content-Type": "application/json"},
        )

        self._logger.debug(f"POSTed training data to {training_url}")

        try:
            response.raise_for_status()
        except requests.HTTPError as err:
            # Rasa gives quite helpful error messages, so extract them from
            # the response; fall back to the raw body if it is not the
            # expected JSON object.
            try:
                detail = json.loads(response.content)["message"]
            except (ValueError, KeyError, TypeError):
                detail = response.text

            raise Exception(f"{response.reason}: {detail}") from err