def test_slots(self):
    """Load slot FSTs, check the ``$colors`` values, and use them in a grammar."""
    slot_fsts = slots_to_fsts(Path("test/slots"))
    self.assertIn("$colors", slot_fsts)

    # Verify slot values (element 0 of every printed path)
    color_values = {path[0] for path in fstprintall(slot_fsts["$colors"])}
    self.assertSetEqual(
        color_values,
        {"yellow", "purple", "orange", "blue", "red", "green"},
    )

    # Fold the slot FSTs into a grammar
    gram_text = Path("test/ChangeLightColor.gram").read_text()
    fsts_result = grammar_to_fsts(gram_text, replace_fsts=slot_fsts)
    grammar_fst = fsts_result.grammar_fst
    state_count = len(list(grammar_fst.states()))
    self.assertGreater(state_count, 0)

    # Recognize a sentence and inspect the first result
    recognized = fstaccept(
        grammar_fst, "set color to orange", intent_name="ChangeLightColor"
    )
    intent = recognized[0]
    logging.debug(intent)

    intent_info = intent["intent"]
    self.assertEqual(intent_info["name"], "ChangeLightColor")
    self.assertEqual(intent_info["confidence"], 1)

    entities = intent["entities"]
    self.assertEqual(len(entities), 1)

    color_entity = entities[0]
    self.assertEqual(color_entity["entity"], "color")
    self.assertEqual(color_entity["value"], "orange")
def make_sentences_by_intent(intent_fst: fst.Fst) -> Dict[str, Any]:
    """Group every sentence printed from ``intent_fst`` by its intent name.

    Each path returned by ``fstprintall`` (meta symbols included) is parsed
    with ``symbols2intent`` and appended to the list stored under the
    parsed intent's name.

    Returns:
        A mapping shaped like
        ``{ intent: [ { 'text': ..., 'entities': { ... } }, ... ] }``.
    """
    grouped: Dict[str, Any] = defaultdict(list)

    all_paths = fstprintall(intent_fst, exclude_meta=False)
    for parsed in map(symbols2intent, all_paths):
        grouped[parsed["intent"]["name"]].append(parsed)

    return grouped
def test_end_disjunction(self):
    """GetGarageState must expand to exactly its two alternative sentences."""
    garage_fst = jsgf2fst(Path("test/GetGarageState.gram"))
    state_count = len(list(garage_fst.states()))
    self.assertGreater(state_count, 0)

    paths = fstprintall(garage_fst, exclude_meta=False)
    self.assertEqual(len(paths), 2)

    # Join the symbols of each path into one string
    joined = [" ".join(path) for path in paths]
    for wanted in ("is the garage door open", "is the garage door closed"):
        self.assertIn(wanted, joined)
def test_printall(self):
    """ChangeLightColor with slots must print 12 sentences tagged with color."""
    slot_values = read_slots("test/slots")
    color_fst = jsgf2fst(Path("test/ChangeLightColor.gram"), slots=slot_values)
    state_count = len(list(color_fst.states()))
    self.assertGreater(state_count, 0)

    printed = fstprintall(color_fst, exclude_meta=False)
    self.assertEqual(len(printed), 12)

    # Every sentence must carry both entity meta tokens
    for symbols in printed:
        for marker in ("__begin__color", "__end__color"):
            self.assertIn(marker, symbols)
def sample_sentences(intent_name: str, intent_fst_path: str):
    """Sample random sentences from the FST stored at ``intent_fst_path``.

    Runs the ``fstrandgen`` command line tool with ``--npath`` set to
    ``num_samples`` (a name that is not defined in this function and is
    resolved from the surrounding scope), loads its output as an FST and
    parses every printed path with ``symbols2intent``.

    Args:
        intent_name: Not read by this function; kept so the signature stays
            unchanged for existing callers.
        intent_fst_path: Path of the FST file handed to ``fstrandgen``.

    Returns:
        A list with one ``symbols2intent`` result per sampled path.

    Raises:
        subprocess.CalledProcessError: If ``fstrandgen`` exits with a
            non-zero status.
    """
    randgen_output = subprocess.check_output(
        ["fstrandgen", f"--npath={num_samples}", intent_fst_path]
    )
    rand_fst = fst.Fst.read_from_string(randgen_output)

    # The parsed intent's own name is deliberately not copied into
    # ``intent_name``: doing so only clobbered the parameter with a value
    # that was never read.
    return [
        symbols2intent(symbols)
        for symbols in fstprintall(rand_fst, exclude_meta=False)
    ]
def test_printall(self):
    """ChangeLightColor with slot FSTs must print 12 color-tagged sentences."""
    replacements = slots_to_fsts(Path("test/slots"))
    gram_text = Path("test/ChangeLightColor.gram").read_text()
    fsts_result = grammar_to_fsts(gram_text, replace_fsts=replacements)
    grammar_fst = fsts_result.grammar_fst
    state_count = len(list(grammar_fst.states()))
    self.assertGreater(state_count, 0)

    printed = fstprintall(grammar_fst, exclude_meta=False)
    self.assertEqual(len(printed), 12)

    # Every sentence must carry both entity meta tokens
    for symbols in printed:
        for marker in ("__begin__color", "__end__color"):
            self.assertIn(marker, symbols)
def test_timer(self):
    """Recognize a SetTimer sentence and check text, entities and path count.

    Also writes the compiled FST to ``timer.fst`` in the working directory.
    """
    spoken = "set a timer for ten minutes and forty two seconds"

    gram_text = Path("test/SetTimer.gram").read_text()
    timer_fst = grammar_to_fsts(gram_text).grammar_fst
    state_count = len(list(timer_fst.states()))
    self.assertGreater(state_count, 0)
    timer_fst.write("timer.fst")

    recognized = fstaccept(timer_fst, spoken, intent_name="SetTimer")
    intent = recognized[0]
    logging.debug(intent)

    intent_info = intent["intent"]
    self.assertEqual(intent_info["name"], "SetTimer")
    self.assertEqual(intent_info["confidence"], 1)
    self.assertEqual(len(intent["entities"]), 2)

    # Text with replacements applied
    text = intent["text"]
    self.assertEqual(text, "set a timer for 10 minutes and 40 2 seconds")

    # "Raw" text, i.e. without replacements
    raw_text = intent["raw_text"]
    self.assertEqual(raw_text, spoken)

    # Individual entities: each match is removed from the pending dicts,
    # so both dicts must be empty once all entities were visited.
    pending = {"minutes": "10", "seconds": "40 2"}
    pending_raw = {"minutes": "ten", "seconds": "forty two"}

    for found in intent["entities"]:
        name = found["entity"]

        if name in pending and found["value"] == pending[name]:
            # The start/end offsets must point at the value inside the text
            span = slice(found["start"], found["end"])
            self.assertEqual(text[span], found["value"])
            del pending[name]

        if name in pending_raw and found["raw_value"] == pending_raw[name]:
            del pending_raw[name]

    self.assertDictEqual(pending, {})
    self.assertDictEqual(pending_raw, {})

    # Verify number of sentences (takes a long time)
    logging.debug("Counting all possible test sentences...")
    all_sentences = fstprintall(timer_fst, exclude_meta=False)
    self.assertEqual(len(all_sentences), 2 * (59 * (1 + (2 * 59))))
def test_end_disjunction(self):
    """GetGarageState must compile with symbol tables and yield two sentences."""
    gram_text = Path("test/GetGarageState.gram").read_text()
    grammar_fst = grammar_to_fsts(gram_text).grammar_fst

    state_count = len(list(grammar_fst.states()))
    self.assertGreater(state_count, 0)
    self.assertIsNotNone(grammar_fst.input_symbols())
    self.assertIsNotNone(grammar_fst.output_symbols())

    paths = fstprintall(grammar_fst, exclude_meta=False)
    self.assertEqual(len(paths), 2)

    # Join the symbols of each path into one string
    joined = [" ".join(path) for path in paths]
    for wanted in ("is the garage door open", "is the garage door closed"):
        self.assertIn(wanted, joined)
def train(self, sentences_by_intent):
    """Feed all training sentences, grouped by intent, to ``self.command``.

    Every path printed from the FST is parsed with ``symbols2intent`` and
    grouped under its intent name; the grouping is serialized as JSON and
    passed on standard input to the external command.

    Failures are logged through ``self._logger`` and not re-raised, so the
    method always returns ``None``.

    Args:
        sentences_by_intent: Despite its name, this is passed to
            ``fstprintall`` as the intent FST (see the note below).
    """
    from jsgf2fst import fstprintall

    # NOTE(review): the loop below used to read an undefined ``intent_fst``
    # while the parameter was immediately overwritten, so every call died
    # with a NameError that the bare ``except`` swallowed. The argument is
    # presumably the intent FST - confirm against the caller. The parameter
    # name is kept so existing call sites stay valid.
    intent_fst = sentences_by_intent

    self._logger.debug(self.command)

    try:
        # { intent: [ { 'text': ..., 'entities': { ... } }, ... ] }
        grouped: Dict[str, Any] = defaultdict(list)
        for symbols in fstprintall(intent_fst, exclude_meta=False):
            intent = symbols2intent(symbols)
            grouped[intent["intent"]["name"]].append(intent)

        # JSON -> STDIN
        payload = json.dumps(grouped).encode()
        subprocess.run(self.command, input=payload, check=True)
    except Exception:
        # Best effort: report the failure but let KeyboardInterrupt and
        # SystemExit propagate, which a bare ``except`` would have trapped.
        self._logger.exception("train")
def train(self, intent_fst) -> None:
    """Train a Rasa NLU model from the sentences printed from ``intent_fst``.

    Every path of the FST is rendered as one Rasa Markdown training example.
    The examples are written to the profile's ``examples_markdown`` file and
    then POSTed, together with a two-line pipeline configuration, to the
    ``model/train`` endpoint of the configured Rasa server.

    Args:
        intent_fst: FST handed to ``fstprintall`` with meta symbols included.

    Raises:
        Exception: If the server answers with an HTTP error status. The
            message holds the HTTP reason and the server's error message
            (or the raw response text when the body is not the expected
            JSON).
    """
    from jsgf2fst import fstprintall
    import requests

    # Load settings
    language = self.profile.get("language", "en")
    rasa_config = self.profile.get("intent.rasa", {})
    url = rasa_config.get("url", "http://localhost:5005")
    project_name = rasa_config.get("project_name", "rhasspy")

    examples_md_path = self.profile.write_path(
        rasa_config.get("examples_markdown", "intent_examples.md")
    )

    label_prefix = "__label__"
    end_prefix = "__end__"

    # Build Markdown sentences, e.g. "set color to [orange](color)"
    sentences_by_intent: Dict[str, Any] = defaultdict(list)
    for symbols in fstprintall(intent_fst, exclude_meta=False):
        intent_name = ""
        strings = []
        for sym in symbols:
            if sym.startswith("<"):
                continue  # <eps>
            elif sym.startswith(label_prefix):
                intent_name = sym[len(label_prefix):]
            elif sym.startswith("__begin__"):
                strings.append("[")
            elif sym.startswith(end_prefix):
                # Drop the space that followed the last word of the entity
                strings[-1] = strings[-1].strip()
                tag = sym[len(end_prefix):]
                strings.append(f"]({tag})")
                strings.append(" ")
            else:
                strings.append(sym)
                strings.append(" ")

        sentence = "".join(strings).strip()
        sentences_by_intent[intent_name].append(sentence)

    # Rasa Markdown training format, one list entry per output line
    markdown_lines = []
    for intent_name, intent_sents in sentences_by_intent.items():
        markdown_lines.append(f"## intent:{intent_name}")
        markdown_lines.extend(f"- {intent_sent}" for intent_sent in intent_sents)
        markdown_lines.append("")

    # Write the examples to the Markdown file
    with open(examples_md_path, "w") as examples_md_file:
        for line in markdown_lines:
            print(line, file=examples_md_file)

    training_config = (
        'language: "%s"\n' % language
        + 'pipeline: "pretrained_embeddings_spacy"\n'
    )

    # Indent the Markdown for the request body. Runs of blank lines collapse
    # into a single blank line that is only emitted once another non-blank
    # line follows. This is assembled in memory: the earlier temporary file
    # was never deleted and was written in the locale encoding but decoded
    # as UTF-8.
    nlu_parts = []
    blank_line = False
    for line in markdown_lines:
        line = line.strip()
        if not line:
            blank_line = True
            continue

        if blank_line:
            nlu_parts.append("\n")
            blank_line = False

        nlu_parts.append(" %s\n" % line)

    training_body = {"config": training_config, "nlu": "".join(nlu_parts)}

    # Do training via HTTP API
    training_url = urljoin(url, "model/train")
    response = requests.post(
        training_url,
        data=json.dumps(training_body),
        # A mapping is encoded as "?project=<name>"; a JSON string would be
        # sent verbatim as the raw query string.
        params={"project": project_name},
        headers={"Content-Type": "application/json"},
    )

    self._logger.debug(f"POSTed training data to {training_url}")

    try:
        response.raise_for_status()
    except requests.HTTPError as err:
        # Rasa gives quite helpful error messages, so extract them from the
        # response, falling back to the raw text so that an unexpected body
        # cannot hide the HTTP error itself.
        try:
            detail = json.loads(response.content)["message"]
        except (ValueError, KeyError, TypeError):
            detail = response.text

        raise Exception(f"{response.reason}: {detail}") from err