def sample_sentences(intent_name: str, intent_fst_path: str):
    """Draw random paths from an intent FST file and parse them as intents.

    ``fstrandgen`` is run on ``intent_fst_path`` with ``--npath`` set to
    ``num_samples``; the bytes it prints are loaded with
    ``fst.Fst.read_from_string`` and every path yielded by ``fstprintall``
    is converted with ``symbols2intent``.

    Args:
        intent_name: Not used by this function's body.
        intent_fst_path: Path handed to ``fstrandgen`` as its input FST.

    Returns:
        A list with one ``symbols2intent`` result per printed path.

    Raises:
        subprocess.CalledProcessError: if ``fstrandgen`` exits non-zero.
    """
    # NOTE: ``num_samples`` is not a parameter; it is resolved from an
    # enclosing scope when this function runs.
    randgen_command = ["fstrandgen", f"--npath={num_samples}", intent_fst_path]
    randgen_output = subprocess.check_output(randgen_command)
    sampled_fst = fst.Fst.read_from_string(randgen_output)

    parsed: List[Dict[str, Any]] = [
        symbols2intent(path_symbols)
        for path_symbols in fstprintall(sampled_fst, exclude_meta=False)
    ]
    return parsed
def make_sentences_by_intent(intent_fst: fst.Fst) -> Dict[str, Any]:
    """Group every sentence of an FST under the name of its intent.

    Each path produced by ``fstprintall`` is converted with
    ``symbols2intent`` and filed under ``result["intent"]["name"]``.

    Returns:
        A ``defaultdict(list)`` shaped like
        ``{ intent: [ { 'text': ..., 'entities': { ... } }, ... ] }``.
    """
    from rhasspy.train.jsgf2fst import fstprintall, symbols2intent

    grouped: Dict[str, Any] = defaultdict(list)

    # map() is lazy, so each path is parsed right after it is yielded.
    all_paths = fstprintall(intent_fst, exclude_meta=False)
    for parsed in map(symbols2intent, all_paths):
        grouped[parsed["intent"]["name"]].append(parsed)

    return grouped
def train(self, intent_fst: fst.Fst):
    """Train by piping the FST's sentences, as JSON, to ``self.command``.

    Sentences are grouped by intent name and serialized as
    ``{ intent: [ { 'text': ..., 'entities': { ... } }, ... ] }``; the
    encoded JSON is fed to the command's standard input.

    Any exception (including a non-zero exit status, because the command
    is run with ``check=True``) is logged through ``self._logger`` and not
    re-raised.
    """
    self._logger.debug(self.command)

    try:
        grouped: Dict[str, Any] = defaultdict(list)
        all_paths = fstprintall(intent_fst, exclude_meta=False)
        for parsed in map(symbols2intent, all_paths):
            grouped[parsed["intent"]["name"]].append(parsed)

        # JSON -> STDIN
        payload = json.dumps(grouped).encode()
        subprocess.run(self.command, input=payload, check=True)
    except Exception:
        self._logger.exception("train")
def train(self, intent_fst) -> None:
    """Convert examples to Markdown and POST to RasaNLU server.

    Every path of ``intent_fst`` is rendered as one example in Rasa's
    Markdown training format. The examples are written to the profile's
    examples file (``intent.rasa`` -> ``examples_markdown``) and the same
    text, indented by one space per line, is sent as the ``nlu`` field of a
    JSON body to ``<url>/model/train`` together with a small ``config``
    (language + pipeline).

    The request body is assembled in memory, so no temporary file is left
    behind and the text is never round-tripped through the locale's
    default encoding.

    Raises:
        Exception: if the server answers with an HTTP error status. The
            message is ``"<reason>: <detail>"`` where the detail is the
            ``message`` field of the JSON response when available, or the
            raw response text otherwise.
    """
    from rhasspy.train.jsgf2fst import fstprintall
    import requests

    # Load settings
    language = self.profile.get("language", "en")
    rasa_config = self.profile.get("intent.rasa", {})

    url = rasa_config.get("url", "http://localhost:5005")
    project_name = rasa_config.get("project_name", "rhasspy")

    # Where the Markdown examples are stored
    examples_md_path = self.profile.write_path(
        rasa_config.get("examples_markdown", "intent_examples.md"))

    # Build Markdown sentences: { intent name: [ "turn [on](state) ...", ... ] }
    sentences_by_intent: Dict[str, Any] = defaultdict(list)
    for symbols in fstprintall(intent_fst, exclude_meta=False):
        intent_name = ""
        strings = []
        for sym in symbols:
            if sym.startswith("<"):
                continue  # <eps>

            if sym.startswith("__label__"):
                intent_name = sym[9:]
            elif sym.startswith("__begin__"):
                strings.append("[")
            elif sym.startswith("__end__"):
                # Drop the space that followed the last word so the entity
                # reads "[word](tag)". Guarded so an __end__ with nothing
                # before it cannot raise IndexError.
                if strings:
                    strings[-1] = strings[-1].strip()

                tag = sym[7:]
                strings.append(f"]({tag})")
                strings.append(" ")
            else:
                strings.append(sym)
                strings.append(" ")

        sentence = "".join(strings).strip()
        sentences_by_intent[intent_name].append(sentence)

    # Rasa Markdown training format, one list entry per output line
    markdown_lines = []
    for intent_name, intent_sents in sentences_by_intent.items():
        markdown_lines.append(f"## intent:{intent_name}")
        markdown_lines.extend(f"- {intent_sent}" for intent_sent in intent_sents)
        markdown_lines.append("")

    # Write the Markdown examples file (explicit UTF-8, which is also the
    # encoding of the JSON request body below).
    with open(examples_md_path, "w", encoding="utf-8") as examples_md_file:
        examples_md_file.writelines(line + "\n" for line in markdown_lines)

    # Training configuration (YAML)
    training_config = (
        'language: "%s"\n' % language
        + 'pipeline: "pretrained_embeddings_spacy"\n'
    )

    # Indent the Markdown for the request body. Runs of blank lines are
    # collapsed to a single blank line emitted before the next non-blank
    # line, so the text never ends with a dangling blank line.
    nlu_lines = []
    blank_line = False
    for line in markdown_lines:
        line = line.strip()
        if not line:
            blank_line = True
            continue

        if blank_line:
            nlu_lines.append("")
            blank_line = False

        nlu_lines.append(" %s" % line)

    training_body = {
        "config": training_config,
        "nlu": "".join(line + "\n" for line in nlu_lines),
    }

    # Do training via HTTP API
    training_url = urljoin(url, "model/train")
    response = requests.post(
        training_url,
        data=json.dumps(training_body),
        # A mapping is encoded by requests as "?project=<name>"; a JSON
        # string here would be sent verbatim as the query string.
        params={"project": project_name},
        headers={"Content-Type": "application/json"},
    )

    self._logger.debug("POSTed training data to %s", training_url)

    try:
        response.raise_for_status()
    except Exception as err:
        # Rasa gives quite helpful error messages, so extract them from the
        # response. Fall back to the raw body when it is not the expected
        # JSON object, so the HTTP error is not masked by a parsing error.
        try:
            message = json.loads(response.content)["message"]
        except (ValueError, KeyError, TypeError):
            message = response.text

        raise Exception("{0}: {1}".format(response.reason, message)) from err