예제 #1
0
    async def handle_query(
        self, query: NluQuery
    ) -> typing.AsyncIterable[typing.Union[NluIntentParsed, typing.Tuple[
            NluIntent, TopicArgs], NluIntentNotRecognized, NluError, ]]:
        """Recognize the intent of ``query.input`` using the intent graph.

        If no intent graph is loaded yet and ``self.graph_path`` points to an
        existing file, the graph is loaded from that file first.  When
        ``self.replace_numbers`` is set, ``query.input`` is rewritten in place
        with the output of ``rhasspynlu.replace_numbers``.  Recognition is
        skipped when ``self.failure_token`` appears as a whitespace-separated
        token of the input.

        Args:
            query: The NLU query to process.  Its ``input`` attribute may be
                modified as described above.

        Yields:
            ``NluIntentParsed`` followed by an ``(NluIntent, topic_args)``
            tuple, both built from the first recognition, when
            ``NluHermesMqtt.is_success`` accepts the recognitions; otherwise
            one ``NluIntentNotRecognized``.  Any exception raised inside the
            method is logged and reported as an ``NluError`` whose context is
            the input text as it was when the method was entered.
        """
        # Keep the text as received; query.input may be rewritten below.
        original_input = query.input

        try:
            if (not self.intent_graph and self.graph_path
                    and self.graph_path.is_file()):
                # Load graph from file
                _LOGGER.debug("Loading %s", self.graph_path)
                with open(self.graph_path, mode="rb") as graph_file:
                    self.intent_graph = rhasspynlu.gzip_pickle_to_graph(
                        graph_file)

            # Stays empty when there is no graph or the failure token is seen.
            recognitions = []

            if self.intent_graph:

                def intent_filter(intent_name: str) -> bool:
                    """Filter out intents."""
                    if query.intent_filter:
                        return intent_name in query.intent_filter
                    return True

                # Replace digits with words
                if self.replace_numbers:
                    # Have to assume whitespace tokenization
                    words = rhasspynlu.replace_numbers(query.input.split(),
                                                       self.language)
                    query.input = " ".join(words)

                found_failure_token = bool(self.failure_token) and (
                    self.failure_token in query.input.split())

                if not found_failure_token:
                    # Pass in raw query input so raw values will be correct
                    recognitions = recognize(
                        query.input,
                        self.intent_graph,
                        intent_filter=intent_filter,
                        word_transform=self.word_transform,
                        fuzzy=self.fuzzy,
                        extra_converters=self.extra_converters,
                    )
            else:
                _LOGGER.error("No intent graph loaded")

            if NluHermesMqtt.is_success(recognitions):
                # Use first recognition only.
                recognition = recognitions[0]
                assert recognition is not None
                assert recognition.intent is not None

                intent = Intent(
                    intent_name=recognition.intent.name,
                    confidence_score=recognition.intent.confidence,
                )
                slots = [
                    Slot(
                        entity=(e.source or e.entity),
                        slot_name=e.entity,
                        confidence=1.0,
                        value=e.value_dict,
                        raw_value=e.raw_value,
                        range=SlotRange(
                            start=e.start,
                            end=e.end,
                            raw_start=e.raw_start,
                            raw_end=e.raw_end,
                        ),
                    ) for e in recognition.entities
                ]

                if query.custom_entities:
                    # Copy user-defined entities
                    slots.extend(
                        Slot(
                            entity=entity_name,
                            confidence=1.0,
                            value={"value": entity_value},
                        ) for entity_name, entity_value in
                        query.custom_entities.items())

                # intentParsed
                yield NluIntentParsed(
                    input=recognition.text,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=intent,
                    slots=slots,
                )

                # intent
                yield (
                    NluIntent(
                        input=recognition.text,
                        id=query.id,
                        site_id=query.site_id,
                        session_id=query.session_id,
                        intent=intent,
                        slots=slots,
                        asr_tokens=[
                            NluIntent.make_asr_tokens(recognition.tokens)
                        ],
                        asr_confidence=query.asr_confidence,
                        raw_input=original_input,
                        wakeword_id=query.wakeword_id,
                        lang=(query.lang or self.lang),
                        custom_data=query.custom_data,
                    ),
                    {
                        "intent_name": recognition.intent.name
                    },
                )
            else:
                # Not recognized
                yield NluIntentNotRecognized(
                    input=query.input,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    custom_data=query.custom_data,
                )
        except Exception as e:
            _LOGGER.exception("handle_query")
            yield NluError(
                site_id=query.site_id,
                session_id=query.session_id,
                error=str(e),
                context=original_input,
            )
예제 #2
0
async def recognize(args: argparse.Namespace, core: Voice2JsonCore) -> None:
    """Recognize intent from sentence(s) and print one JSON object for each.

    Sentences are taken from ``args.sentence`` when it is set; otherwise they
    are read line by line from standard input.  Each sentence is either plain
    text (``args.text_input``) or a JSON object whose
    ``args.transcription_property`` key holds the text.  The first recognition
    (or an empty one on failure) is merged into the input object, which is
    then written with ``print_json``.  A ``KeyboardInterrupt`` during
    processing ends the loop quietly.

    Args:
        args: Parsed command-line arguments.  Reads ``sentence``,
            ``intent_filter``, ``text_input``, ``transcription_property``,
            ``replace_numbers``, ``perplexity`` and ``debug``.
        core: Provides the profile settings and profile-relative paths.

    Raises:
        AssertionError: If ``core.check_trained()`` is falsy (and assertions
            are enabled).
    """
    import networkx as nx
    import rhasspynlu
    from .train import WordCasing

    # Make sure profile has been trained
    assert core.check_trained(), "Not trained"

    # Load settings
    language_code = pydash.get(core.profile, "language.code", "en-US")
    word_casing = WordCasing(
        pydash.get(core.profile, "training.word-casing", "ignore").lower())
    intent_graph_path = core.ppath("training.intent-graph", "intent.pickle.gz")
    converters_dir = core.ppath("training.converters-directory", "converters")
    stop_words_path = core.ppath("intent-recognition.stop-words",
                                 "stop_words.txt")
    fuzzy = pydash.get(core.profile, "intent-recognition.fuzzy", True)

    # Load stop words: one per line, surrounding whitespace and blank lines
    # are dropped.
    stop_words: typing.Optional[typing.Set[str]] = None
    if stop_words_path and stop_words_path.is_file():
        # Decode explicitly so the result does not depend on the locale.
        # NOTE(review): assumes the stop words file is UTF-8 - confirm.
        with open(stop_words_path, "r", encoding="utf-8") as stop_words_file:
            stripped_lines = (line.strip() for line in stop_words_file)
            stop_words = {word for word in stripped_lines if word}

    # Load converters
    extra_converters: typing.Optional[typing.Dict[str, typing.Any]] = {}
    if converters_dir:
        extra_converters = load_converters(converters_dir)

    # Case transformation for input words
    word_transform = None
    if word_casing == WordCasing.UPPER:
        word_transform = str.upper
    elif word_casing == WordCasing.LOWER:
        word_transform = str.lower

    if args.sentence:
        sentences = args.sentence
    else:
        if os.isatty(sys.stdin.fileno()):
            print("Reading sentences from stdin", file=sys.stderr)

        sentences = sys.stdin

    # Whitelist of intents, kept in a local set so the caller's namespace is
    # left untouched.  None means every intent is allowed.
    allowed_intents: typing.Optional[typing.Set[str]] = None
    if args.intent_filter:
        allowed_intents = set(args.intent_filter)

    def intent_filter(intent_name: str) -> bool:
        """Filter out intents."""
        if allowed_intents:
            return intent_name in allowed_intents

        return True

    # Load intent graph
    _LOGGER.debug("Loading %s", intent_graph_path)
    with gzip.GzipFile(intent_graph_path, mode="rb") as graph_gzip:
        intent_graph = nx.readwrite.gpickle.read_gpickle(graph_gzip)

    # Process sentences
    try:
        for sentence in sentences:
            if args.text_input:
                # Input is plain text
                text = sentence
                sentence_object = {"text": text}
            else:
                # Input is JSON
                sentence_object = json.loads(sentence)
                text = sentence_object.get(args.transcription_property, "")

            # Tokenize
            text = text.strip()
            tokens = text.split()

            if args.replace_numbers:
                tokens = list(
                    rhasspynlu.replace_numbers(tokens, language=language_code))

            # Recognize intent
            recognitions = rhasspynlu.recognize(
                tokens,
                intent_graph,
                fuzzy=fuzzy,
                stop_words=stop_words,
                word_transform=word_transform,
                extra_converters=extra_converters,
                intent_filter=intent_filter,
            )

            if recognitions:
                # Use first recognition
                recognition = recognitions[0]
            else:
                # Recognition failure
                recognition = rhasspynlu.intent.Recognition.empty()

            result = dataclasses.asdict(recognition)

            # Add slots
            result["slots"] = {e.entity: e.value for e in recognition.entities}

            # Merge with input object: a None result value never overwrites a
            # key that the input object already has.
            for key, value in result.items():
                if (key not in sentence_object) or (value is not None):
                    sentence_object[key] = value

            if not sentence_object["text"]:
                sentence_object["text"] = text

            # Keep text from transcription
            sentence_object["raw_text"] = text

            if args.perplexity:
                # Compute perplexity of input text for one or more language
                # models (stored in FST binary format).
                perplexity = {}
                for lm_fst_path in args.perplexity:
                    try:
                        perplexity[
                            lm_fst_path] = rhasspynlu.arpa_lm.get_perplexity(
                                text, lm_fst_path, debug=args.debug)
                    except Exception:
                        # Best effort: log and continue with the next model
                        _LOGGER.exception(lm_fst_path)

                sentence_object["perplexity"] = perplexity

            print_json(sentence_object)
    except KeyboardInterrupt:
        pass
    async def handle_query(
        self, query: NluQuery
    ) -> typing.AsyncIterable[typing.Union[
            NluIntentParsed, NluIntentNotRecognized, NluError, ]]:
        """Ask the remote parse endpoint for the intent of ``query.input``.

        When ``self.replace_numbers`` is set, ``query.input`` is first
        rewritten in place with the output of ``rhasspynlu.replace_numbers``.
        The text (passed through ``self.word_transform`` when one is set) is
        then POSTed as JSON, together with ``self.rasa_project``, to the URL
        built by ``urljoin(self.rasa_url, "model/parse")``.

        Yields:
            ``NluIntentParsed`` when the response names an intent and
            ``query.intent_filter`` is ``None`` or contains that name;
            otherwise ``NluIntentNotRecognized``.  Any exception raised inside
            the method is logged and reported as an ``NluError``.
        """
        try:
            if self.replace_numbers:
                # Digits become words; tokens are assumed whitespace-separated
                number_words = rhasspynlu.replace_numbers(
                    query.input.split(), self.number_language)
                query.input = " ".join(number_words)

            # Text for the request and the output event, with casing fixed
            text_to_parse = query.input
            if self.word_transform:
                text_to_parse = self.word_transform(text_to_parse)

            endpoint = urljoin(self.rasa_url, "model/parse")
            _LOGGER.debug(endpoint)

            async with self.http_session.post(
                    endpoint,
                    json={
                        "text": text_to_parse,
                        "project": self.rasa_project
                    },
                    ssl=self.ssl_context,
            ) as response:
                response.raise_for_status()
                parsed = await response.json()
                intent_info = parsed.get("intent", {})
                intent_name = intent_info.get("name", "")

                accepted = intent_name and (
                    query.intent_filter is None
                    or intent_name in query.intent_filter)

                if not accepted:
                    # Not recognized
                    yield NluIntentNotRecognized(
                        input=query.input,
                        id=query.id,
                        site_id=query.site_id,
                        session_id=query.session_id,
                    )
                    return

                confidence_score = float(intent_info.get("confidence", 0.0))

                slots = []
                for entity in parsed.get("entities", []):
                    slots.append(
                        Slot(
                            entity=entity.get("entity", ""),
                            slot_name=entity.get("entity", ""),
                            confidence=float(entity.get("confidence", 0.0)),
                            value={
                                "kind": "Unknown",
                                "value": entity.get("value", ""),
                                "additional_info":
                                entity.get("additional_info", {}),
                                "extractor": entity.get("extractor"),
                            },
                            raw_value=entity.get("value", ""),
                            # Raw offsets reuse the offsets from the response
                            range=SlotRange(
                                start=int(entity.get("start", 0)),
                                end=int(entity.get("end", 1)),
                                raw_start=int(entity.get("start", 0)),
                                raw_end=int(entity.get("end", 1)),
                            ),
                        ))

                # intentParsed
                yield NluIntentParsed(
                    input=text_to_parse,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=Intent(intent_name=intent_name,
                                  confidence_score=confidence_score),
                    slots=slots,
                )
        except Exception as e:
            _LOGGER.exception("nlu query")
            yield NluError(
                site_id=query.site_id,
                session_id=query.session_id,
                error=str(e),
                context=query.input,
            )
예제 #4
0
    async def handle_query(
        self, query: NluQuery
    ) -> typing.AsyncIterable[typing.Union[NluIntentParsed, typing.Tuple[
            NluIntent, TopicArgs], NluIntentNotRecognized, NluError, ]]:
        """Recognize the intent of ``query.input`` with ``rhasspyfuzzywuzzy``.

        If no intent graph is loaded yet and ``self.intent_graph_path`` points
        to an existing file, the graph is loaded from that file first.
        Recognition only runs when a graph is loaded and
        ``self.examples_path`` is an existing file.  When
        ``self.replace_numbers`` is set, ``query.input`` is rewritten in place
        with the output of ``rhasspynlu.replace_numbers``.

        Args:
            query: The NLU query to process.  Its ``input`` attribute may be
                modified as described above.

        Yields:
            ``NluIntentParsed`` followed by an ``(NluIntent, topic_args)``
            tuple when the first recognition has an intent whose confidence
            is at least ``self.confidence_threshold``; otherwise one
            ``NluIntentNotRecognized``.  Any exception raised inside the
            method is logged and reported as an ``NluError`` whose context is
            the input text as it was when the method was entered.
        """
        # Bound before the try block so the error handler below can always
        # read it, even if the failure happens while loading the graph.
        original_text = query.input

        try:
            # Check intent graph
            if (not self.intent_graph and self.intent_graph_path
                    and self.intent_graph_path.is_file()):
                _LOGGER.debug("Loading %s", self.intent_graph_path)
                with open(self.intent_graph_path, mode="rb") as graph_file:
                    self.intent_graph = rhasspynlu.gzip_pickle_to_graph(
                        graph_file)

            # Stays empty when nothing is loaded or the input text is empty.
            recognitions: typing.List[rhasspynlu.intent.Recognition] = []

            # Check examples
            if (self.intent_graph and self.examples_path
                    and self.examples_path.is_file()):

                def intent_filter(intent_name: str) -> bool:
                    """Filter out intents."""
                    if query.intent_filter:
                        return intent_name in query.intent_filter
                    return True

                # Replace digits with words
                if self.replace_numbers:
                    # Have to assume whitespace tokenization
                    words = rhasspynlu.replace_numbers(query.input.split(),
                                                       self.language)
                    query.input = " ".join(words)

                input_text = query.input

                # Fix casing
                if self.word_transform:
                    input_text = self.word_transform(input_text)

                if input_text:
                    recognitions = rhasspyfuzzywuzzy.recognize(
                        input_text,
                        self.intent_graph,
                        str(self.examples_path),
                        intent_filter=intent_filter,
                        extra_converters=self.extra_converters,
                    )
            else:
                _LOGGER.error("No intent graph or examples loaded")

            # Use first recognition only if above threshold
            if (recognitions and recognitions[0] and recognitions[0].intent
                    and (recognitions[0].intent.confidence >=
                         self.confidence_threshold)):
                recognition = recognitions[0]
                assert recognition.intent
                intent = Intent(
                    intent_name=recognition.intent.name,
                    confidence_score=recognition.intent.confidence,
                )
                slots = [
                    Slot(
                        entity=(e.source or e.entity),
                        slot_name=e.entity,
                        confidence=1.0,
                        value=e.value_dict,
                        raw_value=e.raw_value,
                        range=SlotRange(
                            start=e.start,
                            end=e.end,
                            raw_start=e.raw_start,
                            raw_end=e.raw_end,
                        ),
                    ) for e in recognition.entities
                ]

                if query.custom_entities:
                    # Copy user-defined entities
                    slots.extend(
                        Slot(
                            entity=entity_name,
                            confidence=1.0,
                            value={"value": entity_value},
                        ) for entity_name, entity_value in
                        query.custom_entities.items())

                # intentParsed
                yield NluIntentParsed(
                    input=recognition.text,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    intent=intent,
                    slots=slots,
                )

                # intent
                yield (
                    NluIntent(
                        input=recognition.text,
                        id=query.id,
                        site_id=query.site_id,
                        session_id=query.session_id,
                        intent=intent,
                        slots=slots,
                        asr_tokens=[
                            NluIntent.make_asr_tokens(recognition.tokens)
                        ],
                        asr_confidence=query.asr_confidence,
                        raw_input=original_text,
                        wakeword_id=query.wakeword_id,
                        lang=(query.lang or self.lang),
                        custom_data=query.custom_data,
                    ),
                    {
                        "intent_name": recognition.intent.name
                    },
                )
            else:
                # Not recognized
                yield NluIntentNotRecognized(
                    input=query.input,
                    id=query.id,
                    site_id=query.site_id,
                    session_id=query.session_id,
                    custom_data=query.custom_data,
                )
        except Exception as e:
            _LOGGER.exception("handle_query")
            yield NluError(
                site_id=query.site_id,
                session_id=query.session_id,
                error=str(e),
                context=original_text,
            )
예제 #5
0
    async def handle_query(
        self, query: NluQuery
    ) -> typing.AsyncIterable[typing.Union[NluIntentParsed, typing.Tuple[
            NluIntent, TopicArgs], NluIntentNotRecognized, NluError, ]]:
        """Ask the remote parse endpoint for the intent of ``query.input``.

        When ``self.replace_numbers`` is set, ``query.input`` is first
        rewritten in place with the output of ``rhasspynlu.replace_numbers``.
        The text (passed through ``self.word_transform`` when one is set) is
        then POSTed as JSON, together with ``self.rasa_project``, to the URL
        built by ``urljoin(self.rasa_url, "model/parse")``.

        Args:
            query: The NLU query to process.  Its ``input`` attribute may be
                modified as described above.

        Yields:
            ``NluIntentParsed`` followed by an ``(NluIntent, topic_args)``
            tuple when the response names an intent and
            ``query.intent_filter`` is ``None`` or contains that name;
            otherwise one ``NluIntentNotRecognized``.  Any exception raised
            inside the method is logged and reported as an ``NluError``.
        """
        try:
            # Keep the text as received; query.input may be rewritten below.
            original_input = query.input

            # Replace digits with words
            if self.replace_numbers:
                # Have to assume whitespace tokenization
                words = rhasspynlu.replace_numbers(query.input.split(),
                                                   self.number_language)
                query.input = " ".join(words)

            input_text = query.input

            # Fix casing for output event
            if self.word_transform:
                input_text = self.word_transform(input_text)

            parse_url = urljoin(self.rasa_url, "model/parse")
            _LOGGER.debug(parse_url)

            async with self.http_session.post(
                    parse_url,
                    json={
                        "text": input_text,
                        "project": self.rasa_project
                    },
                    ssl=self.ssl_context,
            ) as response:
                response.raise_for_status()
                intent_json = await response.json()
                intent_info = intent_json.get("intent", {})
                intent_name = intent_info.get("name", "")

                if intent_name and (query.intent_filter is None
                                    or intent_name in query.intent_filter):
                    confidence_score = float(
                        intent_info.get("confidence", 0.0))

                    # Built once and shared by both outgoing messages
                    intent = Intent(intent_name=intent_name,
                                    confidence_score=confidence_score)

                    slots = [
                        Slot(
                            entity=e.get("entity", ""),
                            slot_name=e.get("entity", ""),
                            confidence=float(e.get("confidence", 0.0)),
                            value={
                                "kind": "Unknown",
                                "value": e.get("value", "")
                            },
                            raw_value=e.get("value", ""),
                            # Raw offsets reuse the offsets from the response
                            range=SlotRange(
                                start=int(e.get("start", 0)),
                                end=int(e.get("end", 1)),
                                raw_start=int(e.get("start", 0)),
                                raw_end=int(e.get("end", 1)),
                            ),
                        ) for e in intent_json.get("entities", [])
                    ]

                    if query.custom_entities:
                        # Copy user-defined entities
                        slots.extend(
                            Slot(
                                entity=entity_name,
                                confidence=1.0,
                                value={"value": entity_value},
                            ) for entity_name, entity_value in
                            query.custom_entities.items())

                    # intentParsed
                    yield NluIntentParsed(
                        input=input_text,
                        id=query.id,
                        site_id=query.site_id,
                        session_id=query.session_id,
                        intent=intent,
                        slots=slots,
                    )

                    # intent
                    yield (
                        NluIntent(
                            input=input_text,
                            id=query.id,
                            site_id=query.site_id,
                            session_id=query.session_id,
                            intent=intent,
                            slots=slots,
                            asr_tokens=[
                                NluIntent.make_asr_tokens(input_text.split())
                            ],
                            asr_confidence=query.asr_confidence,
                            raw_input=original_input,
                            # Forward the wake word id from the query so it
                            # is not lost in the published intent.
                            wakeword_id=query.wakeword_id,
                            lang=(query.lang or self.lang),
                            custom_data=query.custom_data,
                        ),
                        {
                            "intent_name": intent_name
                        },
                    )
                else:
                    # Not recognized
                    yield NluIntentNotRecognized(
                        input=query.input,
                        id=query.id,
                        site_id=query.site_id,
                        session_id=query.session_id,
                        custom_data=query.custom_data,
                    )
        except Exception as e:
            _LOGGER.exception("nlu query")
            yield NluError(
                site_id=query.site_id,
                session_id=query.session_id,
                error=str(e),
                context=query.input,
            )