Example #1
from rasa.shared.nlu.constants import TEXT  # Rasa 2.x import paths
from rasa.shared.nlu.training_data.message import Message

# Note: `dense_message` and `sparse_message` are assumed to be helpers defined
# elsewhere that summarise a feature matrix into something printable.


def print_message(message: Message) -> None:
    features = {**message.as_dict_nlu()}

    # Dense features come back as a (sequence, sentence) pair of Features objects.
    seq_vecs, sen_vecs = message.get_dense_features(TEXT)
    features["dense"] = {
        "sequence": None if not seq_vecs else dense_message(seq_vecs.features),
        "sentence": None if not sen_vecs else dense_message(sen_vecs.features),
    }

    # Sparse features follow the same (sequence, sentence) convention.
    seq_vecs, sen_vecs = message.get_sparse_features(TEXT)
    features["sparse"] = {
        "sequence": None if not seq_vecs else sparse_message(seq_vecs.features),
        "sentence": None if not sen_vecs else sparse_message(sen_vecs.features),
    }

    # Tokens are objects; keep only their text so the output stays readable.
    if "text_tokens" in features:
        features["text_tokens"] = [t.text for t in features["text_tokens"]]

    # Drop the internal "id" field from the intent and the intent ranking.
    if "intent" in features:
        features["intent"] = {k: v for k, v in features["intent"].items() if k != "id"}
    if "intent_ranking" in features:
        features["intent_ranking"] = [
            {k: v for k, v in i.items() if k != "id"}
            for i in features["intent_ranking"]
        ]

    # Diagnostic data holds raw tensors per component; summarise these as well.
    if "diagnostic_data" in features:
        features["diagnostic_data"] = {
            name: {k: dense_message(v) for k, v in comp.items()}
            for name, comp in features["diagnostic_data"].items()
        }

    print(features)
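
A minimal usage sketch, assuming a trained Rasa 2.x NLU model on disk (the model path is a placeholder). The message is pushed through the loaded pipeline by hand, in the same way Example #3 below does, before being printed:

from rasa.nlu.model import Interpreter
from rasa.shared.nlu.constants import TEXT
from rasa.shared.nlu.training_data.message import Message

# Placeholder path to an unpacked, trained NLU model directory.
interpreter = Interpreter.load("path/to/nlu")

msg = Message({TEXT: "hello there"})
for component in interpreter.pipeline:
    component.process(msg)

print_message(msg)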
Example #2
    def __getitem__(self, item):
        """
        Retreive a single embedding or a set of embeddings. We retreive the sentence encoding that
        belongs to the entire utterance.

        Arguments:
            item: single string or list of strings

        **Usage**
        ```python
        from whatlies.language import DIETLanguage

        lang = DIETLanguage("path/to/model.tar.gz")
        lang[['hi', 'hello', 'greetings']]
        ```
        """
        if isinstance(item, str):
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                msg = Message({"text": item})
                for p in self.pipeline:
                    p.process(msg)
                diagnostic_data = msg.as_dict_nlu()["diagnostic_data"]
                key_of_interest = [
                    k for k in diagnostic_data.keys() if "DIET" in k
                ][0]
                # It's assumed that the final token in the array here represents the __CLS__ token.
                # These are also known as the "sentence embeddings"
                tensors = diagnostic_data[key_of_interest]["text_transformed"]
                return Embedding(item, tensors[-1][-1])
        if isinstance(item, list):
            return EmbeddingSet(*[self[i] for i in item])
        raise ValueError(f"Item must be a string or a list of strings, got {item}.")
Example #3
from rasa.shared.nlu.constants import TEXT  # Rasa 2.x import paths
from rasa.shared.nlu.training_data.message import Message


def fetch_info_from_message(interpreter, text_input):
    # The parsed result as the interpreter would normally return it.
    blob = interpreter.parse(text_input)

    # Run the raw text through every pipeline component by hand so that the
    # intermediate Message object (and its tokens) can be inspected as well.
    msg = Message({TEXT: text_input})
    for element in interpreter.pipeline:
        element.process(msg)

    nlu_dict = msg.as_dict_nlu()
    tokens = [t.text for t in nlu_dict["text_tokens"]]
    return blob, nlu_dict, tokens
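
A short usage sketch for the helper above, again assuming a trained Rasa 2.x NLU model (the path is a placeholder):

from rasa.nlu.model import Interpreter

# Placeholder path to an unpacked, trained NLU model directory.
interpreter = Interpreter.load("path/to/nlu")
blob, nlu_dict, tokens = fetch_info_from_message(interpreter, "hello there")

print(blob["intent"])  # parsed intent from interpreter.parse(...)
print(tokens)          # plain-text tokens taken from the processed Message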