def print_message(message: Message) -> None:
    """Pretty-print the NLU attributes of a processed Rasa ``Message``.

    Builds a plain-dict view of the message: dense/sparse features are
    converted with ``dense_message``/``sparse_message``, tokens are reduced
    to their surface text, internal ``id`` fields are stripped from intent
    payloads, and diagnostic tensors are made printable. The result is
    written to stdout with ``print``.

    Args:
        message: a Rasa NLU ``Message`` that has already been run through a
            pipeline (so features/diagnostic data may be attached).

    Returns:
        None; output goes to stdout.
    """
    # Shallow copy so the rewrites below don't mutate the message's own dict.
    features = {**message.as_dict_nlu()}

    seq_vecs, sen_vecs = message.get_dense_features(TEXT)
    features["dense"] = {
        "sequence": None if not seq_vecs else dense_message(seq_vecs.features),
        "sentence": None if not sen_vecs else dense_message(sen_vecs.features),
    }

    seq_vecs, sen_vecs = message.get_sparse_features(TEXT)
    features["sparse"] = {
        "sequence": None if not seq_vecs else sparse_message(seq_vecs.features),
        "sentence": None if not sen_vecs else sparse_message(sen_vecs.features),
    }

    # Tokens are Token objects; keep only their surface text for printing.
    if "text_tokens" in features:
        features["text_tokens"] = [t.text for t in features["text_tokens"]]

    # Drop the internal "id" field from the predicted intent(s) — it is an
    # implementation detail, not useful in printed output.
    if "intent" in features:
        features["intent"] = {k: v for k, v in features["intent"].items() if k != "id"}
    if "intent_ranking" in features:
        features["intent_ranking"] = [
            {k: v for k, v in intent.items() if k != "id"}
            for intent in features["intent_ranking"]
        ]

    # Diagnostic data holds per-component arrays; convert each to a printable form.
    if "diagnostic_data" in features:
        features["diagnostic_data"] = {
            name: {k: dense_message(v) for k, v in comp.items()}
            for name, comp in features["diagnostic_data"].items()
        }

    print(features)
def __getitem__(self, item):
    """
    Retrieve a single embedding or a set of embeddings. We retrieve the
    sentence encoding that belongs to the entire utterance.

    Arguments:
        item: single string or list of strings

    **Usage**

    ```python
    from whatlies.language import DIETLanguage

    lang = DIETLanguage("path/to/model.tar.gz")
    lang[['hi', 'hello', 'greetings']]
    ```
    """
    if isinstance(item, str):
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore", category=RuntimeWarning)
            message = Message({"text": item})
            for component in self.pipeline:
                component.process(message)
            diagnostics = message.as_dict_nlu()["diagnostic_data"]
            diet_key = [name for name in diagnostics if "DIET" in name][0]
            transformed = diagnostics[diet_key]["text_transformed"]
            # The final token in this array is assumed to be the __CLS__
            # token, a.k.a. the "sentence embedding".
            return Embedding(item, transformed[-1][-1])
    if isinstance(item, list):
        embeddings = [self[single] for single in item]
        return EmbeddingSet(*embeddings)
    raise ValueError(f"Item must be list of strings got {item}.")
def fetch_info_from_message(interpreter, text_input):
    """Parse ``text_input`` and also run it manually through the NLU pipeline.

    Args:
        interpreter: a loaded Rasa interpreter exposing ``parse`` and a
            ``pipeline`` of components with a ``process`` method.
        text_input: the raw user utterance to analyse.

    Returns:
        A 3-tuple ``(blob, nlu_dict, tokens)`` where ``blob`` is the
        interpreter's ``parse`` result, ``nlu_dict`` is the processed
        message as a plain dict, and ``tokens`` is the list of token
        surface strings.
    """
    blob = interpreter.parse(text_input)
    msg = Message({TEXT: text_input})
    # Fix: the index from enumerate() was never used — iterate directly.
    for component in interpreter.pipeline:
        component.process(msg)
    nlu_dict = msg.as_dict_nlu()
    tokens = [t.text for t in nlu_dict["text_tokens"]]
    return blob, nlu_dict, tokens