def process(self, message: Message, **kwargs: Any) -> None:
    """Attach the most likely intent and a confidence ranking to the message."""
    if not self.clf:
        # Component is either not trained or didn't receive
        # enough training data: emit empty results.
        intent = None
        intent_ranking = []
    else:
        feature_matrix = message.get("text_features").reshape(1, -1)
        label_ids, label_probs = self.predict(feature_matrix)
        intent_names = self.transform_labels_num2str(np.ravel(label_ids))
        # `predict` is batch-oriented, so flatten the single-row result.
        label_probs = label_probs.flatten()

        if intent_names.size == 0 or label_probs.size == 0:
            intent = {"name": None, "confidence": 0.0}
            intent_ranking = []
        else:
            intent = {"name": intent_names[0],
                      "confidence": label_probs[0]}
            top = list(zip(list(intent_names),
                           list(label_probs)))[:INTENT_RANKING_LENGTH]
            intent_ranking = [{"name": name, "confidence": score}
                              for name, score in top]

    message.set("intent", intent, add_to_output=True)
    message.set("intent_ranking", intent_ranking, add_to_output=True)
def process(self, message: Message, **kwargs: Any) -> None:
    """Featurize the message tokens with MITIE and merge into text features."""
    extractor = self._mitie_feature_extractor(**kwargs)
    token_features = self.features_for_tokens(message.get("tokens"),
                                              extractor)
    combined = self._combine_with_existing_text_features(message,
                                                         token_features)
    message.set("text_features", combined)
def process(self, message: Message, **kwargs: Any) -> None:
    """Run spaCy-based entity extraction and append results to the message."""
    self._check_spacy_doc(message)
    found = self.add_extractor_name(self.extract_entities(message))
    all_entities = message.get("entities", []) + found
    message.set("entities", all_entities, add_to_output=True)
def process(self, message: Message, **kwargs: Any) -> None:
    """Extract entities with spaCy NER and append them to the message.

    A fresh doc is parsed from the raw text because the cached
    ``spacy_doc`` on the message is lower cased, which is bad for NER.

    Raises:
        Exception: if no spaCy language model was passed in ``kwargs``.
    """
    spacy_nlp = kwargs.get("spacy_nlp", None)
    if spacy_nlp is None:
        # Fail with an explicit message instead of the opaque
        # "'NoneType' object is not callable" TypeError.
        raise Exception("Failed to extract entities. "
                        "Missing a spaCy language model ('spacy_nlp').")
    doc = spacy_nlp(message.text)
    extracted = self.add_extractor_name(self.extract_entities(doc))
    message.set("entities",
                message.get("entities", []) + extracted,
                add_to_output=True)
def process(self, message: Message, **kwargs: Any) -> None:
    """Vectorize the message text and merge it into the text features."""
    if self.vect is None:
        logger.error("There is no trained CountVectorizer: "
                     "component is either not trained or "
                     "didn't receive enough training data")
        return
    text = self._get_message_text(message)
    vector = self.vect.transform([text]).toarray().squeeze()
    message.set("text_features",
                self._combine_with_existing_text_features(message, vector))
def process(self, message: Message, **kwargs: Any) -> None:
    """Extract entities with MITIE and append them to the message.

    Raises:
        Exception: if no MITIE feature extractor was passed in ``kwargs``.
    """
    mitie_feature_extractor = kwargs.get("mitie_feature_extractor")
    if not mitie_feature_extractor:
        # The old message claimed a *training* failure of the mitie
        # featurizer; this runs at prediction time in the entity extractor.
        raise Exception("Failed to extract entities. "
                        "Missing a proper MITIE feature extractor.")
    ents = self.extract_entities(message.text, message.get("tokens"),
                                 mitie_feature_extractor)
    extracted = self.add_extractor_name(ents)
    message.set("entities",
                message.get("entities", []) + extracted,
                add_to_output=True)
def _parse_training_example(self, example):
    """Extract entities and synonyms, and convert to plain text."""
    from spawn_ai.training_data import Message

    entities = self._find_entities_in_training_example(example)
    # Strip the entity markup, keeping only the entity's surface text.
    plain_text = re.sub(ent_regex,
                        lambda match: match.groupdict()['entity_text'],
                        example)
    self._add_synonyms(plain_text, entities)

    message = Message(plain_text, {'intent': self.current_title})
    if entities:
        message.set('entities', entities)
    return message
def process(self, message: Message, **kwargs: Any) -> None:
    """Parse the message with a Duckling server and append any entities."""
    if self._url() is None:
        extracted = []
        logger.warning("Duckling HTTP component in pipeline, but no "
                       "`url` configuration in the config "
                       "file nor is `RASA_DUCKLING_HTTP_URL` "
                       "set as an environment variable.")
    else:
        reference_time = self._reference_time_from_message(message)
        matches = self._duckling_parse(message.text, reference_time)
        dimensions = self.component_config["dimensions"]
        relevant = filter_irrelevant_matches(matches, dimensions)
        extracted = convert_duckling_format_to_rasa(relevant)

    extracted = self.add_extractor_name(extracted)
    message.set("entities",
                message.get("entities", []) + extracted,
                add_to_output=True)
def process(self, message: Message, **kwargs: Any) -> None:
    """Classify the message intent with the trained MITIE classifier.

    Raises:
        Exception: if no MITIE feature extractor was passed in ``kwargs``.
    """
    mitie_feature_extractor = kwargs.get("mitie_feature_extractor")
    if not mitie_feature_extractor:
        # The old message blamed training of 'intent_featurizer_mitie';
        # this runs at prediction time in the intent classifier.
        raise Exception("Failed to classify intent. "
                        "Missing a proper MITIE feature extractor.")
    if self.clf:
        token_strs = self._tokens_of_message(message)
        intent, confidence = self.clf(token_strs, mitie_feature_extractor)
    else:
        # Either the model didn't get trained or it wasn't
        # provided with any data.
        intent = None
        confidence = 0.0
    message.set("intent",
                {"name": intent, "confidence": confidence},
                add_to_output=True)
def process(self, message: Message, **kwargs: Any) -> None:
    """Tokenize the cached spaCy doc and store the tokens on the message."""
    doc = message.get("spacy_doc")
    message.set("tokens", self.tokenize(doc))
def process(self, message: Message, **kwargs: Any) -> None:
    """Apply synonym replacement to a copy of the message entities."""
    # Copy first so the original list on the message is not mutated.
    entities = list(message.get("entities", []))
    self.replace_synonyms(entities)
    message.set("entities", entities, add_to_output=True)
def process(self, message: Message, **kwargs: Any) -> None:
    """Set the keyword-matched intent with full confidence."""
    intent_name = self.parse(message.text)
    message.set("intent",
                {"name": intent_name, "confidence": 1.0},
                add_to_output=True)
def process(self, message: Message, **kwargs: Any) -> None:
    """Tokenize the raw message text and store the tokens on the message."""
    tokens = self.tokenize(message.text)
    message.set("tokens", tokens)
def process(self, message: Message, **kwargs: Any) -> None:
    """Augment the message's text features with regex-match features."""
    features = self._text_features_with_regex(message)
    message.set("text_features", features)
def process(self, message: Message, **kwargs: Any) -> None:
    """Augment the message's text features with the best n-gram features."""
    updated = self._text_features_with_ngrams(message, self.best_num_ngrams)
    message.set("text_features", updated)
def process(self, message: Message, **kwargs: Any) -> None:
    """Parse the message text with spaCy and cache the resulting doc."""
    doc = self.doc_for_text(message.text)
    message.set("spacy_doc", doc)