def _set_lm_features(self, message: Message, attribute: Text = TEXT) -> None:
    """Adds the precomputed word vectors to the message's features."""
    doc = self._get_doc(message, attribute)

    if doc is None:
        return

    sequence_features = doc[SEQUENCE_FEATURES]
    sentence_features = doc[SENTENCE_FEATURES]

    final_sequence_features = Features(
        sequence_features,
        FEATURE_TYPE_SEQUENCE,
        attribute,
        self.component_config[FEATURIZER_CLASS_ALIAS],
    )
    message.add_features(final_sequence_features)
    final_sentence_features = Features(
        sentence_features,
        FEATURE_TYPE_SENTENCE,
        attribute,
        self.component_config[FEATURIZER_CLASS_ALIAS],
    )
    message.add_features(final_sentence_features)
def _set_spacy_features(self, message: Message, attribute: Text = TEXT) -> None:
    """Adds the spaCy word vectors to the message's features."""
    doc = self.get_doc(message, attribute)

    if doc is None:
        return

    # in case an empty spaCy model was used, no vectors are present
    if doc.vocab.vectors_length == 0:
        logger.debug("No features present. You are using an empty spaCy model.")
        return

    sequence_features = self._features_for_doc(doc)
    sentence_features = self._calculate_sentence_features(
        sequence_features, self.pooling_operation
    )

    final_sequence_features = Features(
        sequence_features,
        FEATURE_TYPE_SEQUENCE,
        attribute,
        self.component_config[FEATURIZER_CLASS_ALIAS],
    )
    message.add_features(final_sequence_features)
    final_sentence_features = Features(
        sentence_features,
        FEATURE_TYPE_SENTENCE,
        attribute,
        self.component_config[FEATURIZER_CLASS_ALIAS],
    )
    message.add_features(final_sentence_features)
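# NOTE: `_calculate_sentence_features` is not shown above. The helper below is NOT the
# actual implementation; it is a minimal sketch, assuming the sentence-level feature is
# obtained by pooling the per-token sequence features with either mean or max pooling
# (the name `_calculate_sentence_features_sketch` is hypothetical).
from typing import Text

import numpy as np


def _calculate_sentence_features_sketch(
    sequence_features: np.ndarray, pooling_operation: Text
) -> np.ndarray:
    """Pools a (seq_len, dim) feature matrix into a (1, dim) sentence vector."""
    if pooling_operation == "mean":
        return np.mean(sequence_features, axis=0, keepdims=True)
    if pooling_operation == "max":
        return np.max(sequence_features, axis=0, keepdims=True)
    raise ValueError(f"Unknown pooling operation '{pooling_operation}'.")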
def process(self, message: Message, **kwargs: Any) -> None:
    """Featurizes the incoming message with the MITIE feature extractor."""
    mitie_feature_extractor = self._mitie_feature_extractor(**kwargs)
    tokens = train_utils.tokens_without_cls(message)
    features = self.features_for_tokens(tokens, mitie_feature_extractor)

    final_features = Features(
        features, TEXT, self.component_config[FEATURIZER_CLASS_ALIAS]
    )
    message.add_features(final_features)
def process(
    self, message: Message, *, tf_hub_module: Any = None, **kwargs: Any
) -> None:
    features = self._compute_features([message], tf_hub_module)[0]

    final_features = Features(
        features, TEXT, self.component_config[FEATURIZER_CLASS_ALIAS]
    )
    message.add_features(final_features)
def _text_features_with_regex(self, message: Message, attribute: Text) -> None:
    if self.known_patterns:
        features = self._features_for_patterns(message, attribute)

        if features is not None:
            final_features = Features(
                features, attribute, self.component_config[FEATURIZER_CLASS_ALIAS]
            )
            message.add_features(final_features)
def process_training_example(
    self, example: Message, attribute: Text, mitie_feature_extractor: Any
):
    tokens = train_utils.tokens_without_cls(example, attribute)

    if tokens is not None:
        features = self.features_for_tokens(tokens, mitie_feature_extractor)

        final_features = Features(
            features, attribute, self.component_config[FEATURIZER_CLASS_ALIAS]
        )
        example.add_features(final_features)
def _create_sparse_features(self, message: Message) -> None:
    """Convert incoming messages into sparse features using the configured features."""
    import scipy.sparse

    # [:-1] to remove CLS token
    tokens = message.get(TOKENS_NAMES[TEXT])[:-1]

    sentence_features = self._tokens_to_features(tokens)
    one_hot_feature_vector = self._features_to_one_hot(sentence_features)

    sparse_features = scipy.sparse.coo_matrix(one_hot_feature_vector)

    final_features = Features(
        sparse_features, TEXT, self.component_config[FEATURIZER_CLASS_ALIAS]
    )
    message.add_features(final_features)
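# NOTE: `_features_to_one_hot` is not shown above; the sketch below is only an
# illustrative assumption about its shape, not the actual implementation. It assumes
# each token is described by a list of lexical feature names and builds a dense one-hot
# matrix with one row per token, which `_create_sparse_features` then wraps in
# `scipy.sparse.coo_matrix`. The name `_features_to_one_hot_sketch` is hypothetical.
from typing import Dict, List

import numpy as np


def _features_to_one_hot_sketch(
    token_features: List[List[str]], feature_to_index: Dict[str, int]
) -> np.ndarray:
    """Builds a (num_tokens, num_features) one-hot matrix from per-token feature names."""
    one_hot = np.zeros((len(token_features), len(feature_to_index)), dtype=np.float32)
    for row, names in enumerate(token_features):
        for name in names:
            if name in feature_to_index:
                one_hot[row, feature_to_index[name]] = 1.0
    return one_hot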
def process(self, message: Message, **kwargs: Any) -> None:
    """Processes an incoming message, computing and setting its features."""
    if self.vectorizers is None:
        logger.error(
            "There is no trained CountVectorizer: "
            "component is either not trained or "
            "didn't receive enough training data"
        )
        return

    attribute = TEXT
    message_tokens = self._get_processed_message_tokens_by_attribute(
        message, attribute
    )

    # features shape (1, seq, dim)
    features = self._create_sequence(attribute, [message_tokens])

    if features[0] is not None:
        final_features = Features(
            features[0], attribute, self.component_config[FEATURIZER_CLASS_ALIAS]
        )
        message.add_features(final_features)
def _set_features(
    self,
    message: Message,
    sequence_features: np.ndarray,
    sentence_features: np.ndarray,
    attribute: Text,
):
    """Adds the given sequence- and sentence-level features to the message."""
    final_sequence_features = Features(
        sequence_features,
        FEATURE_TYPE_SEQUENCE,
        attribute,
        self.component_config[FEATURIZER_CLASS_ALIAS],
    )
    message.add_features(final_sequence_features)
    final_sentence_features = Features(
        sentence_features,
        FEATURE_TYPE_SENTENCE,
        attribute,
        self.component_config[FEATURIZER_CLASS_ALIAS],
    )
    message.add_features(final_sentence_features)
def _text_features_with_regex(self, message: Message, attribute: Text) -> None:
    if self.known_patterns:
        sequence_features, sentence_features = self._features_for_patterns(
            message, attribute
        )

        if sequence_features is not None:
            final_sequence_features = Features(
                sequence_features,
                FEATURE_TYPE_SEQUENCE,
                attribute,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
            message.add_features(final_sequence_features)

        if sentence_features is not None:
            final_sentence_features = Features(
                sentence_features,
                FEATURE_TYPE_SENTENCE,
                attribute,
                self.component_config[FEATURIZER_CLASS_ALIAS],
            )
            message.add_features(final_sentence_features)
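# NOTE: `_features_for_patterns` is not shown above; the sketch below is only an
# illustrative assumption of how regex-based featurization can work: one sparse column
# per known pattern, set to 1.0 for every token whose text matches that pattern. As the
# caller above shows, the real helper also returns a separate sentence-level feature,
# which is omitted here. The name `_features_for_patterns_sketch` is hypothetical.
import re
from typing import Any, Dict, List

import numpy as np
import scipy.sparse


def _features_for_patterns_sketch(
    tokens: List[Any], known_patterns: List[Dict[str, str]]
) -> scipy.sparse.coo_matrix:
    """Returns a (num_tokens, num_patterns) sparse matrix of regex match flags.

    `tokens` are assumed to expose a `.text` attribute (e.g. Rasa `Token` objects).
    """
    matrix = np.zeros((len(tokens), len(known_patterns)), dtype=np.float32)
    for col, pattern in enumerate(known_patterns):
        compiled = re.compile(pattern["pattern"], flags=re.IGNORECASE)
        for row, token in enumerate(tokens):
            if compiled.search(token.text):
                matrix[row, col] = 1.0
    return scipy.sparse.coo_matrix(matrix)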
def _set_spacy_features(self, message: Message, attribute: Text = TEXT) -> None:
    """Adds the spaCy word vectors to the message's features."""
    doc = self.get_doc(message, attribute)

    if doc is None:
        return

    # in case an empty spaCy model was used, no vectors are present
    if doc.vocab.vectors_length == 0:
        logger.debug("No features present. You are using an empty spaCy model.")
        return

    features = self._features_for_doc(doc)

    cls_token_vec = self._calculate_cls_vector(features, self.pooling_operation)
    features = np.concatenate([features, cls_token_vec])

    final_features = Features(
        features, attribute, self.component_config[FEATURIZER_CLASS_ALIAS]
    )
    message.add_features(final_features)