def preprocess_train_data(self, training_data: TrainingData) -> RasaModelData:
    """Turn raw training data into the model data used for training.

    If ``self.retrieval_intent`` is set, the training data is first passed
    through ``filter_by_intent`` with that value. Label encodings are then
    built from the ``RESPONSE`` attribute and stored on the instance
    (``index_label_id_mapping`` and ``_label_data``).

    Returns:
        The model data created from the intent examples, after the input
        dimension consistency check has run. An empty ``RasaModelData`` is
        returned when no label mapping could be built.
    """
    # Work on a separate name instead of rebinding the parameter.
    examples = training_data
    if self.retrieval_intent:
        examples = training_data.filter_by_intent(self.retrieval_intent)

    label_to_index = self._label_id_index_mapping(examples, attribute=RESPONSE)
    if not label_to_index:
        # Without any labels there is nothing to train on.
        return RasaModelData()

    # Keep the reverse lookup and the encoded labels on the instance.
    self.index_label_id_mapping = self._invert_mapping(label_to_index)
    self._label_data = self._create_label_data(
        examples, label_to_index, attribute=RESPONSE
    )

    prepared = self._create_model_data(
        examples.intent_examples, label_to_index, label_attribute=RESPONSE
    )
    self._check_input_dimension_consistency(prepared)
    return prepared
def preprocess_train_data(self, training_data: TrainingData) -> RasaModelData:
    """Turn raw training data into the model data used for training.

    If ``self.retrieval_intent`` is set, the training data is first passed
    through ``filter_by_intent`` with that value; otherwise an info message
    is logged and the data is used unfiltered. Label encodings are built
    from the ``RESPONSE`` attribute, and ``retrieval_intent_mapping`` is
    stored on the instance even when no labels are found.

    Returns:
        The model data created from the intent examples, after the input
        dimension consistency check has run. An empty ``RasaModelData`` is
        returned when no label mapping could be built.
    """
    # Work on a separate name instead of rebinding the parameter.
    examples = training_data
    if not self.retrieval_intent:
        # Retrieval intent was left to its default value.
        logger.info(
            "Retrieval intent parameter was left to its default value. This "
            "response selector will be trained on training examples combining "
            "all retrieval intents."
        )
    else:
        examples = training_data.filter_by_intent(self.retrieval_intent)

    label_to_index = self._label_id_index_mapping(examples, attribute=RESPONSE)

    # Set before the empty-label guard so it is populated on every path.
    self.retrieval_intent_mapping = self._create_retrieval_intent_mapping(examples)

    if not label_to_index:
        # Without any labels there is nothing to train on.
        return RasaModelData()

    # Keep the reverse lookup and the encoded labels on the instance.
    self.index_label_id_mapping = self._invert_mapping(label_to_index)
    self._label_data = self._create_label_data(
        examples, label_to_index, attribute=RESPONSE
    )

    prepared = self._create_model_data(
        examples.intent_examples, label_to_index, label_attribute=RESPONSE
    )
    self._check_input_dimension_consistency(prepared)
    return prepared