def _create_session_data(
    self,
    training_data: "TrainingData",
    label_id_dict: Dict[Text, int],
    attribute: Text,
) -> "train_utils.SessionData":
    """Assemble the training SessionData from the intent examples.

    Only examples whose ``attribute`` value is truthy contribute. For each
    such example the text feature vector is collected as an input row and
    the attribute value is translated to its label id via ``label_id_dict``.

    Args:
        training_data: Source of ``intent_examples`` to iterate over.
        label_id_dict: Mapping from an attribute value to its label id.
        attribute: Name of the message attribute that holds the label.

    Returns:
        A ``SessionData`` with ``X`` (collected features), ``Y`` (the entry
        of ``self._encoded_all_label_ids`` for every label id) and
        ``label_ids`` (the label ids themselves), all as numpy arrays.
    """
    # Single pass so that, per example, the feature lookup happens before
    # the label lookup -- examples without the attribute are skipped.
    feature_label_pairs = [
        (
            example.get(MESSAGE_VECTOR_FEATURE_NAMES[MESSAGE_TEXT_ATTRIBUTE]),
            label_id_dict[example.get(attribute)],
        )
        for example in training_data.intent_examples
        if example.get(attribute)
    ]

    X = np.array([features for features, _ in feature_label_pairs])
    label_ids = np.array([label_id for _, label_id in feature_label_pairs])

    # Look up the encoded representation of every label id.
    Y = np.array([self._encoded_all_label_ids[idx] for idx in label_ids])

    return train_utils.SessionData(X=X, Y=Y, label_ids=label_ids)
def _create_session_data(
    self, data_X: "np.ndarray", data_Y: Optional["np.ndarray"] = None
) -> "train_utils.SessionData":
    """Bundle the tf-session related data into a SessionData object.

    Args:
        data_X: Input features; passed through unchanged as ``X``.
        data_Y: Labels. When ``None`` (prediction time) both ``Y`` and
            ``label_ids`` of the result are ``None``.

    Returns:
        A ``SessionData`` holding ``X``, ``Y`` and ``label_ids``.
    """
    if data_Y is None:
        # Prediction time: there are no labels to attach.
        return train_utils.SessionData(X=data_X, Y=None, label_ids=None)

    # Training time: derive the label ids first, then the label features
    # from those (still unconverted) ids.
    label_ids = self._label_ids_for_Y(data_Y)
    Y = self._label_features_for_Y(label_ids)

    # Idea taken from sklearn's stratify split: for multi-label y, map each
    # distinct row to a string representation.
    if label_ids.ndim == 2:
        # join is used because str(row) uses an ellipsis if len(row) > 1000
        row_keys = [" ".join(row.astype("str")) for row in label_ids]
        label_ids = np.array(row_keys)

    return train_utils.SessionData(X=data_X, Y=Y, label_ids=label_ids)