def train(
    self,
    training_trackers: List["DialogueStateTracker"],
    domain: "Domain",
    **kwargs: Any,
) -> None:
    """Train the policy on given training trackers."""
    logger.debug("Started training embedding policy.")

    # Seed numpy's RNG so featurization and data splitting are reproducible.
    np.random.seed(self.random_seed)

    # Featurize the trackers into training data.
    training_data = self.featurize_for_training(training_trackers, domain, **kwargs)

    # Encode every label id with the policy's state featurizer.
    encoder = self.featurizer.state_featurizer
    self._encoded_all_label_ids = encoder.create_encoded_all_actions(domain)

    # The number of negative samples cannot exceed num_actions - 1.
    logger.debug(
        "Check if num_neg {} is smaller "
        "than number of label_ids {}, "
        "else set num_neg to the number of label_ids - 1"
        "".format(self.num_neg, domain.num_actions)
    )
    # noinspection PyAttributeOutsideInit
    self.num_neg = min(self.num_neg, domain.num_actions - 1)

    # Convert the featurized data into the session-data format fed to TF.
    session_data = self._create_session_data(training_data.X, training_data.y)

    if self.evaluate_on_num_examples:
        session_data, eval_session_data = train_utils.train_val_split(
            session_data,
            self.evaluate_on_num_examples,
            self.random_seed,
            label_key="action_ids",
        )
    else:
        eval_session_data = None

    self.graph = tf.Graph()
    with self.graph.as_default():
        # Seed TF's RNG inside the freshly created graph.
        tf.set_random_seed(self.random_seed)

        # Placeholder so the batch size can be increased during training.
        batch_size_placeholder = tf.placeholder(tf.int64)

        (
            self._iterator,
            init_train_dataset_op,
            init_eval_dataset_op,
        ) = train_utils.create_iterator_init_datasets(
            session_data,
            eval_session_data,
            batch_size_placeholder,
            self.batch_strategy,
            label_key="action_ids",
        )

        self._is_training = tf.placeholder_with_default(False, shape=())

        train_loss, train_acc = self._build_tf_train_graph()

        # Adam is used as the optimizer.
        self._train_op = tf.train.AdamOptimizer().minimize(train_loss)

        # Run the training loop over the tf dataset.
        self.session = tf.Session(config=self._tf_config)
        train_utils.train_tf_dataset(
            init_train_dataset_op,
            init_eval_dataset_op,
            batch_size_placeholder,
            train_loss,
            train_acc,
            self._train_op,
            self.session,
            self._is_training,
            self.epochs,
            self.batch_size,
            self.evaluate_on_num_examples,
            self.evaluate_every_num_epochs,
        )

        # Rebuild the graph for prediction.
        self.pred_confidence = self._build_tf_pred_graph(session_data)

        self.attention_weights = train_utils.extract_attention(
            self.attention_weights
        )
def train(
    self,
    training_data: "TrainingData",
    cfg: Optional["RasaNLUModelConfig"] = None,
    **kwargs: Any,
) -> None:
    """Train the embedding intent classifier on a data set."""
    logger.debug("Started training embedding classifier.")

    # Seed numpy's RNG so preprocessing and data splitting are reproducible.
    np.random.seed(self.random_seed)

    session_data = self.preprocess_train_data(training_data)

    # Guard clause: training needs at least two distinct classes.
    if not self._check_enough_labels(session_data):
        logger.error(
            "Can not train a classifier. "
            "Need at least 2 different classes. "
            "Skipping training of classifier."
        )
        return

    if self.evaluate_on_num_examples:
        session_data, eval_session_data = train_utils.train_val_split(
            session_data,
            self.evaluate_on_num_examples,
            self.random_seed,
            label_key="label_ids",
        )
    else:
        eval_session_data = None

    self.graph = tf.Graph()
    with self.graph.as_default():
        # Seed TF's RNG inside the freshly created graph.
        tf.set_random_seed(self.random_seed)

        # Placeholder so the batch size can be increased during training.
        batch_size_placeholder = tf.placeholder(tf.int64)

        (
            self._iterator,
            init_train_dataset_op,
            init_eval_dataset_op,
        ) = train_utils.create_iterator_init_datasets(
            session_data,
            eval_session_data,
            batch_size_placeholder,
            self.batch_in_strategy,
            label_key="label_ids",
        )

        self._is_training = tf.placeholder_with_default(False, shape=())

        train_loss, train_acc = self._build_tf_train_graph(session_data)

        # Adam is used as the optimizer.
        self._train_op = tf.train.AdamOptimizer().minimize(train_loss)

        # Run the training loop over the tf dataset.
        self.session = tf.Session(config=self._tf_config)
        train_utils.train_tf_dataset(
            init_train_dataset_op,
            init_eval_dataset_op,
            batch_size_placeholder,
            train_loss,
            train_acc,
            self._train_op,
            self.session,
            self._is_training,
            self.epochs,
            self.batch_in_size,
            self.evaluate_on_num_examples,
            self.evaluate_every_num_epochs,
        )

        # Rebuild the graph for prediction.
        self.pred_confidence = self._build_tf_pred_graph(session_data)