def train(self, train_dataset):
    """Train the classification model batch-by-batch over the training set.

    The training examples are split into chunks of ``batchSize``; each chunk
    is embedded, its intent labels are one-hot encoded, and the Keras model
    is fitted on it.  When ``lossThresholdToStopTraining`` is > 0, training
    stops early once BOTH the last training loss and the last validation
    loss fall below that threshold.

    Args:
        train_dataset: dict with 'trainX' (list of sentences) and 'trainY'
            (integer intent id per sentence).
    """
    train_data = du.chunks(train_dataset['trainX'],
                           self.__config['batchSize'],
                           train_dataset['trainY'])
    n_batches = math.ceil(
        len(train_dataset['trainX']) / self.__config['batchSize'])
    self.__logger('Start training classification model!')
    enough_accuracy_reached = False
    m = self.__model
    intents = self.__dataset_params["intents"]
    num_classes = len(intents)
    for idx, t_chunk in enumerate(train_data):
        # Early-stop flag is set at the bottom of the previous iteration.
        if enough_accuracy_reached:
            break
        # t_chunk[0] = sentences, t_chunk[1] = intent ids for the chunk.
        x = self.__embeddings_model.embed(t_chunk[0])
        y = to_categorical(np.array(t_chunk[1], dtype=np.int32), num_classes)
        self.__logger(f'Training batch {idx+1}.')
        m.fit(
            x=x,
            y=y,
            # batch_size=self.__config['batchSize'],  # deliberately unset
            shuffle=True,
            epochs=self.__config['epochs'],
            verbose=0,
            validation_split=self.__config['trainingValidationSplit'],
        )
        self.__logger(
            f'Trained {m.history.epoch[-1]+1} epochs on batch {idx + 1} of {n_batches}')
        self.__logger(
            f'Training Loss: {m.history.history["loss"][-1]} | Training Accuracy: {m.history.history["acc"][-1]}')
        self.__logger(
            f'Validation Loss: {m.history.history["val_loss"][-1]} | Validation Accuracy: {m.history.history["val_acc"][-1]}')
        self.__logger(
            '==================================================================================================')
        # Early stopping: both train and validation loss must be under the
        # configured threshold (a threshold of 0 disables the check).
        if (self.__config["lossThresholdToStopTraining"] > 0
                and m.history.history["loss"][-1] < self.__config["lossThresholdToStopTraining"]
                and m.history.history["val_loss"][-1] < self.__config["lossThresholdToStopTraining"]):
            enough_accuracy_reached = True
            self.__logger(
                f'Enough accuracy reached! Ending training after batch {idx + 1} of {n_batches}')
            self.__logger(
                '==================================================================================================')
def test(self, test_examples, results_handler=None):
    """Evaluate the classifier over a test set and collect stats.

    Args:
        test_examples: dict with 'testX' (list of sentences) and 'testY'
            (integer intent id per sentence).
        results_handler: optional callable ``(sentences, intents,
            predictions, stats)`` that scores one batch and updates
            ``stats`` in place; defaults to the built-in results logger.

    Returns:
        The ``stats`` dict with 'correct', 'wrong' and 'lowConfidence'
        counters as filled in by the handler.
    """
    chunks = du.chunks(test_examples['testX'],
                       self.__config['batchSize'],
                       test_examples['testY'])
    # Identity comparison with None (`is not None`) instead of `!= None`.
    handler = (results_handler if results_handler is not None
               else self.__default_results_logger)
    stats = {'correct': 0, 'wrong': 0, 'lowConfidence': 0}
    for t_chunk in chunks:
        x = t_chunk[0]  # sentences
        y = t_chunk[1]  # intent code per sentence
        predictions = self.predict(x)
        handler(x, y, predictions, stats)
    return stats
def train(self, train_dataset):
    """Train the classification model batch-by-batch over the training set.

    Splits the training examples into chunks of ``batchSize``, embeds each
    chunk, one-hot encodes its intent labels and fits the Keras model.
    When ``lossThresholdToStopTraining`` is > 0, training stops early once
    both the last training loss and the last validation loss fall below
    that threshold.

    Args:
        train_dataset: dict with 'trainX' (list of sentences) and 'trainY'
            (integer intent id per sentence).
    """
    train_data = du.chunks(train_dataset['trainX'],
                           self.__config['batchSize'],
                           train_dataset['trainY'])
    n_batches = math.ceil(
        len(train_dataset['trainX']) / self.__config['batchSize'])
    # Plain string literal: the original used an f-string with no
    # placeholders (F541).
    self.__logger('Start training classification model!')
    enough_accuracy_reached = False
    m = self.__model
    intents = self.__dataset_params["intents"]
    num_classes = len(intents)
    for idx, t_chunk in enumerate(train_data):
        # Early-stop flag is set at the bottom of the previous iteration.
        if enough_accuracy_reached:
            break
        # t_chunk[0] = sentences, t_chunk[1] = intent ids for the chunk.
        x = self.__embeddings_model.embed(t_chunk[0])
        y = to_categorical(np.array(t_chunk[1], dtype=np.int32), num_classes)
        self.__logger(f'Training batch {idx+1}.')
        m.fit(
            x=x,
            y=y,
            # batch_size=self.__config['batchSize'],  # deliberately unset
            shuffle=True,
            epochs=self.__config['epochs'],
            verbose=0,
            validation_split=self.__config['trainingValidationSplit'],
        )
        self.__logger(
            f'Trained {m.history.epoch[-1]+1} epochs on batch {idx + 1} of {n_batches}'
        )
        self.__logger(
            f'Training Loss: {m.history.history["loss"][-1]} | Training Accuracy: {m.history.history["acc"][-1]}'
        )
        self.__logger(
            f'Validation Loss: {m.history.history["val_loss"][-1]} | Validation Accuracy: {m.history.history["val_acc"][-1]}'
        )
        self.__logger(
            '=================================================================================================='
        )
        # Early stopping: both train and validation loss must be under the
        # configured threshold (a threshold of 0 disables the check).
        if (self.__config["lossThresholdToStopTraining"] > 0
                and m.history.history["loss"][-1] < self.__config["lossThresholdToStopTraining"]
                and m.history.history["val_loss"][-1] < self.__config["lossThresholdToStopTraining"]):
            enough_accuracy_reached = True
            self.__logger(
                f'Enough accuracy reached! Ending training after batch {idx + 1} of {n_batches}'
            )
            self.__logger(
                '=================================================================================================='
            )
def test(self, test_examples, results_handler=None):
    """Evaluate the slot-tagging model over a test set and collect stats.

    For each sentence the gold intent (from 'testY') is supplied to the
    predictor with confidence 1, so only the slot predictions are being
    tested; the per-word highest-scoring slot indices are then handed to
    the results handler together with the gold slot ids ('testY2').

    Args:
        test_examples: dict with 'testX' (sentences), 'testY' (intent id
            per sentence) and 'testY2' (slot ids per sentence word).
        results_handler: optional callable ``(sentences, gold_slots,
            predicted_slots, stats)`` updating ``stats`` in place;
            defaults to the built-in results logger.

    Returns:
        The ``stats`` dict with 'correct' and 'wrong' counters as filled
        in by the handler.
    """
    # Identity comparison with None (`is not None`) instead of `!= None`.
    handler = (results_handler if results_handler is not None
               else self.__default_results_logger)
    chunks = du.chunks(test_examples['testX'],
                       self.__config['batchSize'],
                       test_examples['testY'],
                       test_examples['testY2'])
    stats = {'correct': 0, 'wrong': 0}
    for t_chunk in chunks:
        test_x = t_chunk[0]   # sentences
        test_y = t_chunk[1]   # intent code per sentence
        test_y2 = t_chunk[2]  # slot ids per sentence word
        # Feed the gold intent with full confidence so the slot model is
        # evaluated independently of the intent classifier.
        p_intent = [{
            'confidence': 1,
            'intent': self.__dataset_params['intents'][test_y[sentence_id]],
            'sentence': sentence,
        } for sentence_id, sentence in enumerate(test_x)]
        predictions = self.raw_prediction(test_x, p_intent)
        # Nested comprehension instead of a manual append loop (PERF401):
        # keep only the highest-scoring slot index per word.
        preds = [[word['highestIndex'] for word in sentence_words]
                 for sentence_words in predictions]
        handler(test_x, test_y2, preds, stats)
    return stats
def train(self, train_dataset):
    """Train the NER (slot-filling) model batch-by-batch.

    Each batch is embedded at the word level and at the word-character
    level, and the gold intent of every sentence is supplied as an extra
    one-hot input.  The per-word slot ids are zero-padded up to
    ``maxWordsPerSentence`` and one-hot encoded as the fit target.  When
    ``lossThresholdToStopTraining`` is > 0, training stops early once both
    the last training loss and the last validation loss fall below that
    threshold.

    Args:
        train_dataset: dict with 'trainX' (sentences), 'trainY' (intent id
            per sentence) and 'trainY2' (slot ids per sentence word).
    """
    chunks = du.chunks(train_dataset['trainX'],
                       self.__config['batchSize'],
                       train_dataset['trainY'],
                       train_dataset['trainY2'])
    self.__logger(
        f'Start training NER model! (attention enabled: {self.__config["addAttention"]})'
    )
    enough_accuracy_reached = False
    m = self.__model
    # len() of the mapping directly — no need to materialize .keys().
    num_slot_types = len(self.__dataset_params["slotsToId"])
    # Hoist loop-invariant lookup out of the batch/padding loops.
    max_words = self.__dataset_params['maxWordsPerSentence']
    n_batches = math.ceil(
        len(train_dataset['trainX']) / self.__config['batchSize'])
    for idx, t_chunk in enumerate(chunks):
        train_x_chunks = t_chunk[0]   # sentences
        train_y_chunks = t_chunk[1]   # intent code per sentence
        train_y2_chunks = t_chunk[2]  # slot ids per sentence word
        # Early-stop flag is set at the bottom of the previous iteration.
        if enough_accuracy_reached:
            break
        intent_labels = to_categorical(
            np.array(train_y_chunks, dtype=np.int32),
            len(self.__dataset_params['intents']))
        embedded_sentence_words = self.__embeddings_model.embed(
            train_x_chunks)
        embedded_sentence_word_chars = \
            self.__embeddings_model.embed_by_word_characters(train_x_chunks)
        # Pad every sentence's slot-id vector to the fixed sentence length,
        # one-hot encode it, then stack into a single target array.
        y2_sentences = []
        for words_slot_id in train_y2_chunks:
            slot_ids = np.array(words_slot_id, dtype=np.int32)
            pad_width = max_words - len(words_slot_id)
            padded_slot_ids = np.pad(slot_ids, [[0, pad_width]],
                                     mode='constant')
            y2_sentences.append(
                to_categorical(padded_slot_ids, num_slot_types))
        slot_tags = np.stack(y2_sentences)
        m.fit(
            x=[
                intent_labels, embedded_sentence_words,
                embedded_sentence_word_chars
            ],
            y=slot_tags,
            shuffle=True,
            # batch_size=self.__config['batchSize'],
            # IMPORTANT: adding batch size here makes the optimization bad
            epochs=self.__config['epochs'],
            verbose=0,
            validation_split=self.__config['trainingValidationSplit'],
        )
        self.__logger(
            f'Trained {m.history.epoch[-1]+1} epochs on batch {idx + 1} of {n_batches}'
        )
        self.__logger(
            f'Training Loss: {m.history.history["loss"][-1]} | Training Accuracy: {m.history.history["acc"][-1]}'
        )
        self.__logger(
            f'Validation Loss: {m.history.history["val_loss"][-1]} | Validation Accuracy: {m.history.history["val_acc"][-1]}'
        )
        self.__logger(
            '=================================================================================================='
        )
        # Early stopping: both train and validation loss must be under the
        # configured threshold (a threshold of 0 disables the check).
        if (self.__config["lossThresholdToStopTraining"] > 0
                and m.history.history["loss"][-1] < self.__config["lossThresholdToStopTraining"]
                and m.history.history["val_loss"][-1] < self.__config["lossThresholdToStopTraining"]):
            enough_accuracy_reached = True
            self.__logger(
                f'Enough accuracy reached! Ending training after batch {idx + 1} of {n_batches}'
            )
            self.__logger(
                '=================================================================================================='
            )