def load(self):
    """Restore a previously saved joint intent/slot model from ``self.save_folder_path``.

    Reloads the saved hyper-parameters, rebuilds the BERT vectorizer,
    unpickles the tag/intent label encoders, and loads the appropriate
    model class (CRF head or plain) into ``self.model``.

    Side effects: sets ``self.bert_vectorizer``, ``self.tags_vectorizer``,
    ``self.intents_label_encoder`` and ``self.model``.
    Raises: ``FileNotFoundError`` / ``KeyError`` if the save folder or the
    expected keys in ``params.json`` are missing.
    """
    load_folder_path = self.save_folder_path

    # Hyper-parameters persisted at save time. Note: the class counts
    # stored in params.json are NOT used — they are re-derived from the
    # pickled label encoders below, which is the authoritative source.
    with open(os.path.join(load_folder_path, 'params.json'), 'r') as json_file:
        model_params = json.load(json_file)

    bert_model_hub_path = model_params['bert_hub_path']
    is_bert = model_params['is_bert']
    # Older saves predate the CRF option, so default to the plain model.
    is_crf = model_params.get('is_crf', False)

    self.bert_vectorizer = BERTVectorizer(self.sess, is_bert, bert_model_hub_path)

    with open(os.path.join(load_folder_path, 'tags_vectorizer.pkl'), 'rb') as handle:
        self.tags_vectorizer = pickle.load(handle)

    with open(os.path.join(load_folder_path, 'intents_label_encoder.pkl'), 'rb') as handle:
        self.intents_label_encoder = pickle.load(handle)

    # Dispatch on the saved flag rather than guessing from load errors.
    if is_crf:
        self.model = JointBertCRFModel.load(load_folder_path, self.sess)
    else:
        self.model = JointBertModel.load(load_folder_path, self.sess)
def initialize():
    """Populate the module-level TF session, vectorizer, encoders and model.

    Globals written: ``graph``, ``sess``, ``bert_vectorizer``,
    ``slots_num``, ``tags_vectorizer``, ``intents_num``,
    ``intents_label_encoder``, ``model``.
    Globals read (must be defined elsewhere in the module):
    ``is_bert``, ``bert_model_hub_path``, ``load_folder_path``.
    Raises: ``FileNotFoundError`` if ``load_folder_path`` does not exist.
    """
    global graph
    graph = tf.get_default_graph()
    global sess
    sess = tf.compat.v1.Session()
    set_session(sess)

    global bert_vectorizer
    bert_vectorizer = BERTVectorizer(sess, is_bert, bert_model_hub_path)

    # loading models
    print('Loading models ...')
    if not os.path.exists(load_folder_path):
        # Fail fast: silently continuing would only crash later inside
        # open() with a less informative error.
        raise FileNotFoundError('Folder `%s` not exist' % load_folder_path)

    global slots_num
    global tags_vectorizer
    with open(os.path.join(load_folder_path, 'tags_vectorizer.pkl'), 'rb') as handle:
        tags_vectorizer = pickle.load(handle)
    slots_num = len(tags_vectorizer.label_encoder.classes_)

    global intents_num
    global intents_label_encoder
    with open(os.path.join(load_folder_path, 'intents_label_encoder.pkl'), 'rb') as handle:
        intents_label_encoder = pickle.load(handle)
    intents_num = len(intents_label_encoder.classes_)

    global model
    # Nothing in the save folder tells us whether the model used a CRF
    # head, so try the plain model first and fall back to the CRF variant
    # when its artifacts are missing (OSError from the failed load).
    try:
        model = JointBertModel.load(load_folder_path, sess)
    except OSError:
        model = JointBertCRFModel.load(load_folder_path, sess)
def initialize():
    """Populate the module-level vectorizer, encoders and model (no TF session).

    Globals written: ``bert_vectorizer``, ``slots_num``, ``tags_vectorizer``,
    ``intents_num``, ``intents_label_encoder``, ``model``.
    Globals read (must be defined elsewhere in the module):
    ``is_bert``, ``bert_model_hub_path``, ``load_folder_path``.
    Raises: ``FileNotFoundError`` if ``load_folder_path`` does not exist.
    """
    global bert_vectorizer
    bert_vectorizer = BERTVectorizer(is_bert, bert_model_hub_path)

    # loading models
    print('Loading models ...')
    if not os.path.exists(load_folder_path):
        # Fail fast: silently continuing would only crash later inside
        # open() with a less informative error.
        raise FileNotFoundError('Folder `%s` not exist' % load_folder_path)

    global slots_num
    global tags_vectorizer
    with open(os.path.join(load_folder_path, 'tags_vectorizer.pkl'), 'rb') as handle:
        tags_vectorizer = pickle.load(handle)
    slots_num = len(tags_vectorizer.label_encoder.classes_)

    global intents_num
    global intents_label_encoder
    with open(os.path.join(load_folder_path, 'intents_label_encoder.pkl'), 'rb') as handle:
        intents_label_encoder = pickle.load(handle)
    intents_num = len(intents_label_encoder.classes_)

    global model
    model = JointBertModel.load(load_folder_path)
# loading models print('Loading models ...') if not os.path.exists(load_folder_path): print('Folder `%s` not exist' % load_folder_path) with open(os.path.join(load_folder_path, 'tags_vectorizer.pkl'), 'rb') as handle: tags_vectorizer = pickle.load(handle) slots_num = len(tags_vectorizer.label_encoder.classes_) with open(os.path.join(load_folder_path, 'intents_label_encoder.pkl'), 'rb') as handle: intents_label_encoder = pickle.load(handle) intents_num = len(intents_label_encoder.classes_) model = JointBertModel.load(load_folder_path, sess) data_text_arr, data_tags_arr, data_intents = Reader.read(data_folder_path) data_input_ids, data_input_mask, data_segment_ids, data_valid_positions, data_sequence_lengths = bert_vectorizer.transform( data_text_arr) def get_results(input_ids, input_mask, segment_ids, valid_positions, sequence_lengths, tags_arr, intents, tags_vectorizer, intents_label_encoder): predicted_tags, predicted_intents = model.predict_slots_intent( [input_ids, input_mask, segment_ids, valid_positions], tags_vectorizer, intents_label_encoder, remove_start_end=True) gold_tags = [x.split() for x in tags_arr]
# Chunk is incomplete at both ends: it starts mid-scope and ends inside
# the cross-validation loop, so comments below hedge where context is missing.
slots_num = len(tags_vectorizer.label_encoder.classes_)

print('encode labels ...')
intents_label_encoder = LabelEncoder()
intents = intents_label_encoder.fit_transform(intents).astype(np.int32)
intents_num = len(intents_label_encoder.classes_)

# Build a fresh model or resume from a previous save.
# NOTE(review): the else-branch reloads from save_folder_path regardless of
# what `model` already holds — confirm that is the intended resume semantics.
if model is None:
    model = JointBertModel(slots_num, intents_num, bert_model_hub_path, sess,
                           num_bert_fine_tune_layers=3, is_bert=is_bert)
else:
    model = JointBertModel.load(save_folder_path, sess)

print('training model ...')
#checkpointer = ModelCheckpoint(filepath='/tmp/weights.hdf5', verbose=1, save_best_only=True)

# Pack all per-token inputs side by side so a single StratifiedKFold split
# (stratified on the intent label Y) can partition every array at once.
X = np.concatenate((input_ids, input_mask, segment_ids, valid_positions, tags), axis=1)
Y = intents
# Width of one packed section — presumably used later to slice X back into
# its five component arrays; TODO confirm in the continuation of this loop.
split_width = input_ids.shape[1]
history = {}
for i in range(epochs):
    # Fresh shuffled 5-fold split every epoch.
    folds = StratifiedKFold(n_splits=5, shuffle=True).split(X, Y)
    for train_index, val_index in folds:
        # Loop body continues beyond this chunk.
        X_train, X_val = X[train_index], X[val_index]
# Chunk is incomplete at both ends: it starts mid-scope (the input arrays and
# config globals are defined earlier) and ends on a dangling `if`.
print('encode labels ...')
intents_label_encoder = LabelEncoder()
# Fit on the training intents only; validation intents are transformed with
# the same encoder so unseen labels there would raise.
train_intents = intents_label_encoder.fit_transform(train_intents).astype(
    np.int32)
val_intents = intents_label_encoder.transform(val_intents).astype(np.int32)
intents_num = len(intents_label_encoder.classes_)

# Train from scratch unless a starting checkpoint folder was provided.
if start_model_folder_path is None or start_model_folder_path == '':
    model = JointBertModel(slots_num, intents_num, bert_model_hub_path,
                           num_bert_fine_tune_layers=10, is_bert=is_bert)
else:
    model = JointBertModel.load(start_model_folder_path)

print('training model ...')
model.fit([
    train_input_ids, train_input_mask, train_segment_ids, train_valid_positions
], [train_tags, train_intents],
          validation_data=([
              val_input_ids, val_input_mask, val_segment_ids, val_valid_positions
          ], [val_tags, val_intents]),
          epochs=epochs,
          batch_size=batch_size)

### saving
print('Saving ..')
# NOTE(review): body of this `if` lies beyond the visible chunk —
# presumably it creates save_folder_path before persisting the model.
if not os.path.exists(save_folder_path):