def __init__(self):
    # Load the word-segmentation dictionary
    with cs.open('../data/segment_dic.txt', 'r', 'utf-8') as fp:
        segment_dic = {}
        for line in fp:
            if line.strip():
                segment_dic[line.strip()] = 0
    self.segment_dic = segment_dic
    self.max_seq_len = 20
    begin = time.time()
    jieba.load_userdict('../data/segment_dic.txt')
    print('Time to load the user segmentation dictionary: %.2f' % (time.time() - begin))
    # Load the trained entity-recognition model
    custom_objects = get_custom_objects()
    self.ner_model = load_model('../data/model/ner_model.h5',
                                custom_objects=custom_objects)
    # Load the BERT tokenizer vocabulary
    dict_path = '../../news_classifer_task/wwm/vocab.txt'
    token_dict = {}
    with cs.open(dict_path, 'r', 'utf8') as reader:
        for line in reader:
            token = line.strip()
            token_dict[token] = len(token_dict)
    self.tokenizer = Tokenizer(token_dict)
    print('mention extractor loaded')
def __init__(self, model_dir, batch=128):
    model_path = model_dir.format(r"best_model.hdf5")
    self.processer: BertPreProcess = dill.load(
        open(model_dir.format(r"process.dill"), "rb"))
    self.model = load_model(model_path, custom_objects=get_custom_objects())
    self.batch = batch
def load_model_encoder_details(model_path, encoder_path, details_path):
    custom_objects = get_custom_objects()
    my_objects = {'acc_top2': acc_top2}
    custom_objects.update(my_objects)
    model = load_model(model_path, custom_objects=custom_objects)
    encoder = joblib.load(encoder_path)
    nclass_dict = joblib.load(details_path)
    return model, encoder, nclass_dict['nclass']
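# A hedged usage sketch for load_model_encoder_details: the three paths and
# the encode_texts helper are hypothetical placeholders, not from the source.
# It shows the intended flow: predict class probabilities, then map argmax
# indices back to label names through the fitted sklearn-style encoder.
model, encoder, nclass = load_model_encoder_details(
    'output/cls_model.h5', 'output/label_encoder.pkl', 'output/details.pkl')
token_ids, segment_ids = encode_texts(['an example sentence'])  # hypothetical helper
probs = model.predict([token_ids, segment_ids])
labels = encoder.inverse_transform(probs.argmax(axis=-1))
print(labels, 'out of', nclass, 'classes')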
def __init__(self, model_path):
    self.config = get_config_from_json('.//config.json')
    self.load_path = model_path
    self.val_data_dir = self.config.paths.val_data_dir
    self.test_data_dir = self.config.paths.test_data_dir
    self.model = tf.keras.models.load_model(
        self.load_path, custom_objects=get_custom_objects())
    print("Model loaded successfully from " + self.load_path)
def test_sample(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
    )
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_bert_%f.h5' % np.random.random())
    model.save(model_path)
    from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
    # Workaround for an incorrect global variable used in keras
    with CustomObjectScope(get_custom_objects()):
        model = keras.models.load_model(
            model_path,
            custom_objects=get_custom_objects(),
        )
    model.summary(line_length=200)
def load(self, checkpoint_path):
    """Load a model from an H5 file.

    :param checkpoint_path: file path
    """
    self.model = keras.models.load_model(
        checkpoint_path, custom_objects=get_custom_objects())
class SentimentConfig(AppConfig):
    name = 'sentiment'
    json_file = open("sentiment/model/model.json", 'r')
    features = pickle.load(open('sentiment/model/tf_model.preproc', 'rb'))
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json,
                                   custom_objects=get_custom_objects())
    loaded_model.load_weights("sentiment/model/model.h5")
def build_model(lr: float, lr_d: float, process: BertPreProcess):
    """data"""
    train_gener = DataGener("train.json.crf.m30.CRFDropModel.expand.pre.json",
                            processer=process, batch_size=8, max_len=-1)
    val_gener = DataGener("validate.json.crf.m30.CRFDropModel.expand.pre.json",
                          processer=process, batch_size=16, max_len=-1)
    """layers"""
    inp_a = Input(shape=(None,))
    inp_b = Input(shape=(None,))
    out = get_layer(inp_a, inp_b)
    """call back"""
    check_point = ModelCheckpoint(model_path, monitor="val_loss", verbose=1,
                                  save_best_only=True, mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=2)
    tb_cb = TensorBoard(log_dir=log_filepath)
    metrics = Metrics()
    """fine-tune"""
    model = Model(inputs=[inp_a, inp_b], outputs=out)
    model.trainable = True
    # for layer in model.layers[:1]:
    #     layer.trainable = False
    model.summary()
    """train"""
    # vald = val_gener.get_bert_pair_text_all()
    # trnd = train_gener.get_bert_pair_text_all()
    # model.compile(loss="binary_crossentropy", optimizer=Adam(lr=lr, decay=lr_d), metrics=["accuracy", f1])
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(lr=lr, decay=lr_d),
                  metrics=["accuracy"])
    # model.fit(x=trnd[0],
    #           y=trnd[1],
    #           validation_data=vald,
    #           epochs=3,
    #           class_weight="auto",
    #           callbacks=[check_point, early_stop, tb_cb])
    model.fit_generator(train_gener.__iter__(),
                        steps_per_epoch=train_gener.__len__(),
                        epochs=5,
                        validation_data=val_gener.__iter__(),
                        validation_steps=val_gener.__len__(),
                        class_weight="auto",
                        callbacks=[check_point, early_stop, tb_cb])
    model.save(model_path)
    # reload the saved model in a fresh graph so the returned model is
    # detached from the training session state
    K.clear_session()
    tf.reset_default_graph()
    model = load_model(model_path, custom_objects=get_custom_objects())
    return model
def test_save_load_json(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
    )
    data = model.to_json()
    model = keras.models.model_from_json(data,
                                         custom_objects=get_custom_objects())
    model.summary()
def build_model(lr: float, lr_d: float, process: BertNerProcess):
    """data"""
    # validate / train
    train_gener = DataGener("t.json", processer=process, batch_size=32)
    val_gener = DataGener("t.json", processer=process, batch_size=64)
    """layers"""
    x1_in = Input(shape=(None,))  # input sentence to run recognition on
    x2_in = Input(shape=(None,))  # input sentence to run recognition on
    s1_in = Input(shape=(None,))  # left entity boundary (label)
    s2_in = Input(shape=(None,))  # right entity boundary (label)
    p1, p2 = get_layer(x1_in, x2_in)
    """call back"""
    check_point = ModelCheckpoint(model_path, monitor="val_loss", verbose=1,
                                  save_best_only=True, mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=2)
    tb_cb = TensorBoard(log_dir=log_filepath)
    """fine-tune"""
    # the boundary labels must be model inputs so that add_loss below can
    # reach them through the graph
    model = Model(inputs=[x1_in, x2_in, s1_in, s2_in], outputs=[p1, p2])
    # model.trainable = True
    # for layer in model.layers[:1]:
    #     layer.trainable = False
    model.summary()
    """train"""
    # vald = val_gener.get_bert_pair_text_all()
    # trnd = train_gener.get_bert_pair_text_all()
    # model.compile(loss="binary_crossentropy", optimizer=Adam(lr=lr, decay=lr_d), metrics=["accuracy", f1])
    loss1 = K.mean(K.categorical_crossentropy(s1_in, p1, from_logits=True))
    # push logits before the predicted start to -inf so the end pointer
    # cannot precede the start position
    p2 -= (1 - K.cumsum(s1_in, 1)) * 1e10
    loss2 = K.mean(K.categorical_crossentropy(s2_in, p2, from_logits=True))
    loss = loss1 + loss2
    model.add_loss(loss)
    model.compile(optimizer=Adam(lr=lr, decay=lr_d))
    # model.fit(x=trnd[0],
    #           y=trnd[1],
    #           validation_data=vald,
    #           epochs=3,
    #           class_weight="auto",
    #           callbacks=[check_point, early_stop, tb_cb])
    model.fit_generator(train_gener.__iter__(),
                        steps_per_epoch=train_gener.__len__(),
                        epochs=5,
                        validation_data=val_gener.__iter__(),
                        validation_steps=val_gener.__len__(),
                        class_weight="auto",
                        callbacks=[check_point, early_stop, tb_cb])
    model.save(model_path)
    K.clear_session()
    tf.reset_default_graph()
    model = load_model(model_path, custom_objects=get_custom_objects())
    return model
def __init__(self):
    self.dp = DataProcess()
    self.abs_path = os.path.join(DATA_DIR, "bert_ner.h5")
    c = get_custom_objects()
    c.update({
        "CRF": CRF,
        'crf_loss': crf_loss,
        'crf_viterbi_accuracy': crf_accuracy
    })
    self.model = load_model(self.abs_path, custom_objects=c)
def load_ner_model(ner_model_dir):
    with open(_ner_config_path(ner_model_dir)) as f:
        config = json.load(f)
    model = keras.models.load_model(_ner_model_path(ner_model_dir),
                                    custom_objects=get_custom_objects())
    tokenizer = tokenization.FullTokenizer(
        vocab_file=_ner_vocab_path(ner_model_dir),
        do_lower_case=config['do_lower_case'])
    labels = read_labels(_ner_labels_path(ner_model_dir))
    return model, tokenizer, labels, config
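# A hedged usage sketch for load_ner_model: 'my_ner_model' is a hypothetical
# directory. FullTokenizer.tokenize is the standard BERT tokenization API,
# and `labels` maps tag indices back to tag names.
model, tokenizer, labels, config = load_ner_model('my_ner_model')
tokens = tokenizer.tokenize('John lives in Berlin')
print(tokens, labels[:3], config['do_lower_case'])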
def on_epoch_begin(self, epoch, logs=None):
    if epoch == 0:
        print("[!] test load&save model")
        f = self.filename + ".h5"
        custom_objects = get_custom_objects()
        self.model.save(f, include_optimizer=False, overwrite=True)
        if "bert" in cfg["verbose"]:
            model_ = load_model(f, custom_objects=custom_objects)
        else:
            model_ = load_model(f)
def load(self, model_path):
    """Load the pre-trained model."""
    try:
        self.albert_model = load_model(str(model_path),
                                       custom_objects=get_custom_objects(),
                                       compile=False)
    except Exception as ex:
        print('load error:', ex)
    return self
def on_epoch_begin(self, epoch, logs=None):
    if epoch == 0:
        print("[!] test load&save model")
        f = self.filename + ".h5"
        f = os.path.join(SAVE_DIR, f)
        self.model.save(f, include_optimizer=False, overwrite=False)
        if "albert" in cfg["verbose"]:
            model_ = load_model(f)
        elif "nezha" in cfg["verbose"]:
            model_ = load_model(f)
        else:
            model_ = load_model(f, custom_objects=get_custom_objects())
def test_save_load_json(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
        attention_activation='gelu',
    )
    compile_model(model)
    data = model.to_json()
    model = keras.models.model_from_json(
        data, custom_objects=get_custom_objects())
    model.summary()
def __init__(self):
    self.maxlen = 512
    self.sp_path = staticfiles_storage.path('entrysheet/bert/wiki-ja.model')
    self.sp = spm.SentencePieceProcessor()
    self.sp.Load(self.sp_path)
    self.model_path = staticfiles_storage.path(
        'entrysheet/bert/bert_check_point.model')
    self.model = load_model(self.model_path,
                            custom_objects=get_custom_objects())
def _get_embed_by_bert(X):
    with timed_bolck(f'Prepare train model'):
        from keras_bert import load_trained_model_from_checkpoint
        model = load_trained_model_from_checkpoint(
            config_path,
            checkpoint_path,
            training=True,
            seq_len=SEQ_LEN,
        )
        # model.summary(line_length=120)
        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        # keep only the token/segment inputs and expose the pooled
        # NSP-Dense output as the sentence embedding
        inputs = model.inputs[:2]
        dense = model.get_layer('NSP-Dense').output
        model = keras.models.Model(inputs, dense)  # .summary()
    with timed_bolck(f'try to gen embed DF{len(X)}'):
        input1_col = [col for col in X.columns if str(col).startswith('bert_')]
        # train_x, train_y = filter_short_desc(train_x, train_y)
        input1 = X.loc[:, input1_col]  # .astype(np.float32)
        input2 = np.zeros_like(input1)  # .astype(np.int8)
        logger.info(f'NN Input1:{input1.shape}, Input2:{input2.shape}')
        label2id, id2label = get_label_id()
        from keras_bert import get_custom_objects
        import tensorflow as tf
        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            # predict in partitions to keep memory bounded
            res_list = []
            partition_len = 5000
            for sn in tqdm(range(1 + len(X) // partition_len), 'gen embeding'):
                tmp = X.iloc[sn * partition_len:(sn + 1) * partition_len]
                # print('\nbegin tmp\n', tmp.iloc[:3, :3].head())
                res = model.predict([
                    tmp.loc[:, input1_col],
                    np.zeros_like(tmp.loc[:, input1_col])
                ])
                res = pd.DataFrame(res, index=tmp.index).add_prefix('embd_bert')
                # print('\nend tmp\n', res.iloc[:3, :3].head())
                res_list.append(res)
            res = pd.concat(res_list)
    return res
def test_sample(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
    )
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_bert_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(
        model_path,
        custom_objects=get_custom_objects(),
    )
    model.summary(line_length=200)
def load_model(input_model_path, input_json_path=None, input_yaml_path=None):
    if not Path(input_model_path).exists():
        raise FileNotFoundError(
            'Model file `{}` does not exist.'.format(input_model_path))
    try:
        model = load_keras_model(input_model_path,
                                 custom_objects=get_custom_objects())
        return model
    except FileNotFoundError as err:
        logging.error('Input model file (%s) does not exist.',
                      FLAGS.input_model)
        raise err
    except ValueError as wrong_file_err:
        if input_json_path:
            if not Path(input_json_path).exists():
                raise FileNotFoundError(
                    'Model description json file `{}` does not exist.'.format(
                        input_json_path))
            try:
                model = model_from_json(open(str(input_json_path)).read())
                model.load_weights(input_model_path)
                return model
            except Exception as err:
                logging.error("Couldn't load model from json.")
                raise err
        elif input_yaml_path:
            if not Path(input_yaml_path).exists():
                raise FileNotFoundError(
                    'Model description yaml file `{}` does not exist.'.format(
                        input_yaml_path))
            try:
                model = model_from_yaml(open(str(input_yaml_path)).read())
                model.load_weights(input_model_path)
                return model
            except Exception as err:
                logging.error("Couldn't load model from yaml.")
                raise err
        else:
            logging.error(
                'Input file specified only holds the weights, and not '
                'the model definition. Save the model using '
                'model.save(filename.h5) which will contain the network '
                'architecture as well as its weights. '
                'If the model is saved using the '
                'model.save_weights(filename) function, either '
                'input_model_json or input_model_yaml flags should be set '
                'to import the network architecture prior to loading the '
                'weights. \n'
                'Check the keras documentation for more details '
                '(https://keras.io/getting-started/faq/)')
            raise wrong_file_err
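# A hedged usage sketch for the loader above; both paths are hypothetical.
# A full .h5 (architecture + weights) loads directly, while a weights-only
# .h5 needs the architecture supplied as json (or yaml).
model = load_model('saved/full_model.h5')
model = load_model('saved/weights_only.h5', input_json_path='saved/model.json')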
def test_task_embed(self):
    inputs, outputs = get_model(
        token_num=20,
        embed_dim=12,
        head_num=3,
        transformer_num=2,
        use_task_embed=True,
        task_num=10,
        training=False,
        dropout_rate=0.0,
    )
    model = keras.models.Model(inputs, outputs)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_bert_%f.h5' % np.random.random())
    model.save(model_path)
    from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
    # Workaround for an incorrect global variable used in keras
    with CustomObjectScope(get_custom_objects()):
        model = keras.models.load_model(
            model_path,
            custom_objects=get_custom_objects(),
        )
    model.summary(line_length=200)
def load_model(
        model_path: str,
        load_weights: bool = True
) -> Union[BaseClassificationModel, BaseLabelingModel]:
    """
    Load a model saved by the `model.save` function.

    Args:
        model_path: model folder path
        load_weights: only load model structure and vocabulary
            when set to False, default True.

    Returns:
    """
    with open(os.path.join(model_path, 'model_info.json'), 'r') as f:
        model_info = json.load(f)
    model_class = pydoc.locate(
        f"{model_info['module']}.{model_info['class_name']}")
    model_json_str = json.dumps(model_info['tf_model'])
    model = model_class()
    # _custom_load_keras_model_from_json works around the loading bug caused
    # by custom-object naming duplication in keras_bert and bert4keras
    model.tf_model = _custom_load_keras_model_from_json(model_json_str)
    if load_weights:
        model.tf_model.load_weights(
            os.path.join(model_path, 'model_weights.h5'))
    embed_info = model_info['embedding']
    embed_class = pydoc.locate(
        f"{embed_info['module']}.{embed_info['class_name']}")
    embedding: Embedding = embed_class._load_saved_instance(
        embed_info, model_path, model.tf_model)
    model.embedding = embedding
    if type(model.tf_model.layers[-1]) == CRF:
        model.layer_crf = model.tf_model.layers[-1]
    return model
def _custom_load_keras_model_from_json(json_str):
    # Fix loading bug caused by custom-object naming duplication
    # in keras_bert and bert4keras
    import keras_bert
    custom_obj_1 = kashgari.custom_objects
    custom_obj_2 = dict(custom_obj_1)
    custom_obj_2.update(keras_bert.get_custom_objects())
    model, exp = None, None
    for custom_obj in [custom_obj_1, custom_obj_2]:
        try:
            model = tf.keras.models.model_from_json(json_str, custom_obj)
            break
        except Exception as e:
            exp = e
    if model:
        return model
    else:
        raise exp
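# A hedged usage sketch for _custom_load_keras_model_from_json, mirroring how
# load_model above builds model_json_str; 'model_info.json' is a hypothetical
# path to a model-info file produced at save time.
import json
with open('model_info.json') as f:
    model_info = json.load(f)
tf_model = _custom_load_keras_model_from_json(json.dumps(model_info['tf_model']))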
def __init__(self, gpu_name, gpu_num, seq_max_len, batch_size):
    print('--' * 10 + ' Load BERT model start ' + '--' * 10)
    gpu_option(gpu_name, gpu_num)
    self.seq_max_len = seq_max_len  # same as in training
    self.batch_size = batch_size
    model_path = 'models/BERT/pretrained_model/uncased_L-24_H-1024_A-16'
    vocab_path = os.path.join(model_path, 'vocab.txt')
    # load tokenizer
    token_dict = load_vocabulary(vocab_path)
    self.tokenizer = Tokenizer(token_dict)
    MODEL_SAVE_PATH = 'models/BERT/fine_tune_model/bert_fine_tune.hdf5'
    model = load_model(MODEL_SAVE_PATH,
                       custom_objects=get_custom_objects(),
                       compile=False)
    if gpu_num >= 2:
        self.par_model = multi_gpu_model(model, gpus=gpu_num)
    else:
        self.par_model = model
    print('--' * 10 + ' Load BERT model end ' + '--' * 10)
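# A hedged usage sketch, assuming the class above is called BertMatcher (the
# real class name is not in the source). keras_bert's Tokenizer.encode
# returns the (token_ids, segment_ids) pair a fine-tuned pair model expects.
import numpy as np
matcher = BertMatcher(gpu_name='0', gpu_num=1, seq_max_len=128, batch_size=32)
ids, segs = matcher.tokenizer.encode('a query', 'a candidate',
                                     max_len=matcher.seq_max_len)
score = matcher.par_model.predict([np.array([ids]), np.array([segs])])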
def test_task_embed(self):
    inputs, outputs = get_model(
        token_num=20,
        embed_dim=12,
        head_num=3,
        transformer_num=2,
        use_task_embed=True,
        task_num=10,
        training=False,
        dropout_rate=0.0,
    )
    model = keras.models.Model(inputs, outputs)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_bert_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(
        model_path,
        custom_objects=get_custom_objects(),
    )
    model.summary(line_length=200)
def Init():
    # GPU
    if not Config.GPUEnable:
        os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
    # Create the tokenizer once
    if SentimentClassification.tokenizer is None:
        SentimentClassification.tokenizer = OurTokenizer(
            SentimentClassification.get_token_dict())
    if SentimentClassification.model is None:
        # Load the model
        custom_objects = get_custom_objects()
        my_objects = {'acc_top2': SentimentClassification.acc_top2}
        custom_objects.update(my_objects)
        app = Flask(__name__)
        model_path = os.path.join(app.static_folder, Config.model_path)
        SentimentClassification.model = load_model(
            model_path, custom_objects=custom_objects)
def load_model(train_dir):
    try:
        if os.path.isfile(train_dir):
            model_path = train_dir
        elif os.path.isdir(train_dir):
            model_path = os.path.join(train_dir, LAST_MODEL_FILE_FORMAT)
        else:
            raise Exception('path does not exist')
        last_epoch = get_last_epoch(model_path)
        print("load from => {}".format(model_path))
        custom_objects = get_custom_objects()
        custom_objects['custom_loss'] = custom_loss
        custom_objects['AdamWD'] = AdamWD
        model = keras.models.load_model(model_path,
                                        custom_objects=custom_objects)
        return model, last_epoch
    except Exception as e:
        print(str(e))
        print("model file not found")
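# A hedged usage sketch for load_model above: 'checkpoints/' and the training
# arrays are hypothetical placeholders. The returned epoch lets a run resume
# where it stopped.
model, last_epoch = load_model('checkpoints/')
model.fit(x_train, y_train,  # placeholder arrays
          initial_epoch=last_epoch, epochs=last_epoch + 5)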
def load(self, model_dir):
    """Load the pre-trained model."""
    model_path = os.path.join(model_dir, 'bert.h5')
    try:
        graph = tf.Graph()
        with graph.as_default():
            session = tf.Session()
            with session.as_default():
                self.reply = load_model(str(model_path),
                                        custom_objects=get_custom_objects(),
                                        compile=False)
                with open(os.path.join(model_dir, 'label_map_bert.txt'), 'r') as f:
                    self.label_map = eval(f.read())
                self.graph = graph
                self.session = session
    except Exception as ex:
        print('load error:', ex)
    return self
import re
from multiClsModelTrain import token_dict, OurTokenizer
from keras.models import load_model
from keras_bert import get_custom_objects

maxlen = 300

# Load the trained model
ifPool = 1      # which pooling variant to load: 1 - mean-max pool; 0 - CLS
syn_or_ant = 0  # which relation model to load: 0 - coordination; 1 - adversative
model_type = 0  # which model to load: 0 - multi-class; 1 - binary
if model_type == 0:
    if ifPool == 0:
        model = load_model("bert_model/multi_cls_bert.h5",
                           custom_objects=get_custom_objects())
        print('Loaded model: multi_cls_bert.h5')
    else:
        model = load_model("bert_model/multi_mmp_bert.h5",
                           custom_objects=get_custom_objects())
        print('Loaded model: multi_mmp_bert.h5')
else:
    if syn_or_ant == 0:
        if ifPool == 0:
            model = load_model("bert_model/bi_syn_cls_bert.h5",
                               custom_objects=get_custom_objects())
            print('Loaded model: bi_syn_cls_bert.h5')
        else:
            model = load_model("bert_model/bi_syn_mmp_bert.h5",
                               custom_objects=get_custom_objects())
            print('Loaded model: bi_syn_mmp_bert.h5')
print("-" * 80) _t0 = time() print(f) if "albert" in f: word_index = get_vocab(base_dir="./", albert=True) elif "pair" in f or "clue" in f: word_index = get_vocab(base_dir="./", clue=True) else: word_index = get_vocab(base_dir="./") cfg["x_pad"] = word_index["[PAD]"] K.clear_session() print("[!] x_pad = {}".format(cfg["x_pad"])) if "albert" in f.lower() or "nezha" in f.lower(): model = load_model(f) else: model = load_model(f, custom_objects=get_custom_objects()) sub_model = get_model(model) pred = test(sub_model, test_data, x_dict=word_index) # auc = roc_auc_score(O1, pred) # acc = accuracy_score(O1, np.array(pred > 0.5, "int32")) # print("[{}]".format(time() - t0), auc, acc) print("[{}] f = `{}`, finish".format(time() - _t0, f)) print(pred.shape) preds.append(pred) del model, word_index, pred gc.collect() print("[{}]".format(time() - t0)) print(len_1, len_3) pred1 = ensemble_predictions(preds[0:len_1]) pred3 = ensemble_predictions(preds[len_1:len_1 + len_3])