Example #1
 def __init__(self):
     # load the word-segmentation dictionary
     with cs.open('../data/segment_dic.txt', 'r', 'utf-8') as fp:
         segment_dic = {}
         for line in fp:
             if line.strip():
                 segment_dic[line.strip()] = 0
     self.segment_dic = segment_dic
     self.max_seq_len = 20
     begin = time.time()
     jieba.load_userdict('../data/segment_dic.txt')
     print('User segmentation dictionary loaded in %.2fs' % (time.time() - begin))
     # load the trained entity-recognition model
     custom_objects = get_custom_objects()
     self.ner_model = load_model('../data/model/ner_model.h5',
                                 custom_objects=custom_objects)
     # load the BERT tokenizer
     dict_path = '../../news_classifer_task/wwm/vocab.txt'
     token_dict = {}
     with cs.open(dict_path, 'r', 'utf8') as reader:
         for line in reader:
             token = line.strip()
             token_dict[token] = len(token_dict)
     self.tokenizer = Tokenizer(token_dict)
     print('mention extractor loaded')
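Note: once a keras_bert Tokenizer is built from a vocabulary dict as above, its encode method produces the token-index and segment-index arrays the model expects. A minimal sketch (the sentence and padding length are illustrative, not from the original source):

# Illustrative use of the Tokenizer built above: encode returns
# (token indices, segment indices), padded/truncated to max_len.
tokenizer = Tokenizer(token_dict)
indices, segments = tokenizer.encode('你好，世界', max_len=20)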
Example #2
 def __init__(self, model_dir, batch=128):
     model_path = model_dir.format(r"best_model.hdf5")
     self.processer: BertPreProcess = dill.load(
         open(model_dir.format(r"process.dill"), "rb"))
     self.model = load_model(model_path,
                             custom_objects=get_custom_objects())
     self.batch = batch
Example #3
def load_model_encoder_details(model_path, encoder_path, details_path):
    custom_objects = get_custom_objects()
    my_objects = {'acc_top2': acc_top2}
    custom_objects.update(my_objects)
    model = load_model(model_path, custom_objects=custom_objects)
    encoder = joblib.load(encoder_path)
    nclass_dict = joblib.load(details_path)
    return model, encoder, nclass_dict['nclass']
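The acc_top2 metric registered here is defined elsewhere in the original project and must match the name used at training time. A plausible minimal sketch, assuming it wraps Keras' stock top-k categorical accuracy:

# Hypothetical definition of acc_top2 (an assumption, not the original):
# top-2 categorical accuracy built on Keras' built-in metric.
from keras.metrics import top_k_categorical_accuracy

def acc_top2(y_true, y_pred):
    return top_k_categorical_accuracy(y_true, y_pred, k=2)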
Example #4
 def __init__(self, model_path):
     self.config = get_config_from_json('.//config.json')
     self.load_path = model_path
     self.val_data_dir = self.config.paths.val_data_dir
     self.test_data_dir = self.config.paths.test_data_dir
     self.model = tf.keras.models.load_model(
         self.load_path, custom_objects=get_custom_objects())
     print("Model loaded succesfully from " + self.load_path)
Example #5
 def test_sample(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
     )
     model_path = os.path.join(tempfile.gettempdir(),
                               'keras_bert_%f.h5' % np.random.random())
     model.save(model_path)
     from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
     # Workaround for an incorrect global variable used in Keras
     with CustomObjectScope(get_custom_objects()):
         model = keras.models.load_model(
             model_path,
             custom_objects=get_custom_objects(),
         )
     model.summary(line_length=200)
Example #6
 def load(self, checkpoint_path):
     """
     Load a model from an H5 file.
     :param checkpoint_path: file path
     :return:
     """
     self.model = keras.models.load_model(
         checkpoint_path, custom_objects=get_custom_objects())
Example #7
class SentimentConfig(AppConfig):
    name = 'sentiment'
    json_file = open("sentiment/model/model.json", 'r')
    features = pickle.load(open('sentiment/model/tf_model.preproc', 'rb'))
    loaded_model_json = json_file.read()
    json_file.close()
    loaded_model = model_from_json(loaded_model_json, custom_objects=get_custom_objects())
    loaded_model.load_weights("sentiment/model/model.h5")
Example #8
def build_model(lr: float, lr_d: float, process: BertPreProcess):
    """data"""

    train_gener = DataGener("train.json.crf.m30.CRFDropModel.expand.pre.json",
                            processer=process,
                            batch_size=8,
                            max_len=-1)
    val_gener = DataGener("validate.json.crf.m30.CRFDropModel.expand.pre.json",
                          processer=process,
                          batch_size=16,
                          max_len=-1)
    """layers"""
    inp_a = Input(shape=(None, ))
    inp_b = Input(shape=(None, ))
    out = get_layer(inp_a, inp_b)
    """call back"""
    check_point = ModelCheckpoint(model_path,
                                  monitor="val_loss",
                                  verbose=1,
                                  save_best_only=True,
                                  mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=2)
    tb_cb = TensorBoard(log_dir=log_filepath)
    metrics = Metrics()
    """fine-tune"""
    model = Model(inputs=[inp_a, inp_b], outputs=out)
    model.trainable = True
    # for layer in model.layers[:1]:
    #     layer.trainable = False
    model.summary()
    """train"""
    # vald = val_gener.get_bert_pair_text_all()
    # trnd = train_gener.get_bert_pair_text_all()
    # model.compile(loss="binary_crossentropy", optimizer=Adam(lr=lr, decay=lr_d), metrics=["accuracy", f1])
    model.compile(loss="binary_crossentropy",
                  optimizer=Adam(lr=lr, decay=lr_d),
                  metrics=["accuracy"])
    # model.fit(x=trnd[0],
    #           y=trnd[1],
    #           validation_data=vald,
    #           epochs=3,
    #           class_weight="auto",
    #           callbacks=[check_point, early_stop, tb_cb])
    model.fit_generator(iter(train_gener),
                        steps_per_epoch=len(train_gener),
                        epochs=5,
                        validation_data=iter(val_gener),
                        validation_steps=len(val_gener),
                        class_weight="auto",
                        callbacks=[check_point, early_stop, tb_cb])

    model.save(model_path)
    K.clear_session()
    tf.reset_default_graph()

    model = load_model(model_path, custom_objects=get_custom_objects())
    return model
Example #9
 def test_save_load_json(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
     )
     data = model.to_json()
     model = keras.models.model_from_json(data, custom_objects=get_custom_objects())
     model.summary()
Example #10
def build_model(lr: float, lr_d: float, process: BertNerProcess):
    """data"""
    # validate train
    train_gener = DataGener("t.json", processer=process, batch_size=32)
    val_gener = DataGener("t.json", processer=process, batch_size=64)
    """layers"""
    x1_in = Input(shape=(None, ))  # sentence to be tagged (token ids)
    x2_in = Input(shape=(None, ))  # sentence to be tagged (segment ids)
    s1_in = Input(shape=(None, ))  # entity left boundary (label)
    s2_in = Input(shape=(None, ))  # entity right boundary (label)
    p1, p2 = get_layer(x1_in, x2_in)
    """call back"""
    check_point = ModelCheckpoint(model_path,
                                  monitor="val_loss",
                                  verbose=1,
                                  save_best_only=True,
                                  mode="min")
    early_stop = EarlyStopping(monitor="val_loss", mode="min", patience=2)
    tb_cb = TensorBoard(log_dir=log_filepath)
    """fine-tune"""
    # the label inputs must be part of the model so add_loss below can use them
    model = Model(inputs=[x1_in, x2_in, s1_in, s2_in], outputs=[p1, p2])
    # model.trainable = True
    # for layer in model.layers[:1]:
    #     layer.trainable = False
    model.summary()
    """train"""
    # vald = val_gener.get_bert_pair_text_all()
    # trnd = train_gener.get_bert_pair_text_all()
    # model.compile(loss="binary_crossentropy", optimizer=Adam(lr=lr, decay=lr_d), metrics=["accuracy", f1])
    loss1 = K.mean(K.categorical_crossentropy(s1_in, p1, from_logits=True))
    p2 -= (1 - K.cumsum(s1_in, 1)) * 1e10
    loss2 = K.mean(K.categorical_crossentropy(s2_in, p2, from_logits=True))
    loss = loss1 + loss2

    model.add_loss(loss)
    model.compile(optimizer=Adam(lr=lr, decay=lr_d))
    # model.fit(x=trnd[0],
    #           y=trnd[1],
    #           validation_data=vald,
    #           epochs=3,
    #           class_weight="auto",
    #           callbacks=[check_point, early_stop, tb_cb])
    model.fit_generator(iter(train_gener),
                        steps_per_epoch=len(train_gener),
                        epochs=5,
                        validation_data=iter(val_gener),
                        validation_steps=len(val_gener),
                        class_weight="auto",
                        callbacks=[check_point, early_stop, tb_cb])

    model.save(model_path)
    K.clear_session()
    tf.reset_default_graph()

    model = load_model(model_path, custom_objects=get_custom_objects())
    return model
Example #11
 def __init__(self):
     self.dp = DataProcess()
     self.abs_path = os.path.join(DATA_DIR, "bert_ner.h5")
     c = get_custom_objects()
     c.update({
         "CRF": CRF,
         'crf_loss': crf_loss,
         'crf_viterbi_accuracy': crf_accuracy
     })
     self.model = load_model(self.abs_path, custom_objects=c)
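The CRF layer, loss, and accuracy merged into the custom objects above are not part of keras_bert. Assuming they are the keras_contrib trio (the usual source for these names), the corresponding imports would look like:

# Assumed imports for the CRF custom objects above; keras_contrib is the
# common provider of this layer/loss/metric trio.
from keras_contrib.layers import CRF
from keras_contrib.losses import crf_loss
from keras_contrib.metrics import crf_viterbi_accuracy as crf_accuracy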
Example #12
def load_ner_model(ner_model_dir):
    with open(_ner_config_path(ner_model_dir)) as f:
        config = json.load(f)
    model = keras.models.load_model(_ner_model_path(ner_model_dir),
                                    custom_objects=get_custom_objects())
    tokenizer = tokenization.FullTokenizer(
        vocab_file=_ner_vocab_path(ner_model_dir),
        do_lower_case=config['do_lower_case'])
    labels = read_labels(_ner_labels_path(ner_model_dir))
    return model, tokenizer, labels, config
Example #13
 def on_epoch_begin(self, epoch, logs=None):
     if epoch == 0:
         print("[!] test load&save model")
         f = self.filename + ".h5"
         custom_objects = get_custom_objects()
         self.model.save(f, include_optimizer=False, overwrite=True)
         if "bert" in cfg["verbose"]:
             model_ = load_model(f, custom_objects=custom_objects)
         else:
             model_ = load_model(f)
Example #14
 def load(self, model_path):
     """
     load the pre-trained model
     """
     try:
         self.albert_model = load_model(str(model_path),
                                        custom_objects=get_custom_objects(),
                                        compile=False)
     except Exception as ex:
         print('load error: %s' % ex)
     return self
Example #15
 def on_epoch_begin(self, epoch, logs=None):
     if epoch == 0:
         print("[!] test load&save model")
         f = self.filename + ".h5"
         f = os.path.join(SAVE_DIR, f)
         self.model.save(f, include_optimizer=False, overwrite=False)
         if "albert" in cfg["verbose"]:
             model_ = load_model(f) 
         elif "nezha" in cfg["verbose"]:
             model_ = load_model(f) 
         else:
             model_ = load_model(f, custom_objects=get_custom_objects()) 
Example #16
 def test_save_load_json(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
         attention_activation='gelu',
     )
     compile_model(model)
     data = model.to_json()
     model = keras.models.model_from_json(
         data, custom_objects=get_custom_objects())
     model.summary()
Example #17
    def __init__(self):
        self.maxlen = 512

        self.sp_path = staticfiles_storage.path(
            'entrysheet/bert/wiki-ja.model')
        self.sp = spm.SentencePieceProcessor()
        self.sp.Load(self.sp_path)

        self.model_path = staticfiles_storage.path(
            'entrysheet/bert/bert_check_point.model')
        self.model = load_model(self.model_path,
                                custom_objects=get_custom_objects())
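Before inference, text would be converted to ids with the SentencePiece processor loaded above. A minimal sketch of what a companion method of this class might do (the input string is illustrative):

# Illustrative follow-up inside the same class: EncodeAsIds turns raw
# text into subword ids, truncated here to the model's maxlen.
ids = self.sp.EncodeAsIds('これはテストです。')[:self.maxlen]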
Example #18
    def _get_embed_by_bert(X):
        with timed_bolck(f'Prepare train model'):

            from keras_bert import load_trained_model_from_checkpoint

            model = load_trained_model_from_checkpoint(
                config_path,
                checkpoint_path,
                training=True,
                seq_len=SEQ_LEN,
            )
            #model.summary(line_length=120)

            from tensorflow.python import keras
            from keras_bert import AdamWarmup, calc_train_steps
            inputs = model.inputs[:2]
            dense = model.get_layer('NSP-Dense').output
            model = keras.models.Model(inputs, dense)  #.summary()

        with timed_bolck(f'try to gen embed DF{len(X)}'):
            input1_col = [
                col for col in X.columns if str(col).startswith('bert_')
            ]
            # train_x, train_y = filter_short_desc(train_x, train_y)

            input1 = X.loc[:, input1_col]  # .astype(np.float32)
            input2 = np.zeros_like(input1)  # .astype(np.int8)

            logger.info(f'NN Input1:{input1.shape}, Input2:{input2.shape}')

            label2id, id2label = get_label_id()
            from keras_bert import get_custom_objects
            import tensorflow as tf
            with tf.keras.utils.custom_object_scope(get_custom_objects()):
                res_list = []
                partition_len = 5000
                for sn in tqdm(range(1 + len(X) // partition_len),
                               'gen embedding'):
                    tmp = X.iloc[sn * partition_len:(sn + 1) * partition_len]
                    # print('\nbegin tmp\n', tmp.iloc[:3,:3].head())
                    res = model.predict([
                        tmp.loc[:, input1_col],
                        np.zeros_like(tmp.loc[:, input1_col])
                    ])
                    res = pd.DataFrame(res,
                                       index=tmp.index).add_prefix('embd_bert')
                    # print('\nend tmp\n', res.iloc[:3, :3].head())
                    res_list.append(res)

                res = pd.concat(res_list)

        return res
Example #19
 def test_sample(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
     )
     model_path = os.path.join(tempfile.gettempdir(), 'keras_bert_%f.h5' % np.random.random())
     model.save(model_path)
     model = keras.models.load_model(
         model_path,
         custom_objects=get_custom_objects(),
     )
     model.summary(line_length=200)
Example #20
def load_model(input_model_path, input_json_path=None, input_yaml_path=None):
    if not Path(input_model_path).exists():
        raise FileNotFoundError(
            'Model file `{}` does not exist.'.format(input_model_path))
    try:
        model = load_keras_model(input_model_path,
                                 custom_objects=get_custom_objects())
        return model
    except FileNotFoundError as err:
        logging.error('Input model file (%s) does not exist.',
                      FLAGS.input_model)
        raise err
    except ValueError as wrong_file_err:
        if input_json_path:
            if not Path(input_json_path).exists():
                raise FileNotFoundError(
                    'Model description json file `{}` does not exist.'.format(
                        input_json_path))
            try:
                model = model_from_json(open(str(input_json_path)).read())
                model.load_weights(input_model_path)
                return model
            except Exception as err:
                logging.error("Couldn't load model from json.")
                raise err
        elif input_yaml_path:
            if not Path(input_yaml_path).exists():
                raise FileNotFoundError(
                    'Model description yaml file `{}` does not exist.'.format(
                        input_yaml_path))
            try:
                model = model_from_yaml(open(str(input_yaml_path)).read())
                model.load_weights(input_model_path)
                return model
            except Exception as err:
                logging.error("Couldn't load model from yaml.")
                raise err
        else:
            logging.error(
                'Input file specified only holds the weights, and not '
                'the model definition. Save the model using '
                'model.save(filename.h5) which will contain the network '
                'architecture as well as its weights. '
                'If the model is saved using the '
                'model.save_weights(filename) function, either '
                'input_model_json or input_model_yaml flags should be set '
                'to import the network architecture prior to loading the '
                'weights. \n'
                'Check the keras documentation for more details '
                '(https://keras.io/getting-started/faq/)')
            raise wrong_file_err
Example #21
 def test_task_embed(self):
     inputs, outputs = get_model(
         token_num=20,
         embed_dim=12,
         head_num=3,
         transformer_num=2,
         use_task_embed=True,
         task_num=10,
         training=False,
         dropout_rate=0.0,
     )
     model = keras.models.Model(inputs, outputs)
     model_path = os.path.join(tempfile.gettempdir(),
                               'keras_bert_%f.h5' % np.random.random())
     model.save(model_path)
     from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
      # Workaround for an incorrect global variable used in Keras
      with CustomObjectScope(get_custom_objects()):
         model = keras.models.load_model(
             model_path,
             custom_objects=get_custom_objects(),
         )
     model.summary(line_length=200)
Example #22
def load_model(
    model_path: str,
    load_weights: bool = True
) -> Union[BaseClassificationModel, BaseLabelingModel]:
    """
    Load saved model from saved model from `model.save` function
    Args:
        model_path: model folder path
        load_weights: only load model structure and vocabulary when set to False, default True.

    Returns:

    """
    with open(os.path.join(model_path, 'model_info.json'), 'r') as f:
        model_info = json.load(f)

    model_class = pydoc.locate(
        f"{model_info['module']}.{model_info['class_name']}")
    model_json_str = json.dumps(model_info['tf_model'])

    model = model_class()

    # Name duplication between keras_bert and bert4keras custom objects is
    # handled inside _custom_load_keras_model_from_json (see Example #23)
    model.tf_model = _custom_load_keras_model_from_json(model_json_str)

    if load_weights:
        model.tf_model.load_weights(
            os.path.join(model_path, 'model_weights.h5'))

    embed_info = model_info['embedding']
    embed_class = pydoc.locate(
        f"{embed_info['module']}.{embed_info['class_name']}")
    embedding: Embedding = embed_class._load_saved_instance(
        embed_info, model_path, model.tf_model)

    model.embedding = embedding

    if type(model.tf_model.layers[-1]) == CRF:
        model.layer_crf = model.tf_model.layers[-1]

    return model
Example #23
def _custom_load_keras_model_from_json(json_str):
    # Fix loading bug caused by custom objects naming duplication in keras_bert and bert4keras
    import keras_bert
    custom_obj_1 = kashgari.custom_objects
    custom_obj_2 = dict(custom_obj_1)
    custom_obj_2.update(keras_bert.get_custom_objects())

    model, exp = None, None
    for custom_obj in [custom_obj_1, custom_obj_2]:
        try:
            model = tf.keras.models.model_from_json(json_str, custom_obj)
            break
        except Exception as e:
            exp = e

    if model:
        return model
    else:
        raise exp
Example #24
 def __init__(self, gpu_name, gpu_num, seq_max_len, batch_size):
     print('--' * 10 + ' Load BERT model start ' + '--' * 10)
     gpu_option(gpu_name, gpu_num)
     self.seq_max_len = seq_max_len  # same to train
     self.batch_size = batch_size
     model_path = 'models/BERT/pretrained_model/uncased_L-24_H-1024_A-16'
     vocab_path = os.path.join(model_path, 'vocab.txt')
     # load Tokenizer
     token_dict = load_vocabulary(vocab_path)
     self.tokenizer = Tokenizer(token_dict)
     MODEL_SAVE_PATH = 'models/BERT/fine_tune_model/bert_fine_tune.hdf5'
     model = load_model(MODEL_SAVE_PATH,
                        custom_objects=get_custom_objects(),
                        compile=False)
     if gpu_num >= 2:
         self.par_model = multi_gpu_model(model, gpus=gpu_num)
     else:
         self.par_model = model
     print('--' * 10 + ' Load BERT model end ' + '--' * 10)
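Inference then goes through the (possibly multi-GPU) wrapper. A sketch assuming token/segment arrays produced by the Tokenizer built in __init__ (the variable names are illustrative):

# Hypothetical prediction call on the wrapped model; x_indices and
# x_segments would come from self.tokenizer.encode as in other examples.
preds = self.par_model.predict([x_indices, x_segments], batch_size=self.batch_size)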
Example #25
 def test_task_embed(self):
     inputs, outputs = get_model(
         token_num=20,
         embed_dim=12,
         head_num=3,
         transformer_num=2,
         use_task_embed=True,
         task_num=10,
         training=False,
         dropout_rate=0.0,
     )
     model = keras.models.Model(inputs, outputs)
     model_path = os.path.join(tempfile.gettempdir(), 'keras_bert_%f.h5' % np.random.random())
     model.save(model_path)
     model = keras.models.load_model(
         model_path,
         custom_objects=get_custom_objects(),
     )
     model.summary(line_length=200)
Example #26
    def Init():

        # GPU
        if not Config.GPUEnable:
            os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

        # create a new tokenizer
        if SentimentClassification.tokenizer is None:
            SentimentClassification.tokenizer = OurTokenizer(
                SentimentClassification.get_token_dict())

        if SentimentClassification.model is None:
            # load the model
            custom_objects = get_custom_objects()
            my_objects = {'acc_top2': SentimentClassification.acc_top2}
            custom_objects.update(my_objects)

            app = Flask(__name__)
            model_path = os.path.join(app.static_folder, Config.model_path)
            SentimentClassification.model = load_model(
                model_path, custom_objects=custom_objects)
Example #27
def load_model(train_dir):
    try:
        if os.path.isfile(train_dir):
            model_path = train_dir
        elif os.path.isdir(train_dir):
            model_path = os.path.join(train_dir, LAST_MODEL_FILE_FORMAT)
        else:
            raise Exception('path does not exist')

        last_epoch = get_last_epoch(model_path)
        print("load from => {}".format(model_path))
        custom_objects = get_custom_objects()
        custom_objects['custom_loss'] = custom_loss
        custom_objects['AdamWD'] = AdamWD
        model = keras.models.load_model(model_path,
                                        custom_objects=custom_objects)
        return model, last_epoch

    except Exception as e:
        print(str(e))
        print("model file not found")
Example #28
 def load(self, model_dir):
     """
     load the pre-trained model
     """
     model_path = os.path.join(model_dir, 'bert.h5')
     try:
         graph = tf.Graph()
         with graph.as_default():
             session = tf.Session()
             with session.as_default():
                 self.reply = load_model(
                     str(model_path),
                     custom_objects=get_custom_objects(),
                     compile=False)
                 with open(os.path.join(model_dir, 'label_map_bert.txt'),
                           'r') as f:
                     self.label_map = eval(f.read())
                 self.graph = graph
                 self.session = session
      except Exception as ex:
          print('load error: %s' % ex)
     return self
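A safety note on the snippet above: eval on file contents executes arbitrary code. Assuming label_map_bert.txt holds a plain Python dict literal, ast.literal_eval is a safer drop-in:

# Safer alternative to eval(), assuming the file contains a plain dict
# literal rather than arbitrary Python expressions.
import ast
with open(os.path.join(model_dir, 'label_map_bert.txt'), 'r') as f:
    label_map = ast.literal_eval(f.read())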
Example #29
import re
from multiClsModelTrain import token_dict, OurTokenizer
from keras.models import load_model
from keras_bert import get_custom_objects

maxlen = 300

# load the trained model
ifPool = 1  # which model to load: 1 - mean-max pooling; 0 - CLS vector
syn_or_ant = 0  # coordinate vs. adversative relation model: 0 - coordinate; 1 - adversative
model_type = 0  # which model to load: 0 - multi-class; 1 - binary

if model_type == 0:
    if ifPool == 0:
        model = load_model("bert_model/multi_cls_bert.h5",
                           custom_objects=get_custom_objects())
        print('Loaded model: multi_cls_bert.h5')
    else:
        model = load_model("bert_model/multi_mmp_bert.h5",
                           custom_objects=get_custom_objects())
        print('Loaded model: multi_mmp_bert.h5')
else:
    if syn_or_ant == 0:
        if ifPool == 0:
            model = load_model("bert_model/bi_syn_cls_bert.h5",
                               custom_objects=get_custom_objects())
            print('Loaded model: bi_syn_cls_bert.h5')
        else:
            model = load_model("bert_model/bi_syn_mmp_bert.h5",
                               custom_objects=get_custom_objects())
            print('Loaded model: bi_syn_mmp_bert.h5')
Example #30
    print("-" * 80)
    _t0 = time()
    print(f)
    if "albert" in f:
        word_index = get_vocab(base_dir="./", albert=True)
    elif "pair" in f or "clue" in f:
        word_index = get_vocab(base_dir="./", clue=True)
    else:
        word_index = get_vocab(base_dir="./")
    cfg["x_pad"] = word_index["[PAD]"]
    K.clear_session()
    print("[!] x_pad = {}".format(cfg["x_pad"]))
    if "albert" in f.lower() or "nezha" in f.lower():
        model = load_model(f)
    else:
        model = load_model(f, custom_objects=get_custom_objects())
    sub_model = get_model(model)
    pred = test(sub_model, test_data, x_dict=word_index)
    #     auc = roc_auc_score(O1, pred)
    #     acc = accuracy_score(O1, np.array(pred > 0.5, "int32"))
    #     print("[{}]".format(time() - t0), auc, acc)
    print("[{}] f = `{}`, finish".format(time() - _t0, f))
    print(pred.shape)
    preds.append(pred)
    del model, word_index, pred
    gc.collect()

print("[{}]".format(time() - t0))
print(len_1, len_3)
pred1 = ensemble_predictions(preds[0:len_1])
pred3 = ensemble_predictions(preds[len_1:len_1 + len_3])