def load_bert_mc(self):
    b_model = get_model(token_num=21128)  # 21128 is the vocabulary size
    compile_model(b_model)
    bert_model = Model(
        inputs=b_model.input[:2],
        outputs=b_model.get_layer('Encoder-12-FeedForward-Norm').output)

    x1_in = Input(shape=(None,))  # token ids of the concatenated question + passage
    x2_in = Input(shape=(None,))  # segment ids of the concatenated question + passage
    s1_in = Input(shape=(None,))  # left boundary of the answer (label)
    s2_in = Input(shape=(None,))  # right boundary of the answer (label)

    # s1/s2 are unused at inference; they are kept over from the training graph.
    x1, x2, s1, s2 = x1_in, x2_in, s1_in, s2_in
    x_mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x1)

    x = bert_model([x1, x2])
    ps1 = Dense(1, use_bias=False)(x)
    # [[0.1], [0.2], [0.3], ...] -> [0.1, 0.2, 0.3, ...]
    # [0.1, 0.2, 0.3, ...] - [0, 0, 0, 0, 1, 1, 1, 1] * 1e10
    # i.e. subtract a huge constant at padded positions so they vanish after softmax
    ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
        [ps1, x_mask])
    # ps1 = Lambda(lambda x: x[0] * x[1])([ps1, x_mask])
    ps2 = Dense(1, use_bias=False)(x)
    ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
        [ps2, x_mask])
    # ps2 = Lambda(lambda x: x[0] * x[1])([ps2, x_mask])

    model = Model([x1_in, x2_in], [ps1, ps2])
    model.load_weights(self.model_path)
    return model
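# Usage sketch (not from the original code): decode an answer span from the
# two masked logit heads returned by load_bert_mc. The greedy argmax decoding
# and the single-example batch are illustrative assumptions.
import numpy as np

def decode_answer_span(model, x1, x2):
    ps1, ps2 = model.predict([x1, x2])  # start/end logits, shape (batch, seq_len)
    start = int(np.argmax(ps1[0]))                # best start position
    end = start + int(np.argmax(ps2[0][start:]))  # best end at or after start
    return start, end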
def test_save_load_json(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
        attention_activation='gelu',
    )
    compile_model(model)
    data = model.to_json()
    set_custom_objects()  # register keras_bert's custom layers for deserialization
    model = keras.models.model_from_json(data)
    model.summary()
import sys

import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras_bert import (Tokenizer, compile_model, gen_batch_inputs,
                        load_trained_model_from_checkpoint, load_vocabulary)

# dict_path, config_path, checkpoint_path and the collect_inputs /
# generate_input_by_batch helpers are defined elsewhere in this module.


def pretrain_model():
    df = pd.read_csv('../data/task2_trainset.csv', dtype=str)
    df_2 = pd.read_csv('../data/task2_public_testset.csv', dtype=str)
    abstract_1 = df.values[:, 2]  # abstract column
    abstract_2 = df_2.values[:, 2]

    token_dict = load_vocabulary(dict_path)
    token_list = list(token_dict.keys())
    tokenizer = Tokenizer(token_dict)
    X_1 = collect_inputs(abstract_1, tokenizer)
    X_2 = collect_inputs(abstract_2, tokenizer)
    X = X_1 + X_2
    print(len(X))

    model = load_trained_model_from_checkpoint(config_path,
                                               checkpoint_path,
                                               training=True,
                                               trainable=True,
                                               seq_len=512)
    compile_model(model)

    def _generator():
        while True:
            yield gen_batch_inputs(generate_input_by_batch(X),
                                   token_dict,
                                   token_list,
                                   seq_len=512,
                                   mask_rate=0.3)

    opt_filepath = sys.argv[1]
    checkpoint = ModelCheckpoint(opt_filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min',
                                 save_weights_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=10,
                                  verbose=1,
                                  mode='auto',
                                  min_delta=0.1,
                                  cooldown=10,
                                  min_lr=1e-10)
    es = EarlyStopping(monitor='val_loss', patience=50)
    callbacks_list = [checkpoint, es, reduce_lr]
    model.fit_generator(generator=_generator(),
                        steps_per_epoch=500,
                        epochs=5000,
                        validation_data=_generator(),
                        validation_steps=200,
                        callbacks=callbacks_list)
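# The snippet above calls two helpers defined elsewhere. The sketches below are
# assumptions about their behavior, inferred only from what keras_bert's
# gen_batch_inputs expects (a list of [tokens_a, tokens_b] sentence pairs);
# the '$$$' sentence separator is likewise a guess about the dataset format.
import numpy as np

def collect_inputs(abstracts, tokenizer):
    # Hypothetical: tokenize each abstract and pair up consecutive sentences.
    pairs = []
    for abstract in abstracts:
        sentences = [tokenizer.tokenize(s)[1:-1]  # strip [CLS] and [SEP]
                     for s in str(abstract).split('$$$')]
        pairs.extend([a, b] for a, b in zip(sentences, sentences[1:]))
    return pairs

def generate_input_by_batch(pairs, batch_size=8):
    # Hypothetical: sample a random mini-batch of sentence pairs.
    idx = np.random.choice(len(pairs), size=batch_size, replace=False)
    return [pairs[i] for i in idx]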
def test_save_load_json(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
        attention_activation='gelu',
    )
    compile_model(model)
    data = model.to_json()
    # Save the model architecture to a JSON file
    with open('./mybert_config.json', 'w') as file:
        file.write(data)
    set_custom_objects()
    # # Load the model from the in-memory JSON object
    # model = keras.models.model_from_json(data)
    # Load the model from the JSON file
    with open('./mybert_config.json', 'r') as file:
        model_json = file.read()
    model = keras.models.model_from_json(model_json)
    model.summary()
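# Equivalent alternative to set_custom_objects(): pass keras_bert's custom
# layer map explicitly when deserializing (get_custom_objects is part of
# keras_bert's public API).
from keras_bert import get_custom_objects

model = keras.models.model_from_json(model_json,
                                     custom_objects=get_custom_objects())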
with strategy.scope():
    model = get_model(
        token_num=200000,
        # head_num=25,
        # transformer_num=20,
        embed_dim=48 * 2,
        feed_forward_dim=512,
        seq_len=100,
        pos_num=100,  # 128
        training=True,
        trainable=None,
        dropout_rate=0.1,
    )
    compile_model(model)
    # model = load_trained_model_from_checkpoint(
    #     config_path,
    #     checkpoint_path,
    #     training=True,
    #     trainable=True,
    #     seq_len=SEQ_LEN,
    # )
    model.summary()

# @title Download IMDB Data
import tensorflow as tf

dataset = tf.keras.utils.get_file(
def test_fit(self):
    current_path = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current_path, 'test_bert_fit.h5')
    sentence_pairs = [
        [['all', 'work', 'and', 'no', 'play'],
         ['makes', 'jack', 'a', 'dull', 'boy']],
        [['from', 'the', 'day', 'forth'],
         ['my', 'arm', 'changed']],
        [['and', 'a', 'voice', 'echoed'],
         ['power', 'give', 'me', 'more', 'power']],
    ]
    token_dict = get_base_dict()
    for pairs in sentence_pairs:
        for token in pairs[0] + pairs[1]:
            if token not in token_dict:
                token_dict[token] = len(token_dict)
    token_list = list(token_dict.keys())
    if os.path.exists(model_path):
        steps_per_epoch = 10
        model = keras.models.load_model(
            model_path,
            custom_objects=get_custom_objects(),
        )
    else:
        steps_per_epoch = 1000
        model = get_model(
            token_num=len(token_dict),
            head_num=5,
            transformer_num=12,
            embed_dim=25,
            feed_forward_dim=100,
            seq_len=20,
            pos_num=20,
            dropout_rate=0.05,
            attention_activation='gelu',
        )
        compile_model(
            model,
            learning_rate=1e-3,
            decay_steps=30000,
            warmup_steps=10000,
            weight_decay=1e-3,
        )
    model.summary()

    def _generator():
        while True:
            yield gen_batch_inputs(
                sentence_pairs,
                token_dict,
                token_list,
                seq_len=20,
                mask_rate=0.3,
                swap_sentence_rate=1.0,
            )

    model.fit_generator(
        generator=_generator(),
        steps_per_epoch=steps_per_epoch,
        epochs=1,
        validation_data=_generator(),
        validation_steps=steps_per_epoch // 10,
    )
    # model.save(model_path)
    for inputs, outputs in _generator():
        predicts = model.predict(inputs)
        outputs = list(map(lambda x: np.squeeze(x, axis=-1), outputs))
        predicts = list(map(lambda x: np.argmax(x, axis=-1), predicts))
        batch_size, seq_len = inputs[-1].shape
        for i in range(batch_size):
            match, total = 0, 0
            for j in range(seq_len):
                if inputs[-1][i][j]:  # only score the masked positions
                    total += 1
                    if outputs[0][i][j] == predicts[0][i][j]:
                        match += 1
            self.assertGreater(match, total * 0.9)
        self.assertTrue(np.allclose(outputs[1], predicts[1]))
        break
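# For reference, the batch structure the scoring loop above relies on (read
# off the test's own usage, not an API spec). This reuses sentence_pairs,
# token_dict and token_list from the test above.
#   inputs  = [token_ids, segment_ids, masked_flags], each of shape (batch, seq_len)
#   outputs = [mlm_token_ids, nsp_labels], shapes (batch, seq_len, 1) and (batch, 1)
inputs, outputs = gen_batch_inputs(
    sentence_pairs,
    token_dict,
    token_list,
    seq_len=20,
    mask_rate=0.3,
    swap_sentence_rate=1.0,
)
assert inputs[-1].shape == outputs[0].shape[:2]  # one MLM target slot per position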
def GetModel(ucfg):
    '''
    ucfg: user's config for the table output (keys: nnname, BS, BPE)
    returns: (model, isconv)
    '''
    from tensorflow import keras

    nnname = ucfg['nnname']
    isconv = True

    if nnname == 'newmodel':
        import sys
        sys.path.append("..")
        from newmodel import tfmodel
        model, isconv = tfmodel()
        sys.path.remove("..")

    import tensorflow.keras.applications as nn
    if hasattr(nn, nnname):
        model = getattr(nn, nnname)(weights=None)
    # efficientnet: B0-B7
    elif nnname[:-2] == 'EfficientNet':
        import tfmodels.efficientnet.tfkeras as nn
        model = getattr(nn, nnname)(weights=None)
    # TF2.x models:
    elif nnname == 'ncf':
        import tfmodels.ncf as nn
        name = 'ncfmodel'
        model = getattr(nn, name)(istrain=False)
        isconv = False
    elif nnname == 'din':
        import tfmodels.din as nn
        name = 'din'
        _, model = getattr(nn, name)(item_count=63001,
                                     cate_count=801,
                                     hidden_units=128)
        isconv = False
    # bert from keras_bert
    elif nnname == 'bert':
        isconv = False
        from keras_bert import get_base_dict, get_model, compile_model

        # Build the token dictionary
        token_dict = get_base_dict()
        training = True
        if training:
            # # bert base
            # embed_dim = 768
            # headnum = 12
            # layernum = 12
            # bert large
            embed_dim = 1024
            headnum = 16
            layernum = 24
            ff_dim = embed_dim * 4
            token_num = 30522  # vocabulary size from the BERT paper
            model = get_model(token_num=token_num,
                              pos_num=512,
                              seq_len=512,
                              embed_dim=embed_dim,
                              transformer_num=layernum,
                              head_num=headnum,
                              feed_forward_dim=ff_dim,
                              training=training)
        else:
            # Revise lib/site-packages/keras_bert/bert.py, line 164:
            # "return inputs, transformed" -> "return inputs, transformed, model"
            _, _, model = get_model(token_num=len(token_dict),
                                    embed_dim=1024,
                                    head_num=16,
                                    training=training)
        compile_model(model)

    if nnname == 'mymodel':
        isconv = False
        ## ===== To add a customized model =====
        # refer to: https://keras.io/guides/sequential_model/
        from tensorflow.keras import layers

        # Define a customized model
        model = keras.Sequential()
        model.add(keras.Input(shape=(250, 250, 3)))  # 250x250 RGB images
        model.add(layers.Conv2D(32, 5, strides=2, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(3))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(3))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(2))
        # Now that we have 4x4 feature maps, apply global max pooling.
        model.add(layers.GlobalMaxPooling2D())
        # Finally, add a classification layer.
        model.add(layers.Dense(10))
        ## ===== end of your codes =====

    if True:
        g = keras.utils.model_to_dot(model, show_shapes=True)
        if nnname == 'newmodel':
            nnname = ucfg['model']
        g.write_pdf(".//outputs//tf//" + nnname + '.pdf')

    return model, isconv
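# Hypothetical usage of GetModel; the ucfg keys follow the docstring above
# ('nnname' selects the network; 'model' is only read for 'newmodel').
ucfg = {'nnname': 'bert', 'BS': 1, 'BPE': 1}
model, isconv = GetModel(ucfg)
print(type(model).__name__, 'isconv =', isconv)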
import os

import numpy as np
import pandas as pd
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from keras_bert import (Tokenizer, compile_model, gen_batch_inputs,
                        load_trained_model_from_checkpoint, load_vocabulary)

# dict_path, config_path, checkpoint_path and the collect_inputs /
# get_layers_name helpers are defined elsewhere in this module.


def pretrain_model(opt_filepath, data_dir, gpu_id):
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id
    # gpus = tf.config.experimental.list_physical_devices('GPU')
    # tf.config.experimental.set_memory_growth(gpus[0], True)

    token_dict = load_vocabulary(dict_path)
    token_list = list(token_dict.keys())

    # if not os.path.exists(os.path.join(data_dir, 'pretrain_X.npy')):
    df = pd.read_csv(os.path.join(data_dir, 'task2_trainset.csv'), dtype=str)
    df_2 = pd.read_csv(os.path.join(data_dir, 'task2_public_testset.csv'),
                       dtype=str)
    abstract_1 = df.values[:, 2]
    abstract_2 = df_2.values[:, 2]
    tokenizer = Tokenizer(token_dict)
    X_1 = collect_inputs(abstract_1, tokenizer)
    X_2 = collect_inputs(abstract_2, tokenizer)
    X = np.array(X_1 + X_2)
    # np.save(os.path.join(data_dir, 'pretrain_X.npy'), X)
    # else:
    #     X = np.load(os.path.join(data_dir, 'pretrain_X.npy'))
    print(X.shape)

    model = load_trained_model_from_checkpoint(
        config_path,
        checkpoint_path,
        training=True,
        trainable=get_layers_name(range(12, 25)),
        seq_len=512)
    compile_model(model)

    def _generator(batch_size=4):
        while True:
            idx = np.random.permutation(X.shape[0])
            for i in range(0, idx.shape[0], batch_size):
                # index through the permutation so each epoch is shuffled
                # (the original indexed X directly and never used idx)
                yield gen_batch_inputs(X[idx[i:i + batch_size]],
                                       token_dict,
                                       token_list,
                                       seq_len=512,
                                       mask_rate=0.3)

    checkpoint = ModelCheckpoint(opt_filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min',
                                 save_weights_only=True)

    # Gradually unfreeze the encoder: each round makes the layers above a
    # deeper cut-off trainable, re-compiles, and continues training.
    trainable_layer = list(range(12 * 8, 19 * 8, 8))
    # one batch size per unfreezing round (the original list was one short)
    batch_size = [3] * len(trainable_layer)
    for i, layer_i in enumerate(trainable_layer):
        for j, layer in enumerate(model.layers):
            if j >= layer_i:
                layer.trainable = True
                print(layer.name, layer.trainable)
            else:
                layer.trainable = False
        compile_model(model)
        if os.path.exists(opt_filepath):
            model.load_weights(opt_filepath)
        es = EarlyStopping(monitor='val_loss', patience=20)
        reduce_lr = ReduceLROnPlateau(factor=0.7,
                                      patience=4,
                                      verbose=1,
                                      min_lr=1e-6)
        callbacks_list = [checkpoint, es, reduce_lr]
        model.fit_generator(generator=_generator(batch_size[i]),
                            steps_per_epoch=500,
                            epochs=5000,
                            validation_data=_generator(),
                            validation_steps=200,
                            callbacks=callbacks_list)
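# Sketch of the get_layers_name helper used above. Assumptions: keras_bert
# accepts a list of layer-name prefixes for `trainable`, and the encoder
# blocks are named 'Encoder-<i>-...'; the block ids mirror the range(12, 25)
# call in the snippet.
def get_layers_name(block_ids):
    return ['Encoder-%d-' % i for i in block_ids]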