def test_extract_embeddings_variable_lengths(self):
    tokens = [
        '[PAD]', '[UNK]', '[CLS]', '[SEP]',
        'all', 'work', 'and', 'no', 'play',
        'makes', 'jack', 'a', 'dull', 'boy', '~',
    ]
    token_dict = {token: i for i, token in enumerate(tokens)}
    inputs, outputs = get_model(
        token_num=len(tokens),
        pos_num=20,
        seq_len=None,
        embed_dim=13,
        transformer_num=1,
        feed_forward_dim=17,
        head_num=1,
        training=False,
    )
    model = keras.models.Model(inputs, outputs)
    embeddings = extract_embeddings(
        model,
        [
            ('all work and no play', 'makes jack'),
            ('a dull boy', 'all work and no play and no play'),
        ],
        vocabs=token_dict,
        batch_size=2,
    )
    self.assertEqual(2, len(embeddings))
    self.assertEqual((10, 13), embeddings[0].shape)
    self.assertEqual((14, 13), embeddings[1].shape)
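# A minimal sketch (not part of the original test): `extract_embeddings` also
# accepts plain strings, in which case each text is a single segment. With the
# same 15-token vocabulary and embed_dim=13, the result covers
# [CLS] + 5 tokens + [SEP].
single = extract_embeddings(
    model,
    ['all work and no play'],
    vocabs=token_dict,
)
assert single[0].shape == (7, 13)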
def load_bert_mc(self):
    b_model = get_model(token_num=21128)  # 21128 is the vocabulary size
    compile_model(b_model)
    bert_model = Model(
        inputs=b_model.input[:2],
        outputs=b_model.get_layer('Encoder-12-FeedForward-Norm').output)
    x1_in = Input(shape=(None,))  # token ids of the concatenated question + passage
    x2_in = Input(shape=(None,))  # segment ids of the concatenated question + passage
    s1_in = Input(shape=(None,))  # left boundary of the answer (label)
    s2_in = Input(shape=(None,))  # right boundary of the answer (label)
    x1, x2, s1, s2 = x1_in, x2_in, s1_in, s2_in
    x_mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x1)
    x = bert_model([x1, x2])
    ps1 = Dense(1, use_bias=False)(x)
    # [[0.1], [0.2], [0.3], ...] -> [0.1, 0.2, 0.3, ...]
    # [0.1, 0.2, 0.3, ...] - [0, 0, 0, 0, 1, 1, 1, 1] * 1e10
    ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
        [ps1, x_mask])
    # ps1 = Lambda(lambda x: x[0] * x[1])([ps1, x_mask])
    ps2 = Dense(1, use_bias=False)(x)
    ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
        [ps2, x_mask])
    # ps2 = Lambda(lambda x: x[0] * x[1])([ps2, x_mask])
    model = Model([x1_in, x2_in], [ps1, ps2])
    model.load_weights(self.model_path)
    return model
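# A hedged inference sketch (hypothetical helper, not from the original code):
# the two outputs are per-position logits for the answer's start and end, with
# padded positions pushed to -1e10 by the mask, so a plain argmax per sample
# yields the predicted span.
def extract_answer_span(model, token_ids, segment_ids):
    ps1, ps2 = model.predict([token_ids, segment_ids])
    return ps1.argmax(axis=-1), ps2.argmax(axis=-1)  # (start, end) per sample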
def test_save_load_json(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
    )
    data = model.to_json()
    model = keras.models.model_from_json(data,
                                         custom_objects=get_custom_objects())
    model.summary()
def test_get_token_embedding(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
        attention_activation='gelu',
    )
    embed = get_token_embedding(model)
    self.assertEqual((200, 768), K.int_shape(embed))
def test_save_load_json(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
        attention_activation='gelu',
    )
    data = model.to_json()
    set_custom_objects()
    model = keras.models.model_from_json(data)
    model.summary()
def test_sample(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
    )
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_bert_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(
        model_path,
        custom_objects=get_custom_objects(),
    )
    model.summary(line_length=200)
def test_sample(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
    )
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_bert_%f.h5' % np.random.random())
    model.save(model_path)
    from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
    # Workaround for an incorrect global variable used in Keras
    with CustomObjectScope(get_custom_objects()):
        model = keras.models.load_model(
            model_path,
            custom_objects=get_custom_objects(),
        )
        model.summary(line_length=200)
def test_task_embed(self):
    inputs, outputs = get_model(
        token_num=20,
        embed_dim=12,
        head_num=3,
        transformer_num=2,
        use_task_embed=True,
        task_num=10,
        training=False,
        dropout_rate=0.0,
    )
    model = keras.models.Model(inputs, outputs)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_bert_%f.h5' % np.random.random())
    model.save(model_path)
    model = keras.models.load_model(
        model_path,
        custom_objects=get_custom_objects(),
    )
    model.summary(line_length=200)
def creat_model(self):
    model = keras_bert.get_model(
        token_num=self.vocab_size,
        seq_len=max_len,
        dropout_rate=drop_rate,
    )
    inputs = model.inputs
    embedding = model.get_layer('Encoder-12-FeedForward-Norm').output
    print("Inputs shape: " + str(np.array(inputs).shape))
    print(embedding.shape)
    x = Bidirectional(LSTM(units=self.rnn_units,
                           return_sequences=True))(embedding)
    x = Dropout(self.drop_rate)(x)
    x = Dense(self.n_class)(x)
    self.crf = CRF(self.n_class, sparse_target=False)
    x = self.crf(x)
    self.model = Model(inputs=inputs, outputs=x)
    self.model.summary()
    self.compile()
    return self.model
def test_get_layers(self):
    def _custom_layers(x, trainable=True):
        return keras.layers.LSTM(
            units=768,
            trainable=trainable,
            return_sequences=True,
            name='LSTM',
        )(x)

    inputs, output_layer = get_model(
        token_num=200,
        embed_dim=768,
        custom_layers=_custom_layers,
        training=False,
    )
    model = keras.models.Model(inputs=inputs, outputs=output_layer)
    model.compile(optimizer='adam', loss='mse')
    model.summary()
    self.assertTrue(model is not None)
def test_save_load_json(self):
    model = get_model(
        token_num=200,
        head_num=3,
        transformer_num=2,
        attention_activation='gelu',
    )
    compile_model(model)
    data = model.to_json()
    # Save the model to a JSON file
    with open('./mybert_config.json', 'w') as file:
        file.write(data)
    set_custom_objects()
    # # Load the model from the JSON string
    # model = keras.models.model_from_json(data)
    # Load the model from the JSON file
    with open('./mybert_config.json', 'r') as file:
        model_json = file.read()
    model = keras.models.model_from_json(model_json)
    model.summary()
def test_task_embed(self):
    inputs, outputs = get_model(
        token_num=20,
        embed_dim=12,
        head_num=3,
        transformer_num=2,
        use_task_embed=True,
        task_num=10,
        training=False,
        dropout_rate=0.0,
    )
    model = keras.models.Model(inputs, outputs)
    model_path = os.path.join(tempfile.gettempdir(),
                              'keras_bert_%f.h5' % np.random.random())
    model.save(model_path)
    from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
    # Workaround for an incorrect global variable used in Keras
    with CustomObjectScope(get_custom_objects()):
        model = keras.models.load_model(
            model_path,
            custom_objects=get_custom_objects(),
        )
        model.summary(line_length=200)
     ['makes', 'jack', 'a', 'dull', 'boy']],
    [['from', 'the', 'day', 'forth'], ['my', 'arm', 'changed']],
    [['and', 'a', 'voice', 'echoed'],
     ['power', 'give', 'me', 'more', 'power']],
]
# sentence_pairs = [i for i in stp()]
token_dict = get_base_dict()
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())

model = get_model(token_num=len(token_dict),
                  head_num=5,
                  transformer_num=12,
                  embed_dim=25,
                  feed_forward_dim=100,
                  seq_len=20,
                  pos_num=20,
                  dropout_rate=0.1)
model.summary()


def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
            swap_sentence_rate=1.0,
def build_model_from_config(
        config_file,
        checkpoint_file,
        training=False,
        trainable=False,
        seq_len=None,
):
    """Build the model from config file.

    :param config_file: The path to the JSON configuration file.
    :param checkpoint_file: The path to the checkpoint file.
    :param training: If training, the whole model will be returned.
    :param trainable: Whether the model is trainable.
    :param seq_len: If it is not None and it is shorter than the value in
        the config file, the weights in position embeddings will be sliced
        to fit the new length.
    :return: model and config
    """
    with open(config_file, 'r') as reader:
        config = json.loads(reader.read())
    if seq_len is not None:
        config['max_position_embeddings'] = min(
            seq_len, config['max_position_embeddings'])
    if trainable is None:
        trainable = training
    model = get_model(
        token_num=config['vocab_size'],
        pos_num=config['max_position_embeddings'],
        seq_len=config['max_position_embeddings'],
        embed_dim=config['hidden_size'],
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        training=False,
        trainable=True,
    )
    inputs, outputs = model
    bio_label = Input(shape=(maxlen,))
    event = Input(shape=(1,))
    mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(
            inputs[0])
    event_embedding = Embedding(len(event2id), hidden_size,
                                mask_zero=True)(event)
    outputs = Dropout(0.15)(outputs)
    attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(config['hidden_size'])(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([outputs, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
    t_dim = K.int_shape(outputs)[-1]
    bert_attention = Lambda(seq_and_vec,
                            output_shape=(None, t_dim * 2))([outputs, attention])
    cnn1 = MaskedConv1D(filters=hidden_size,
                        kernel_size=3,
                        activation='relu',
                        padding='same')(bert_attention)
    event_bc = Lambda(lambda input: input[0] * 0 + input[1])(
        [cnn1, event_embedding])
    con_cnn_event = Concatenate(axis=-1)([cnn1, event_bc])
    dens1 = Dense(hidden_size, activation='relu', use_bias=True)(con_cnn_event)
    # BIOE tags
    bio_pred = Dense(4, activation='softmax')(dens1)
    entity_model = keras.models.Model(
        [inputs[0], inputs[1], event],
        [bio_pred])  # model that predicts the subject span
    train_model = keras.models.Model(
        [inputs[0], inputs[1], bio_label, event],
        [bio_pred])
    loss = K.sparse_categorical_crossentropy(bio_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)
    train_model.add_loss(loss)
    train_model.summary()
    train_model.compile(optimizer=keras.optimizers.Adam(lr=3e-5))
    load_model_weights_from_checkpoint(train_model, config, checkpoint_file,
                                       training)
    return train_model, entity_model
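# A hedged usage sketch: the function returns the training model (loss already
# attached via add_loss) and the entity model used at prediction time. The
# paths are placeholders, and the module-level globals it reads (maxlen,
# event2id, hidden_size, seq_and_vec, ...) must already be defined.
train_model, entity_model = build_model_from_config(
    './bert/bert_config.json',
    './bert/bert_model.ckpt',
    training=False,
)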
model_path = "./{}/".format(BASE_MODEL_DIR) seq_len = 128 with open(model_path + "bert_config.json", 'r') as reader: config = json.loads(reader.read()) if seq_len is not None: config['max_position_embeddings'] = seq_len = min( seq_len, config['max_position_embeddings']) bert = get_model( token_num=config['vocab_size'], pos_num=config['max_position_embeddings'], seq_len=seq_len, embed_dim=config['hidden_size'], transformer_num=config['num_hidden_layers'], head_num=config['num_attention_heads'], feed_forward_dim=config['intermediate_size'], feed_forward_activation=config['hidden_act'], training=None, trainable=True, output_layer_num=1, ) inputs, outputs = bert print(type(bert), type(outputs)) load_model_weights_from_checkpoint(outputs, config, model_path + "bert_model.ckpt") x1 = Input(shape=(None, )) x2 = Input(shape=(None, )) bert_out = outputs.output([x1, x2])
        training=True,
        trainable=True,
        seq_len=seq_len)
    Input_layer = model.inputs[:2]
    x = model.layers[-9].output
    x = BatchNormalization()(x)
    x = Lambda(lambda seq: seq[:, 0])(x)  # take the [CLS] vector
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    Output_layer = Dense(3, activation='sigmoid')(x)
    model = Model(Input_layer, Output_layer)
    model.load_weights(sys.argv[1])
    return model


model = get_model()
Y_pred = model.predict([X, seg], verbose=1)
'''
Y_pred_by_cate = np.load('./Y_test_pred_by_category.npy')
print(Y_pred_by_cate)
Y_pred = (3 * Y_pred + Y_pred_by_cate) / 4
'''
Y_pred = (Y_pred > 0.5)
other_pred = np.sum(Y_pred, axis=1) < 0.9
Y = np.hstack((Y_pred, other_pred.reshape(-1, 1))).astype('int')
opt_path = sys.argv[2]
f = open(opt_path, 'w')
wt_str = 'order_id,THEORETICAL,ENGINEERING,EMPIRICAL,OTHERS\n'
f.write(wt_str)
for i in range(Y_pred.shape[0]):
# @title Load Basic Model
import codecs
from keras_bert import load_trained_model_from_checkpoint, get_model, compile_model

token_dict = {}
with codecs.open(vocab_path, 'r', 'utf8') as reader:
    for line in reader:
        token = line.strip()
        token_dict[token] = len(token_dict)

with strategy.scope():
    model = get_model(
        token_num=200000,
        # head_num=25,
        # transformer_num=20,
        embed_dim=48 * 2,
        feed_forward_dim=512,
        seq_len=100,
        pos_num=100,  # 128
        training=True,
        trainable=None,
        dropout_rate=0.1,
    )
    compile_model(model)

# model = load_trained_model_from_checkpoint(
#     config_path,
#     checkpoint_path,
#     training=True,
def GetModel(ucfg):
    '''
    ucfg: user's config for the table
    returns: model, isconv
    '''
    from tensorflow import keras  # used by the 'mymodel' branch and plotting below

    nnname = ucfg['nnname']
    isconv = True

    if nnname == 'newmodel':
        import sys
        sys.path.append("..")
        from newmodel import tfmodel
        model, isconv = tfmodel()
        sys.path.remove("..")

    import tensorflow.keras.applications as nn
    if hasattr(nn, nnname):
        model = getattr(nn, nnname)(weights=None)

    # efficientnet: B0-B7
    elif nnname[:-2] == 'EfficientNet':
        import tfmodels.efficientnet.tfkeras as nn
        model = getattr(nn, nnname)(weights=None)

    # TF2.x models:
    elif nnname == 'ncf':
        import tfmodels.ncf as nn
        name = 'ncfmodel'
        model = getattr(nn, name)(istrain=False)
        isconv = False

    elif nnname == 'din':
        import tfmodels.din as nn
        name = 'din'
        _, model = getattr(nn, name)(item_count=63001, cate_count=801,
                                     hidden_units=128)
        isconv = False

    # bert from keras_bert
    elif nnname == 'bert':
        isconv = False
        from keras_bert import get_base_dict, get_model, compile_model
        # Build token dictionary
        token_dict = get_base_dict()
        training = True
        if training:
            # bert base:
            # embed_dim = 768
            # headnum = 12
            # layernum = 12
            # bert large:
            embed_dim = 1024
            headnum = 16
            layernum = 24
            ff_dim = embed_dim * 4
            token_num = 30522  # number of words, from the paper
            model = get_model(token_num=token_num,
                              pos_num=512,
                              seq_len=512,
                              embed_dim=embed_dim,
                              transformer_num=layernum,
                              head_num=headnum,
                              feed_forward_dim=ff_dim,
                              training=training)
        else:
            # Revise lib\site-packages\keras_bert\bert.py, line 164:
            # "return inputs, transformed" -> "return inputs, transformed, model"
            _, _, model = get_model(token_num=len(token_dict),
                                    embed_dim=1024,
                                    head_num=16,
                                    training=training)
        compile_model(model)

    if nnname == 'mymodel':
        isconv = False
        ## ===== To add a customized model ====
        # refer to: https://keras.io/guides/sequential_model/
        from tensorflow.keras import layers
        # Define a customized model
        model = keras.Sequential()
        model.add(keras.Input(shape=(250, 250, 3)))  # 250x250 RGB images
        model.add(layers.Conv2D(32, 5, strides=2, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(3))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(3))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(2))
        # Now that we have 4x4 feature maps, apply global max pooling.
        model.add(layers.GlobalMaxPooling2D())
        # Finally, add a classification layer.
        model.add(layers.Dense(10))
        ## ===== end of your codes ======

    if True:
        g = keras.utils.model_to_dot(model, show_shapes=True)
        if nnname == 'newmodel':
            nnname = ucfg['model']
        g.write_pdf(".//outputs//tf//" + nnname + '.pdf')

    return model, isconv
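# A minimal usage sketch, assuming the tfmodels package layout above: 'bert'
# builds the BERT-large graph defined in that branch. Note the function also
# writes a PDF, so the .//outputs//tf// directory must already exist.
model, isconv = GetModel({'nnname': 'bert'})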
question = layers.Input(shape=(query_maxlen,), dtype='int32')
encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
encoded_question = layers.Dropout(0.3)(encoded_question)
encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)

merged = layers.add([encoded_sentence, encoded_question])
merged = RNN(EMBED_HIDDEN_SIZE)(merged)
merged = layers.Dropout(0.3)(merged)
preds = layers.Dense(vocab_size, activation='softmax')(merged)

model = get_model(
    token_num=vocab_size,  # vocabulary size, not the sequence length
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
)

token_dict = get_base_dict()  # A dict that contains some special tokens
for token in story:
    if token not in token_dict:
        token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word


def _generator():
    while True:
        yield gen_batch_inputs(
            [sentence, question],
]

# Build token dictionary
token_dict = get_base_dict()  # A dict that contains some special tokens
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word

# Build & train the model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
model.summary()


def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
question = layers.Input(shape=(query_maxlen,), dtype='int32')
encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
encoded_question = layers.Dropout(0.3)(encoded_question)
encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)

merged = layers.add([encoded_sentence, encoded_question])
merged = RNN(EMBED_HIDDEN_SIZE)(merged)
merged = layers.Dropout(0.3)(merged)
preds = layers.Dense(vocab_size, activation='softmax')(merged)

model = get_model(
    token_num=vocab_size,  # vocabulary size, not the sequence length
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)


def _generator():
    while True:
        yield gen_batch_inputs(
            vocab,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
            swap_sentence_rate=1.0,
        )
# Build token dictionary
token_dict = get_base_dict()  # A dict that contains some special tokens
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word

# Build & train the model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
compile_model(model)
model.summary()


def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
import os

import keras
from keras_bert import get_model

model = get_model(
    token_num=30000,
    pos_num=512,
    transformer_num=12,
    head_num=12,
    embed_dim=768,
    feed_forward_dim=768 * 4,
)
model.summary(line_length=120)
current_path = os.path.dirname(os.path.abspath(__file__))
output_path = os.path.join(current_path, 'bert_small.png')
keras.utils.plot_model(model, show_shapes=True, to_file=output_path)

model = get_model(
    token_num=30000,
    pos_num=512,
    transformer_num=24,
    head_num=16,
    embed_dim=1024,
    feed_forward_dim=1024 * 4,
)
model.summary(line_length=120)
output_path = os.path.join(current_path, 'bert_big.png')
keras.utils.plot_model(model, show_shapes=True, to_file=output_path)
def build_model_from_config(
        config_file,
        checkpoint_file,
        training=False,
        trainable=False,
        seq_len=None,
):
    """Build the model from config file.

    :param config_file: The path to the JSON configuration file.
    :param checkpoint_file: The path to the checkpoint file.
    :param training: If training, the whole model will be returned.
    :param trainable: Whether the model is trainable.
    :param seq_len: If it is not None and it is shorter than the value in
        the config file, the weights in position embeddings will be sliced
        to fit the new length.
    :return: model and config
    """
    with open(config_file, 'r') as reader:
        config = json.loads(reader.read())
    if seq_len is not None:
        config['max_position_embeddings'] = min(
            seq_len, config['max_position_embeddings'])
    if trainable is None:
        trainable = training
    model = get_model(
        token_num=config['vocab_size'],
        pos_num=config['max_position_embeddings'],
        seq_len=config['max_position_embeddings'],
        embed_dim=config['hidden_size'],
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        training=False,
        trainable=True,
    )
    # SetLearningRate(model, 0.00001, True)
    inputs, outputs = model
    t_in = Input(shape=(None,))
    s_in = Input(shape=(None,))
    k1_in = Input(shape=(1,))
    k2_in = Input(shape=(1,))
    o1_in = Input(shape=(None,))
    o2_in = Input(shape=(None,))
    t, s, k1, k2, o1, o2 = t_in, s_in, k1_in, k2_in, o1_in, o2_in
    mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(
            inputs[0])
    outputs = Dropout(0.5)(outputs)
    attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(config['hidden_size'])(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([outputs, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
    t_dim = K.int_shape(outputs)[-1]
    h = Lambda(seq_and_vec, output_shape=(None, t_dim * 2))([outputs, attention])
    conv1 = MaskedConv1D()(h)
    ps = Dense(3, activation='softmax')(conv1)
    subject_model = keras.models.Model(
        [inputs[0], inputs[1]],
        [ps])  # model that predicts the subject

    # Predict o1, o2
    k1 = Lambda(seq_gather, output_shape=(t_dim,))([outputs, k1])
    k2 = Lambda(seq_gather, output_shape=(t_dim,))([outputs, k2])
    k = Concatenate()([k1, k2])
    h = Lambda(seq_and_vec, output_shape=(None, t_dim * 2))([outputs, attention])
    h = Lambda(seq_and_vec, output_shape=(None, t_dim * 4))([h, k])
    h = Concatenate(axis=-1)([h, conv1])
    h = MaskedConv1D()(h)
    po1 = Dense(num_classes + 1, activation='softmax')(h)
    po2 = Dense(num_classes + 1, activation='softmax')(h)
    object_model = keras.models.Model(
        [inputs[0], inputs[1], k1_in, k2_in],
        [po1, po2])  # given text and subject, predict the object and its relation

    train_model = keras.models.Model(
        inputs=[inputs[0], inputs[1], s_in, k1_in, k2_in, o1_in, o2_in],
        outputs=[ps, po1, po2])
    s_loss = K.sparse_categorical_crossentropy(s, ps)
    s_loss = K.sum(s_loss * mask[:, :, 0]) / K.sum(mask)
    o1_loss = K.sparse_categorical_crossentropy(o1, po1)
    o1_loss = K.sum(o1_loss * mask[:, :, 0]) / K.sum(mask)
    o2_loss = K.sparse_categorical_crossentropy(o2, po2)
    o2_loss = K.sum(o2_loss * mask[:, :, 0]) / K.sum(mask)
    train_model.add_loss(s_loss + o1_loss + o2_loss)
    train_model.summary()
    train_model.compile(optimizer=keras.optimizers.Adam(lr=3e-5))
    load_model_weights_from_checkpoint(train_model, config, checkpoint_file,
                                       training)
    return train_model, subject_model, object_model
## Build Token Dictionary
token_dict = get_base_dict()
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())
print("token_dict size: ", len(token_dict))

## Build Keras Model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
compile_model(model)
model.summary()


## Train Keras Model
def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
        batch_size=16)


def _get_session():
    tf_config = tf.ConfigProto(use_per_session_threads=True,
                               allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True
    return tf.Session(graph=tf.get_default_graph(), config=tf_config)


K.set_session(_get_session())

# Build & train the model
model = get_model(token_num=len(token_dict),
                  embed_dim=256,
                  head_num=4,
                  transformer_num=6,
                  seq_len=seq_len)
model.summary()

model.fit_generator(
    generator=_generator(),
    steps_per_epoch=4000,
    epochs=100,
    validation_data=_generator(),
    validation_steps=100,
    verbose=1,
    callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
)

model.save_weights('bert_nlg.hdf5')
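# A hedged restore sketch: the weights saved above can be loaded back into a
# model rebuilt with the same get_model arguments before running inference.
model = get_model(token_num=len(token_dict),
                  embed_dim=256,
                  head_num=4,
                  transformer_num=6,
                  seq_len=seq_len)
model.load_weights('bert_nlg.hdf5')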
def test_fit(self):
    current_path = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current_path, 'test_bert_fit.h5')
    sentence_pairs = [
        [['all', 'work', 'and', 'no', 'play'],
         ['makes', 'jack', 'a', 'dull', 'boy']],
        [['from', 'the', 'day', 'forth'], ['my', 'arm', 'changed']],
        [['and', 'a', 'voice', 'echoed'],
         ['power', 'give', 'me', 'more', 'power']],
    ]
    token_dict = get_base_dict()
    for pairs in sentence_pairs:
        for token in pairs[0] + pairs[1]:
            if token not in token_dict:
                token_dict[token] = len(token_dict)
    token_list = list(token_dict.keys())
    if os.path.exists(model_path):
        model = keras.models.load_model(
            model_path,
            custom_objects=get_custom_objects(),
        )
    else:
        model = get_model(
            token_num=len(token_dict),
            head_num=5,
            transformer_num=12,
            embed_dim=25,
            feed_forward_dim=100,
            seq_len=20,
            pos_num=20,
            dropout_rate=0.05,
            attention_activation=gelu,
            lr=1e-3,
            decay_steps=30000,
            warmup_steps=10000,
            weight_decay=1e-3,
        )
    model.summary()

    def _generator():
        while True:
            yield gen_batch_inputs(
                sentence_pairs,
                token_dict,
                token_list,
                seq_len=20,
                mask_rate=0.3,
                swap_sentence_rate=1.0,
            )

    model.fit_generator(
        generator=_generator(),
        steps_per_epoch=1000,
        epochs=1,
        validation_data=_generator(),
        validation_steps=100,
    )
    # model.save(model_path)
    for inputs, outputs in _generator():
        predicts = model.predict(inputs)
        outputs = list(map(lambda x: np.squeeze(x, axis=-1), outputs))
        predicts = list(map(lambda x: np.argmax(x, axis=-1), predicts))
        batch_size, seq_len = inputs[-1].shape
        for i in range(batch_size):
            for j in range(seq_len):
                if inputs[-1][i][j]:
                    self.assertEqual(outputs[0][i][j], predicts[0][i][j])
        self.assertTrue(np.allclose(outputs[1], predicts[1]))
        break
bert_config = {
    'token_num': len(token_dict),
    'head_num': 6,  # was 4
    'transformer_num': 4,
    'embed_dim': 36,
    'feed_forward_dim': 150,  # was 100
    'seq_len': max_seq_len,
    'pos_num': max_seq_len,
    'dropout_rate': 0.05,
}

with open('../tmp/bert.config', 'w') as f:
    json.dump(bert_config, f)

# Build & train the model
model = get_model(**bert_config)
model.summary()

# for layer in model.layers:
#     print('{}: {} --> {}'.format(layer.name, layer.input_shape, layer.output_shape))


def my_generator(samples, batch_size):
    while True:
        start_index = 0
        while (start_index + batch_size) < len(samples):
            if False:
                print(
                    u'DEBUG\nstart_index={}\nphrase1 len={} words={}\nphrase2 len={} words={}\n'
                    .format(start_index,
                            len(samples[start_index][0]),
                            u' '.join(samples[start_index][0]),
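# A hedged round-trip sketch: since the configuration is plain JSON, the file
# written above can rebuild an identical architecture later.
with open('../tmp/bert.config', 'r') as f:
    restored_model = get_model(**json.load(f))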
import os

import keras
from keras_bert import get_model

model = get_model(
    token_num=30000,
    pos_num=512,
    transformer_num=12,
    head_num=12,
    embed_dim=768,
    feed_forward_dim=768 * 4,
)
model.summary(line_length=120)
current_path = os.path.dirname(os.path.abspath(__file__))
output_path = os.path.join(current_path, 'bert_small.png')
keras.utils.plot_model(model, show_shapes=True, to_file=output_path)

model = get_model(
    token_num=30000,
    pos_num=512,
    transformer_num=24,
    head_num=16,
    embed_dim=1024,
    feed_forward_dim=1024 * 4,
)
model.summary(line_length=120)
output_path = os.path.join(current_path, 'bert_big.png')
keras.utils.plot_model(model, show_shapes=True, to_file=output_path)

inputs, outputs = get_model(
    token_num=30000,
]

# Build a custom token dictionary
token_dict = get_base_dict()  # initialized with the special tokens, e.g. `[CLS]`
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word

# Build & train the model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
compile_model(model)
model.summary()
plot_model(model, to_file="model.png", show_shapes=True)


def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,