def test_get_token_embedding(self): model = get_model( token_num=200, head_num=3, transformer_num=2, attention_activation='gelu', ) embed = get_token_embedding(model) self.assertEqual((200, 768), K.int_shape(embed))
def test_save_load_json(self): model = get_model( token_num=200, head_num=3, transformer_num=2, attention_activation='gelu', ) compile_model(model) data = model.to_json() set_custom_objects() model = keras.models.model_from_json(data) model.summary()
def test_sample(self): model = get_model( token_num=200, head_num=3, transformer_num=2, ) model_path = os.path.join(tempfile.gettempdir(), 'keras_bert_%f.h5' % np.random.random()) model.save(model_path) model = keras.models.load_model( model_path, custom_objects=get_custom_objects(), ) model.summary(line_length=200)
def test_extract_embeddings_variable_lengths(self): tokens = [ '[PAD]', '[UNK]', '[CLS]', '[SEP]', 'all', 'work', 'and', 'no', 'play', 'makes', 'jack', 'a', 'dull', 'boy', '~', ] token_dict = {token: i for i, token in enumerate(tokens)} inputs, outputs = get_model( token_num=len(tokens), pos_num=20, seq_len=None, embed_dim=13, transformer_num=1, feed_forward_dim=17, head_num=1, training=False, ) model = keras.models.Model(inputs, outputs) embeddings = extract_embeddings( model, [ ('all work and no play', 'makes jack'), ('a dull boy', 'all work and no play and no play'), ], vocabs=token_dict, batch_size=2, ) self.assertEqual(2, len(embeddings)) self.assertEqual((10, 13), embeddings[0].shape) self.assertEqual((14, 13), embeddings[1].shape)
def test_task_embed(self): inputs, outputs = get_model( token_num=20, embed_dim=12, head_num=3, transformer_num=2, use_task_embed=True, task_num=10, training=False, dropout_rate=0.0, ) model = keras.models.Model(inputs, outputs) model_path = os.path.join(tempfile.gettempdir(), 'keras_bert_%f.h5' % np.random.random()) model.save(model_path) model = keras.models.load_model( model_path, custom_objects=get_custom_objects(), ) model.summary(line_length=200)
import os import keras from bertTAT.bert import get_model # 1. 保存小型结构bert model = get_model( token_num=30000, pos_num=512, transformer_num=12, head_num=12, embed_dim=768, feed_forward_dim=768 * 4, ) model.summary(line_length=120) # 打印输出模型结构 current_path = os.path.dirname(os.path.abspath(__file__)) output_path = os.path.join(current_path, 'bert_small.png') keras.utils.plot_model(model, show_shapes=True, to_file=output_path) # 2. 保存大型结构bert model = get_model( token_num=30000, pos_num=512, transformer_num=24, head_num=16, embed_dim=1024, feed_forward_dim=1024 * 4, ) model.summary(line_length=120) output_path = os.path.join(current_path, 'bert_big.png') keras.utils.plot_model(model, show_shapes=True, to_file=output_path)
def test_fit(self): current_path = os.path.dirname(os.path.abspath(__file__)) model_path = os.path.join(current_path, 'test_bert_fit.h5') sentence_pairs = [ [['all', 'work', 'and', 'no', 'play'], ['makes', 'jack', 'a', 'dull', 'boy']], [['from', 'the', 'day', 'forth'], ['my', 'arm', 'changed']], [['and', 'a', 'voice', 'echoed'], ['power', 'give', 'me', 'more', 'power']], ] token_dict = get_base_dict() for pairs in sentence_pairs: for token in pairs[0] + pairs[1]: if token not in token_dict: token_dict[token] = len(token_dict) token_list = list(token_dict.keys()) if os.path.exists(model_path): steps_per_epoch = 10 model = keras.models.load_model( model_path, custom_objects=get_custom_objects(), ) else: steps_per_epoch = 1000 model = get_model( token_num=len(token_dict), head_num=5, transformer_num=12, embed_dim=25, feed_forward_dim=100, seq_len=20, pos_num=20, dropout_rate=0.05, attention_activation='gelu', ) compile_model( model, learning_rate=1e-3, decay_steps=30000, warmup_steps=10000, weight_decay=1e-3, ) model.summary() def _generator(): while True: yield gen_batch_inputs( sentence_pairs, token_dict, token_list, seq_len=20, mask_rate=0.3, swap_sentence_rate=1.0, ) model.fit_generator( generator=_generator(), steps_per_epoch=steps_per_epoch, epochs=1, validation_data=_generator(), validation_steps=steps_per_epoch // 10, ) # model.save(model_path) for inputs, outputs in _generator(): predicts = model.predict(inputs) outputs = list(map(lambda x: np.squeeze(x, axis=-1), outputs)) predicts = list(map(lambda x: np.argmax(x, axis=-1), predicts)) batch_size, seq_len = inputs[-1].shape for i in range(batch_size): match, total = 0, 0 for j in range(seq_len): if inputs[-1][i][j]: total += 1 if outputs[0][i][j] == predicts[0][i][j]: match += 1 self.assertGreater(match, total * 0.9) self.assertTrue(np.allclose(outputs[1], predicts[1])) break