Example No. 1
 def test_extract_embeddings_variable_lengths(self):
     tokens = [
         '[PAD]', '[UNK]', '[CLS]', '[SEP]',
         'all', 'work', 'and', 'no', 'play',
         'makes', 'jack', 'a', 'dull', 'boy', '~',
     ]
     token_dict = {token: i for i, token in enumerate(tokens)}
     inputs, outputs = get_model(
         token_num=len(tokens),
         pos_num=20,
         seq_len=None,
         embed_dim=13,
         transformer_num=1,
         feed_forward_dim=17,
         head_num=1,
         training=False,
     )
     model = keras.models.Model(inputs, outputs)
     embeddings = extract_embeddings(
         model,
         [
             ('all work and no play', 'makes jack'),
             ('a dull boy', 'all work and no play and no play'),
         ],
         vocabs=token_dict,
         batch_size=2,
     )
     self.assertEqual(2, len(embeddings))
     self.assertEqual((10, 13), embeddings[0].shape)
     self.assertEqual((14, 13), embeddings[1].shape)
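For reference, extract_embeddings also accepts plain sentences instead of sentence pairs. A minimal sketch (not part of the original test), reusing the model and token_dict built above:

single = extract_embeddings(
    model,
    ['all work and no play', 'makes jack a dull boy'],
    vocabs=token_dict,
)
# each entry covers [CLS] + tokens + [SEP], e.g. shape (7, 13) for the first text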
Example No. 2
 def load_bert_mc(self):
     b_model = get_model(token_num=21128)  # 21128 is the vocabulary size
     compile_model(b_model)
     bert_model = Model(
         inputs=b_model.input[:2],
         outputs=b_model.get_layer('Encoder-12-FeedForward-Norm').output)
     x1_in = Input(shape=(None, ))  # token ids of the concatenated question and passage
     x2_in = Input(shape=(None, ))  # segment ids of the concatenated question and passage
     s1_in = Input(shape=(None, ))  # left boundary of the answer (label)
     s2_in = Input(shape=(None, ))  # right boundary of the answer (label)
     x1, x2, s1, s2 = x1_in, x2_in, s1_in, s2_in
     x_mask = Lambda(
         lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x1)
     x = bert_model([x1, x2])
     ps1 = Dense(1, use_bias=False)(x)
     # [[0.1], [0.2], [0.3], ...] -> [0.1, 0.2, 0.3, ...]  (drop the last axis)
     # then subtract 1e10 at padded positions so the softmax ignores them
     ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
         [ps1, x_mask])
     # ps1 = Lambda(lambda x: x[0]*x[1])([ps1, x_mask])
     ps2 = Dense(1, use_bias=False)(x)
     ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
         [ps2, x_mask])
     # ps2 = Lambda(lambda x:x[0]*x[1])([ps2, x_mask])
     model = Model([x1_in, x2_in], [ps1, ps2])
     model.load_weights(self.model_path)
     return model
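A minimal decoding sketch (not part of the original class), assuming numpy is imported as np and x1/x2 hold a tokenized question-passage batch in the format the model above expects:

ps1, ps2 = model.predict([x1, x2])
for i in range(len(x1)):
    start = int(np.argmax(ps1[i]))                # most likely left boundary
    end = start + int(np.argmax(ps2[i][start:]))  # keep the right boundary at or after the left one
    answer_token_ids = x1[i][start:end + 1]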
Example No. 3
 def test_save_load_json(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
     )
     data = model.to_json()
     model = keras.models.model_from_json(data, custom_objects=get_custom_objects())
     model.summary()
Example No. 4
 def test_get_token_embedding(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
         attention_activation='gelu',
     )
     embed = get_token_embedding(model)
     self.assertEqual((200, 768), K.int_shape(embed))
Example No. 5
 def test_save_load_json(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
         attention_activation='gelu',
     )
     data = model.to_json()
     set_custom_objects()
     model = keras.models.model_from_json(data)
     model.summary()
Example No. 6
 def test_sample(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
     )
     model_path = os.path.join(tempfile.gettempdir(), 'keras_bert_%f.h5' % np.random.random())
     model.save(model_path)
     model = keras.models.load_model(
         model_path,
         custom_objects=get_custom_objects(),
     )
     model.summary(line_length=200)
Example No. 7
 def test_sample(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
     )
     model_path = os.path.join(tempfile.gettempdir(),
                               'keras_bert_%f.h5' % np.random.random())
     model.save(model_path)
     from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
     # Workaround for incorrect global variable used in keras
     with CustomObjectScope(get_custom_objects()):
         model = keras.models.load_model(
             model_path,
             custom_objects=get_custom_objects(),
         )
     model.summary(line_length=200)
Example No. 8
 def test_task_embed(self):
     inputs, outputs = get_model(
         token_num=20,
         embed_dim=12,
         head_num=3,
         transformer_num=2,
         use_task_embed=True,
         task_num=10,
         training=False,
         dropout_rate=0.0,
     )
     model = keras.models.Model(inputs, outputs)
     model_path = os.path.join(tempfile.gettempdir(), 'keras_bert_%f.h5' % np.random.random())
     model.save(model_path)
     model = keras.models.load_model(
         model_path,
         custom_objects=get_custom_objects(),
     )
     model.summary(line_length=200)
Example No. 9
    def creat_model(self):
        model = keras_bert.get_model(
            token_num=self.vocab_size,
            seq_len=max_len,
            dropout_rate=drop_rate,
        )
        inputs = model.inputs
        embedding = model.get_layer('Encoder-12-FeedForward-Norm').output
        print("Inputs shape:"+str((np.array(inputs)).shape))
        print(embedding.shape)
        x = Bidirectional(LSTM(units=self.rnn_units, return_sequences=True))(embedding)
        x = Dropout(self.drop_rate)(x)
        x = Dense(self.n_class)(x)
        self.crf = CRF(self.n_class, sparse_target=False)
        x = self.crf(x)
        self.model = Model(inputs=inputs, outputs=x)
        self.model.summary()
        self.compile()

        return self.model
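The self.compile() call above is not shown in this listing. A minimal sketch of what it could look like, assuming the CRF layer comes from keras_contrib and therefore exposes loss_function and accuracy:

    def compile(self):
        # hypothetical sketch: pair the CRF layer's own loss and metric with Adam
        self.model.compile(
            optimizer='adam',
            loss=self.crf.loss_function,
            metrics=[self.crf.accuracy],
        )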
Example No. 10
    def test_get_layers(self):
        def _custom_layers(x, trainable=True):
            return keras.layers.LSTM(
                units=768,
                trainable=trainable,
                return_sequences=True,
                name='LSTM',
            )(x)

        inputs, output_layer = get_model(
            token_num=200,
            embed_dim=768,
            custom_layers=_custom_layers,
            training=False,
        )
        model = keras.models.Model(inputs=inputs, outputs=output_layer)
        model.compile(optimizer='adam', loss='mse')
        model.summary()
        self.assertTrue(model is not None)
Example No. 11
 def test_save_load_json(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
         attention_activation='gelu',
     )
     compile_model(model)
     data = model.to_json()
     # Save the model to a JSON file
     with open('./mybert_config.json', 'w') as file:
         file.write(data)
     set_custom_objects()
     # # Load the model directly from the JSON string
     # model = keras.models.model_from_json(data)
     # Load the model from the JSON file
     with open('./mybert_config.json', 'r') as file:
         model_json = file.read()
     model = keras.models.model_from_json(model_json)
     model.summary()
Example No. 12
 def test_task_embed(self):
     inputs, outputs = get_model(
         token_num=20,
         embed_dim=12,
         head_num=3,
         transformer_num=2,
         use_task_embed=True,
         task_num=10,
         training=False,
         dropout_rate=0.0,
     )
     model = keras.models.Model(inputs, outputs)
     model_path = os.path.join(tempfile.gettempdir(),
                               'keras_bert_%f.h5' % np.random.random())
     model.save(model_path)
     from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
     # Workaround for incorrect global variable used in keras
     with CustomObjectScope(get_custom_objects()):
         model = keras.models.load_model(
             model_path,
             custom_objects=get_custom_objects(),
         )
     model.summary(line_length=200)
Example No. 13
     ['makes', 'jack', 'a', 'dull', 'boy']],
    [['from', 'the', 'day', 'forth'], ['my', 'arm', 'changed']],
    [['and', 'a', 'voice', 'echoed'], ['power', 'give', 'me', 'more',
                                       'power']],
]
# sentence_pairs = [i for i in stp()]
token_dict = get_base_dict()
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())
model = get_model(token_num=len(token_dict),
                  head_num=5,
                  transformer_num=12,
                  embed_dim=25,
                  feed_forward_dim=100,
                  seq_len=20,
                  pos_num=20,
                  dropout_rate=0.1)
model.summary()


def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
            swap_sentence_rate=1.0,
Example No. 14
def build_model_from_config(
    config_file,
    checkpoint_file,
    training=False,
    trainable=False,
    seq_len=None,
):
    """Build the model from config file.

    :param config_file: The path to the JSON configuration file.
    :param training: If training, the whole model will be returned.
    :param trainable: Whether the model is trainable.
    :param seq_len: If it is not None and it is shorter than the value in the config file, the weights in
                    position embeddings will be sliced to fit the new length.
    :return: model and config
    """
    with open(config_file, 'r') as reader:
        config = json.loads(reader.read())
    if seq_len is not None:
        config['max_position_embeddings'] = min(
            seq_len, config['max_position_embeddings'])
    if trainable is None:
        trainable = training
    model = get_model(
        token_num=config['vocab_size'],
        pos_num=config['max_position_embeddings'],
        seq_len=config['max_position_embeddings'],
        embed_dim=config['hidden_size'],
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        training=False,
        trainable=True,
    )
    inputs, outputs = model
    bio_label = Input(shape=(maxlen, ))
    event = Input(shape=(1, ))

    mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(
            inputs[0])
    event_embedding = Embedding(len(event2id), hidden_size,
                                mask_zero=True)(event)

    outputs = Dropout(0.15)(outputs)
    attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(config['hidden_size'])(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([outputs, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
    t_dim = K.int_shape(outputs)[-1]
    bert_attention = Lambda(seq_and_vec,
                            output_shape=(None,
                                          t_dim * 2))([outputs, attention])

    cnn1 = MaskedConv1D(filters=hidden_size,
                        kernel_size=3,
                        activation='relu',
                        padding='same')(bert_attention)
    event_bc = Lambda(lambda input: input[0] * 0 + input[1])(
        [cnn1, event_embedding])
    con_cnn_event = Concatenate(axis=-1)([cnn1, event_bc])
    dens1 = Dense(hidden_size, activation='relu', use_bias=True)(con_cnn_event)
    # BIOE tags
    bio_pred = Dense(4, activation='softmax')(dens1)
    entity_model = keras.models.Model([inputs[0], inputs[1], event],
                                      [bio_pred])  # model that predicts the subject
    train_model = keras.models.Model([inputs[0], inputs[1], bio_label, event],
                                     [bio_pred])

    loss = K.sparse_categorical_crossentropy(bio_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)

    train_model.add_loss(loss)
    train_model.summary()
    train_model.compile(optimizer=keras.optimizers.Adam(lr=3e-5), )
    load_model_weights_from_checkpoint(train_model, config, checkpoint_file,
                                       training)
    return train_model, entity_model
model_path = "./{}/".format(BASE_MODEL_DIR)
seq_len = 128
with open(model_path + "bert_config.json", 'r') as reader:
    config = json.loads(reader.read())
if seq_len is not None:
    config['max_position_embeddings'] = seq_len = min(
        seq_len, config['max_position_embeddings'])

bert = get_model(
    token_num=config['vocab_size'],
    pos_num=config['max_position_embeddings'],
    seq_len=seq_len,
    embed_dim=config['hidden_size'],
    transformer_num=config['num_hidden_layers'],
    head_num=config['num_attention_heads'],
    feed_forward_dim=config['intermediate_size'],
    feed_forward_activation=config['hidden_act'],
    training=None,
    trainable=True,
    output_layer_num=1,
)

inputs, outputs = bert
print(type(bert), type(outputs))
load_model_weights_from_checkpoint(outputs, config,
                                   model_path + "bert_model.ckpt")

x1 = Input(shape=(None, ))
x2 = Input(shape=(None, ))
bert_out = outputs.output([x1, x2])
Example No. 16
                                               training=True,
                                               trainable=True,
                                               seq_len=seq_len)
    Input_layer = model.inputs[:2]
    x = model.layers[-9].output
    x = BatchNormalization()(x)
    x = Lambda(lambda model: model[:, 0])(x)
    x = BatchNormalization()(x)
    x = Dropout(0.2)(x)
    Output_layer = Dense(3, activation='sigmoid')(x)
    model = Model(Input_layer, Output_layer)
    model.load_weights(sys.argv[1])
    return model


model = get_model()
Y_pred = model.predict([X, seg], verbose=1)
'''
print(Y_pred_by_cate)
Y_pred_by_cate = np.load('./Y_test_pred_by_category.npy')
Y_pred = (3*Y_pred + Y_pred_by_cate) / 4
'''
Y_pred = (Y_pred > 0.5)
other_pred = np.sum(Y_pred, axis=1) < 0.9
Y = np.hstack((Y_pred, other_pred.reshape(-1, 1))).astype('int')

opt_path = sys.argv[2]
f = open(opt_path, 'w')
wt_str = 'order_id,THEORETICAL,ENGINEERING,EMPIRICAL,OTHERS\n'
f.write(wt_str)
for i in range(Y_pred.shape[0]):
Example No. 17
# @title Load Basic Model
import codecs
from keras_bert import load_trained_model_from_checkpoint, get_model, compile_model

token_dict = {}
with codecs.open(vocab_path, 'r', 'utf8') as reader:
    for line in reader:
        token = line.strip()
        token_dict[token] = len(token_dict)

with strategy.scope():

    model = get_model(
        token_num=200000,
        #        head_num=25,
        #        transformer_num=20,
        embed_dim=48 * 2,
        feed_forward_dim=512,
        seq_len=100,
        pos_num=100,  # 128
        training=True,
        trainable=None,
        dropout_rate=0.1,
    )
    compile_model(model)

#    model = load_trained_model_from_checkpoint(
#        config_path,
#        checkpoint_path,
#        training=True,
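The strategy object used above is not defined in this excerpt. A hypothetical setup (an assumption, not taken from the original notebook) that would provide such a distribution scope with a recent TensorFlow 2.x:

import tensorflow as tf

try:
    # use a TPU if one is available (e.g. on Colab), otherwise fall back to GPUs/CPU
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    strategy = tf.distribute.TPUStrategy(resolver)
except ValueError:
    strategy = tf.distribute.MirroredStrategy()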
Example No. 18
def GetModel(ucfg):
    '''ucfg: the user's config for the table output (nnname, BS, BPE).'''

    nnname = ucfg['nnname']
    isconv = True

    if nnname == 'newmodel':
        import sys
        sys.path.append("..")
        from newmodel import tfmodel
        model, isconv = tfmodel()
        sys.path.remove("..")

    import tensorflow.keras.applications as nn
    if hasattr(nn, nnname):
        model = getattr(nn, nnname)(weights=None)

    # efficientnet: B0-B7
    elif nnname[:-2] == 'EfficientNet':
        import tfmodels.efficientnet.tfkeras as nn
        model = getattr(nn, nnname)(weights=None)

    # TF2.x Models:
    elif nnname == 'ncf':
        import tfmodels.ncf as nn
        name = 'ncfmodel'
        model = getattr(nn, name)(istrain=False)
        isconv = False

    elif nnname == 'din':
        import tfmodels.din as nn
        name = 'din'
        _, model = getattr(nn, name)(item_count=63001,
                                     cate_count=801,
                                     hidden_units=128)
        isconv = False

    # bert from bert_keras
    elif nnname == 'bert':
        isconv = False
        from keras_bert import get_base_dict, get_model, compile_model
        # Build token dictionary
        token_dict = get_base_dict()
        training = True
        if training:
            # BERT base:
            # embed_dim = 768
            # headnum = 12
            # layernum = 12
            # BERT large:
            embed_dim = 1024
            headnum = 16
            layernum = 24

            ff_dim = embed_dim * 4
            token_num = 30522  # vocabulary size from the BERT paper
            model = get_model(token_num=token_num,
                              pos_num=512,
                              seq_len=512,
                              embed_dim=embed_dim,
                              transformer_num=layernum,
                              head_num=headnum,
                              feed_forward_dim=ff_dim,
                              training=training)
        else:
            # Revise lib\site-packages\keras_bert\bert.py: line164
            # "return inputs, transformed" -> "return inputs, transformed,model"
            _, _, model = get_model(token_num=len(token_dict),
                                    embed_dim=1024,
                                    head_num=16,
                                    training=training)

        compile_model(model)

    if nnname == 'mymodel':
        isconv = False

        ## ===== To add a customized model ====
        # refer to: https://keras.io/guides/sequential_model/
        from tensorflow.keras import layers
        # Define a customized model
        model = keras.Sequential()
        model.add(keras.Input(shape=(250, 250, 3)))  # 250x250 RGB images
        model.add(layers.Conv2D(32, 5, strides=2, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(3))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(3))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(2))
        # Now that we have 4x4 feature maps, time to apply global max pooling.
        model.add(layers.GlobalMaxPooling2D())
        # Finally, we add a classification layer.
        model.add(layers.Dense(10))

        ## ===== end of your codes  ======

    if True:
        g = keras.utils.model_to_dot(model, show_shapes=True)
        if nnname == 'newmodel':
            nnname = ucfg['model']
        g.write_pdf(".//outputs//tf//" + nnname + '.pdf')
    return model, isconv
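A hypothetical call (not from the original module) showing how GetModel is driven by the ucfg dictionary; only 'nnname' is read in most branches, and the other keys are assumed table-output settings:

ucfg = {'nnname': 'ResNet50', 'BS': 1, 'BPE': 32}  # hypothetical user config
# assumes pydot/graphviz are installed and ./outputs/tf/ exists for the PDF export
model, isconv = GetModel(ucfg)
print(model.count_params(), 'parameters; convolutional:', isconv)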
Example No. 19
question = layers.Input(shape=(query_maxlen, ), dtype='int32')
encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
encoded_question = layers.Dropout(0.3)(encoded_question)
encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)

merged = layers.add([encoded_sentence, encoded_question])
merged = RNN(EMBED_HIDDEN_SIZE)(merged)
merged = layers.Dropout(0.3)(merged)
preds = layers.Dense(vocab_size, activation='softmax')(merged)

model = get_model(
    token_num=story_maxlen,
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
)
token_dict = get_base_dict()  # A dict that contains some special tokens
for token in story:
    if token not in token_dict:
        token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word


def _generator():
    while True:
        yield gen_batch_inputs(
            [sentence, question],
Example No. 20
]

# Build token dictionary
token_dict = get_base_dict()  # A dict that contains some special tokens
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word

# Build & train the model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
model.summary()


def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
Example No. 21
question = layers.Input(shape=(query_maxlen,), dtype='int32')
encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
encoded_question = layers.Dropout(0.3)(encoded_question)
encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)

merged = layers.add([encoded_sentence, encoded_question])
merged = RNN(EMBED_HIDDEN_SIZE)(merged)
merged = layers.Dropout(0.3)(merged)
preds = layers.Dense(vocab_size, activation='softmax')(merged)

model = get_model(
    token_num=vocab_size,  # len(story_maxlen) would fail on an int; the vocabulary size is meant here
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,  # get_model expects dropout_rate, not dropout
)

def _generator():
    while True:
        yield gen_batch_inputs(
            vocab,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
            swap_sentence_rate=1.0,
        )
Example No. 22
# Build token dictionary
token_dict = get_base_dict()  # A dict that contains some special tokens
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word


# Build & train the model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
compile_model(model)
model.summary()

def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
            seq_len=20,
            mask_rate=0.3,
Example No. 23
import os
import keras
from keras_bert import get_model

model = get_model(
    token_num=30000,
    pos_num=512,
    transformer_num=12,
    head_num=12,
    embed_dim=768,
    feed_forward_dim=768 * 4,
)
model.summary(line_length=120)
current_path = os.path.dirname(os.path.abspath(__file__))
output_path = os.path.join(current_path, 'bert_small.png')
keras.utils.plot_model(model, show_shapes=True, to_file=output_path)

model = get_model(
    token_num=30000,
    pos_num=512,
    transformer_num=24,
    head_num=16,
    embed_dim=1024,
    feed_forward_dim=1024 * 4,
)
model.summary(line_length=120)
output_path = os.path.join(current_path, 'bert_big.png')
keras.utils.plot_model(model, show_shapes=True, to_file=output_path)
Example No. 24
    def build_model_from_config(
        config_file,
        checkpoint_file,
        training=False,
        trainable=False,
        seq_len=None,
    ):
        """Build the model from config file.

        :param config_file: The path to the JSON configuration file.
        :param training: If training, the whole model will be returned.
        :param trainable: Whether the model is trainable.
        :param seq_len: If it is not None and it is shorter than the value in the config file, the weights in
                        position embeddings will be sliced to fit the new length.
        :return: model and config
        """
        with open(config_file, 'r') as reader:
            config = json.loads(reader.read())
        if seq_len is not None:
            config['max_position_embeddings'] = min(
                seq_len, config['max_position_embeddings'])
        if trainable is None:
            trainable = training
        model = get_model(
            token_num=config['vocab_size'],
            pos_num=config['max_position_embeddings'],
            seq_len=config['max_position_embeddings'],
            embed_dim=config['hidden_size'],
            transformer_num=config['num_hidden_layers'],
            head_num=config['num_attention_heads'],
            feed_forward_dim=config['intermediate_size'],
            training=False,
            trainable=True,
        )

        # SetLearningRate(model,0.00001,True)
        inputs, outputs = model
        t_in = Input(shape=(None, ))
        s_in = Input(shape=(None, ))
        k1_in = Input(shape=(1, ))
        k2_in = Input(shape=(1, ))
        o1_in = Input(shape=(None, ))
        o2_in = Input(shape=(None, ))

        t, s, k1, k2, o1, o2 = t_in, s_in, k1_in, k2_in, o1_in, o2_in

        mask = Lambda(
            lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(
                inputs[0])
        outputs = Dropout(0.5)(outputs)

        attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
        attention = MaskFlatten()(attention)
        attention = Activation('softmax')(attention)
        attention = MaskRepeatVector(config['hidden_size'])(attention)
        attention = MaskPermute([2, 1])(attention)
        sent_representation = multiply([outputs, attention])
        attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)

        t_dim = K.int_shape(outputs)[-1]
        h = Lambda(seq_and_vec,
                   output_shape=(None, t_dim * 2))([outputs, attention])
        conv1 = MaskedConv1D()(h)
        ps = Dense(3, activation='softmax')(conv1)
        subject_model = keras.models.Model([inputs[0], inputs[1]],
                                           [ps])  # model that predicts the subject
        ## predict o1 and o2
        k1 = Lambda(seq_gather, output_shape=(t_dim, ))([outputs, k1])
        k2 = Lambda(seq_gather, output_shape=(t_dim, ))([outputs, k2])
        k = Concatenate()([k1, k2])

        h = Lambda(seq_and_vec,
                   output_shape=(None, t_dim * 2))([outputs, attention])
        h = Lambda(seq_and_vec, output_shape=(None, t_dim * 4))([h, k])
        h = Concatenate(axis=-1)([h, conv1])
        h = MaskedConv1D()(h)
        po1 = Dense(num_classes + 1, activation='softmax')(h)
        po2 = Dense(num_classes + 1, activation='softmax')(h)

        object_model = keras.models.Model(
            [inputs[0], inputs[1], k1_in, k2_in],
            [po1, po2])  # given the text and the subject, predict the object and its relation

        train_model = keras.models.Model(
            inputs=[inputs[0], inputs[1], s_in, k1_in, k2_in, o1_in, o2_in],
            outputs=[ps, po1, po2])

        s_loss = K.sparse_categorical_crossentropy(s, ps)
        s_loss = K.sum(s_loss * mask[:, :, 0]) / K.sum(mask)

        o1_loss = K.sparse_categorical_crossentropy(o1, po1)
        o1_loss = K.sum(o1_loss * mask[:, :, 0]) / K.sum(mask)
        o2_loss = K.sparse_categorical_crossentropy(o2, po2)
        o2_loss = K.sum(o2_loss * mask[:, :, 0]) / K.sum(mask)
        train_model.add_loss(s_loss + o1_loss + o2_loss)
        train_model.summary()
        train_model.compile(optimizer=keras.optimizers.Adam(lr=3e-5), )

        load_model_weights_from_checkpoint(train_model, config,
                                           checkpoint_file, training)
        return train_model, subject_model, object_model
Example No. 25
## Build Token Dictionary
token_dict = get_base_dict()
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())

print("token_dict size: ", len(token_dict))

## Build Keras Model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
compile_model(model)
model.summary()


## Train Keras Model
def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,
            token_list,
Example No. 26
                               batch_size=16)


def _get_session():
    tf_config = tf.ConfigProto(use_per_session_threads=True,
                               allow_soft_placement=True)
    tf_config.gpu_options.allow_growth = True
    return tf.Session(graph=tf.get_default_graph(), config=tf_config)


K.set_session(_get_session())

# Build & train the model
model = get_model(token_num=len(token_dict),
                  embed_dim=256,
                  head_num=4,
                  transformer_num=6,
                  seq_len=seq_len)
model.summary()

model.fit_generator(
    generator=_generator(),
    steps_per_epoch=4000,
    epochs=100,
    validation_data=_generator(),
    validation_steps=100,
    verbose=1,
    callbacks=[keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)],
)

model.save_weights('bert_nlg.hdf5')
Example No. 27
    def test_fit(self):
        current_path = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(current_path, 'test_bert_fit.h5')
        sentence_pairs = [
            [['all', 'work', 'and', 'no', 'play'], ['makes', 'jack', 'a', 'dull', 'boy']],
            [['from', 'the', 'day', 'forth'], ['my', 'arm', 'changed']],
            [['and', 'a', 'voice', 'echoed'], ['power', 'give', 'me', 'more', 'power']],
        ]
        token_dict = get_base_dict()
        for pairs in sentence_pairs:
            for token in pairs[0] + pairs[1]:
                if token not in token_dict:
                    token_dict[token] = len(token_dict)
        token_list = list(token_dict.keys())
        if os.path.exists(model_path):
            model = keras.models.load_model(
                model_path,
                custom_objects=get_custom_objects(),
            )
        else:
            model = get_model(
                token_num=len(token_dict),
                head_num=5,
                transformer_num=12,
                embed_dim=25,
                feed_forward_dim=100,
                seq_len=20,
                pos_num=20,
                dropout_rate=0.05,
                attention_activation=gelu,
                lr=1e-3,
                decay_steps=30000,
                warmup_steps=10000,
                weight_decay=1e-3,
            )
        model.summary()

        def _generator():
            while True:
                yield gen_batch_inputs(
                    sentence_pairs,
                    token_dict,
                    token_list,
                    seq_len=20,
                    mask_rate=0.3,
                    swap_sentence_rate=1.0,
                )

        model.fit_generator(
            generator=_generator(),
            steps_per_epoch=1000,
            epochs=1,
            validation_data=_generator(),
            validation_steps=100,
        )
        # model.save(model_path)
        for inputs, outputs in _generator():
            predicts = model.predict(inputs)
            outputs = list(map(lambda x: np.squeeze(x, axis=-1), outputs))
            predicts = list(map(lambda x: np.argmax(x, axis=-1), predicts))
            batch_size, seq_len = inputs[-1].shape
            for i in range(batch_size):
                for j in range(seq_len):
                    if inputs[-1][i][j]:
                        self.assertEqual(outputs[0][i][j], predicts[0][i][j])
            self.assertTrue(np.allclose(outputs[1], predicts[1]))
            break
Example No. 28
bert_config = {
    'token_num': len(token_dict),
    'head_num': 6,  # was 4
    'transformer_num': 4,
    'embed_dim': 36,
    'feed_forward_dim': 150,  # was 100
    'seq_len': max_seq_len,
    'pos_num': max_seq_len,
    'dropout_rate': 0.05,
}

with open('../tmp/bert.config', 'w') as f:
    json.dump(bert_config, f)

# Build & train the model
model = get_model(**bert_config)
model.summary()

#for layer in model.layers:
#    print('{}: {} --> {}'.format(layer.name, layer.input_shape, layer.output_shape))
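
Since the configuration is written to ../tmp/bert.config above, a minimal sketch (not part of the original script) of rebuilding the same architecture from that file later:

with open('../tmp/bert.config', 'r') as f:
    bert_config = json.load(f)
model = get_model(**bert_config)  # json and get_model are already imported above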


def my_generator(samples, batch_size):
    while True:
        start_index = 0
        while (start_index + batch_size) < len(samples):
            if False:
                print(
                    u'DEBUG\nstart_index={}\nphrase1 len={} words={}\nphrase2 len={} words={}\n'
                    .format(start_index, len(samples[start_index][0]),
                            u' '.join(samples[start_index][0]),
Example No. 29
import os
import keras
from keras_bert import get_model

model = get_model(
    token_num=30000,
    pos_num=512,
    transformer_num=12,
    head_num=12,
    embed_dim=768,
    feed_forward_dim=768 * 4,
)
model.summary(line_length=120)
current_path = os.path.dirname(os.path.abspath(__file__))
output_path = os.path.join(current_path, 'bert_small.png')
keras.utils.plot_model(model, show_shapes=True, to_file=output_path)

model = get_model(
    token_num=30000,
    pos_num=512,
    transformer_num=24,
    head_num=16,
    embed_dim=1024,
    feed_forward_dim=1024 * 4,
)
model.summary(line_length=120)
output_path = os.path.join(current_path, 'bert_big.png')
keras.utils.plot_model(model, show_shapes=True, to_file=output_path)

inputs, outputs = get_model(
    token_num=30000,
Example No. 30
]

# Build a custom token dictionary
token_dict = get_base_dict()  # initializes special tokens such as `[CLS]`
for pairs in sentence_pairs:
    for token in pairs[0] + pairs[1]:
        if token not in token_dict:
            token_dict[token] = len(token_dict)
token_list = list(token_dict.keys())  # Used for selecting a random word

# Build and train the model
model = get_model(
    token_num=len(token_dict),
    head_num=5,
    transformer_num=12,
    embed_dim=25,
    feed_forward_dim=100,
    seq_len=20,
    pos_num=20,
    dropout_rate=0.05,
)
compile_model(model)
model.summary()

plot_model(model, to_file="model.png", show_shapes=True)


def _generator():
    while True:
        yield gen_batch_inputs(
            sentence_pairs,
            token_dict,