Example #1
def load_from_pretrained(pretrained_path, lr, seq_len, optimizer_type,
                         decay_rate, warmup_steps, decay_steps):
    config_path = os.path.join(pretrained_path, 'bert_config.json')
    checkpoint_path = os.path.join(pretrained_path, 'bert_model.ckpt')
    model, config = build_model_from_config(
        config_path,
        training=False,
        trainable=True,
        output_layer_num=1,
        seq_len=seq_len,
    )
    load_model_weights_from_checkpoint(model,
                                       config,
                                       checkpoint_path,
                                       training=False)
    inputs = model.inputs
    outputs = model.outputs
    transformer_output = outputs[0]
    logits = keras.layers.Dense(
        units=2,
        trainable=True,
        name='logits',
        kernel_initializer=TruncatedNormal(stddev=0.02))(transformer_output)
    start_logits = Lambda(lambda x: x[:, :, 0], name='start-logits')(logits)
    end_logits = Lambda(lambda x: x[:, :, 1], name='end-logits')(logits)

    model = keras.models.Model(inputs=inputs,
                               outputs=[start_logits, end_logits])

    if optimizer_type == 'decay':
        optimizer = Adam(lr=lr, amsgrad=True, decay=decay_rate)
    else:
        optimizer = AdamWD(lr=lr,
                           amsgrad=True,
                           warmup_steps=warmup_steps,
                           decay_steps=decay_steps)

    model.compile(
        optimizer=optimizer,
        loss=custom_loss,
    )

    model.summary()

    return model
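The model above compiles with a `custom_loss` that the snippet does not show. As a hedged sketch only (not the original author's code): Keras applies the same loss function to both outputs, so a per-output sparse categorical cross-entropy over the raw start/end logits is a reasonable stand-in.

# Hypothetical stand-in for the undefined `custom_loss`; Keras calls it once per output.
from keras import backend as K

def custom_loss(y_true, y_pred):
    # y_true: (batch, 1) gold start/end token index
    # y_pred: (batch, seq_len) raw scores from the Dense head, hence from_logits=True
    y_true = K.cast(y_true, 'int32')
    return K.mean(K.sparse_categorical_crossentropy(y_true, y_pred, from_logits=True))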
Example #2
    def build_model_from_config(
        config_file,
        checkpoint_file,
        training=False,
        trainable=False,
        seq_len=None,
    ):
        """Build the model from config file.

        :param config_file: The path to the JSON configuration file.
        :param training: If training, the whole model will be returned.
        :param trainable: Whether the model is trainable.
        :param seq_len: If it is not None and it is shorter than the value in the config file, the weights in
                        position embeddings will be sliced to fit the new length.
        :return: model and config
        """
        with open(config_file, 'r') as reader:
            config = json.loads(reader.read())
        if seq_len is not None:
            config['max_position_embeddings'] = min(
                seq_len, config['max_position_embeddings'])
        if trainable is None:
            trainable = training
        model = get_model(
            token_num=config['vocab_size'],
            pos_num=config['max_position_embeddings'],
            seq_len=config['max_position_embeddings'],
            embed_dim=config['hidden_size'],
            transformer_num=config['num_hidden_layers'],
            head_num=config['num_attention_heads'],
            feed_forward_dim=config['intermediate_size'],
            training=False,
            trainable=True,
        )

        # SetLearningRate(model,0.00001,True)
        inputs, outputs = model
        t_in = Input(shape=(None, ))
        s_in = Input(shape=(None, ))
        k1_in = Input(shape=(1, ))
        k2_in = Input(shape=(1, ))
        o1_in = Input(shape=(None, ))
        o2_in = Input(shape=(None, ))

        t, s, k1, k2, o1, o2 = t_in, s_in, k1_in, k2_in, o1_in, o2_in

        mask = Lambda(
            lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(
                inputs[0])
        outputs = Dropout(0.5)(outputs)

        attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
        attention = MaskFlatten()(attention)
        attention = Activation('softmax')(attention)
        attention = MaskRepeatVector(config['hidden_size'])(attention)
        attention = MaskPermute([2, 1])(attention)
        sent_representation = multiply([outputs, attention])
        attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)

        t_dim = K.int_shape(outputs)[-1]
        h = Lambda(seq_and_vec,
                   output_shape=(None, t_dim * 2))([outputs, attention])
        conv1 = MaskedConv1D()(h)
        ps = Dense(3, activation='softmax')(conv1)
        subject_model = keras.models.Model([inputs[0], inputs[1]],
                                           [ps])  # model that predicts the subject
        # predict o1 and o2
        k1 = Lambda(seq_gather, output_shape=(t_dim, ))([outputs, k1])
        k2 = Lambda(seq_gather, output_shape=(t_dim, ))([outputs, k2])
        k = Concatenate()([k1, k2])

        h = Lambda(seq_and_vec,
                   output_shape=(None, t_dim * 2))([outputs, attention])
        h = Lambda(seq_and_vec, output_shape=(None, t_dim * 4))([h, k])
        h = Concatenate(axis=-1)([h, conv1])
        h = MaskedConv1D()(h)
        po1 = Dense(num_classes + 1, activation='softmax')(h)
        po2 = Dense(num_classes + 1, activation='softmax')(h)

        object_model = keras.models.Model(
            [inputs[0], inputs[1], k1_in, k2_in],
            [po1, po2])  # takes the text and subject, predicts the object and its relation

        train_model = keras.models.Model(
            inputs=[inputs[0], inputs[1], s_in, k1_in, k2_in, o1_in, o2_in],
            outputs=[ps, po1, po2])

        s_loss = K.sparse_categorical_crossentropy(s, ps)
        s_loss = K.sum(s_loss * mask[:, :, 0]) / K.sum(mask)

        o1_loss = K.sparse_categorical_crossentropy(o1, po1)
        o1_loss = K.sum(o1_loss * mask[:, :, 0]) / K.sum(mask)
        o2_loss = K.sparse_categorical_crossentropy(o2, po2)
        o2_loss = K.sum(o2_loss * mask[:, :, 0]) / K.sum(mask)
        train_model.add_loss(s_loss + o1_loss + o2_loss)
        train_model.summary()
        train_model.compile(optimizer=keras.optimizers.Adam(lr=3e-5), )

        load_model_weights_from_checkpoint(train_model, config,
                                           checkpoint_file, training)
        return train_model, subject_model, object_model
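Examples #2 and #3 both depend on `seq_and_vec` and `seq_gather`, which are defined elsewhere in the original project. A sketch of what these helpers usually look like in this kind of subject/object extraction code (treat the exact bodies as an assumption):

import tensorflow as tf
from keras import backend as K

def seq_gather(x):
    """x = [seq, idxs]: seq is (batch, seq_len, dim), idxs is (batch, 1);
    picks the vector at position idxs[i] from sequence i."""
    seq, idxs = x
    idxs = K.cast(idxs, 'int32')
    batch_idxs = K.arange(0, K.shape(seq)[0])
    batch_idxs = K.expand_dims(batch_idxs, 1)
    idxs = K.concatenate([batch_idxs, idxs], 1)
    return tf.gather_nd(seq, idxs)

def seq_and_vec(x):
    """x = [seq, vec]: tiles vec along the time axis and concatenates it to
    every timestep of seq, giving (batch, seq_len, dim * 2)."""
    seq, vec = x
    vec = K.expand_dims(vec, 1)
    vec = K.zeros_like(seq[:, :, :1]) + vec
    return K.concatenate([seq, vec], 2)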
Example #3
def build_model_from_config(
    config_file,
    checkpoint_file,
    training=False,
    trainable=False,
    seq_len=None,
):
    """Build the model from config file.

    :param config_file: The path to the JSON configuration file.
    :param training: If training, the whole model will be returned.
    :param trainable: Whether the model is trainable.
    :param seq_len: If it is not None and it is shorter than the value in the config file, the weights in
                    position embeddings will be sliced to fit the new length.
    :return: model and config
    """
    with open(config_file, 'r') as reader:
        config = json.loads(reader.read())
    if seq_len is not None:
        config['max_position_embeddings'] = min(
            seq_len, config['max_position_embeddings'])
    if trainable is None:
        trainable = training
    model = get_model(
        token_num=config['vocab_size'],
        pos_num=config['max_position_embeddings'],
        seq_len=config['max_position_embeddings'],
        embed_dim=config['hidden_size'],
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        training=False,
        trainable=True,
    )
    inputs, outputs = model
    bio_label = Input(shape=(maxlen, ))
    event = Input(shape=(1, ))

    mask = Lambda(
        lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(
            inputs[0])
    event_embedding = Embedding(len(event2id), hidden_size,
                                mask_zero=True)(event)

    outputs = Dropout(0.15)(outputs)
    attention = TimeDistributed(Dense(1, activation='tanh'))(outputs)
    attention = MaskFlatten()(attention)
    attention = Activation('softmax')(attention)
    attention = MaskRepeatVector(config['hidden_size'])(attention)
    attention = MaskPermute([2, 1])(attention)
    sent_representation = multiply([outputs, attention])
    attention = Lambda(lambda xin: K.sum(xin, axis=1))(sent_representation)
    t_dim = K.int_shape(outputs)[-1]
    bert_attention = Lambda(seq_and_vec,
                            output_shape=(None,
                                          t_dim * 2))([outputs, attention])

    cnn1 = MaskedConv1D(filters=hidden_size,
                        kernel_size=3,
                        activation='relu',
                        padding='same')(bert_attention)
    event_bc = Lambda(lambda input: input[0] * 0 + input[1])(
        [cnn1, event_embedding])
    con_cnn_event = Concatenate(axis=-1)([cnn1, event_bc])
    dens1 = Dense(hidden_size, activation='relu', use_bias=True)(con_cnn_event)
    #BIOE
    bio_pred = Dense(4, activation='softmax')(dens1)
    entity_model = keras.models.Model([inputs[0], inputs[1], event],
                                      [bio_pred])  # entity prediction model
    train_model = keras.models.Model([inputs[0], inputs[1], bio_label, event],
                                     [bio_pred])

    loss = K.sparse_categorical_crossentropy(bio_label, bio_pred)
    loss = K.sum(loss * mask[:, :, 0]) / K.sum(mask)

    train_model.add_loss(loss)
    train_model.summary()
    train_model.compile(optimizer=keras.optimizers.Adam(lr=3e-5), )
    load_model_weights_from_checkpoint(train_model, config, checkpoint_file,
                                       training)
    return train_model, entity_model
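The attention blocks in Examples #2 and #3 also rely on `MaskFlatten`, `MaskRepeatVector`, `MaskPermute` and `MaskedConv1D`, none of which are shown. They are typically thin subclasses that merely tolerate a masked input; a sketch under that assumption for the first three:

from keras.layers import Flatten, RepeatVector, Permute

class MaskFlatten(Flatten):
    """Flatten that accepts a masked input instead of raising an error."""
    def __init__(self, **kwargs):
        super(MaskFlatten, self).__init__(**kwargs)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
        return None  # the mask is dropped after flattening

class MaskRepeatVector(RepeatVector):
    """RepeatVector that accepts a masked input."""
    def __init__(self, n, **kwargs):
        super(MaskRepeatVector, self).__init__(n, **kwargs)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
        return None

class MaskPermute(Permute):
    """Permute that accepts a masked input."""
    def __init__(self, dims, **kwargs):
        super(MaskPermute, self).__init__(dims, **kwargs)
        self.supports_masking = True

    def compute_mask(self, inputs, mask=None):
        return None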
Example #4
bert = get_model(
    token_num=config['vocab_size'],
    pos_num=config['max_position_embeddings'],
    seq_len=seq_len,
    embed_dim=config['hidden_size'],
    transformer_num=config['num_hidden_layers'],
    head_num=config['num_attention_heads'],
    feed_forward_dim=config['intermediate_size'],
    feed_forward_activation=config['hidden_act'],
    training=None,
    trainable=True,
    output_layer_num=1,
)

inputs, outputs = bert
print(type(bert), type(outputs))
load_model_weights_from_checkpoint(outputs, config,
                                   model_path + "bert_model.ckpt")

x1 = Input(shape=(None, ))
x2 = Input(shape=(None, ))
bert_out = outputs.output([x1, x2])
lstm_out = Bidirectional(
    LSTM(64, return_sequences=True, dropout=0.2,
         recurrent_dropout=0.2))(bert_out)
crf_out = CRF(8, sparse_target=True)(lstm_out)
model = Model(inputs=[x1, x2], outputs=crf_out)

model.summary()
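Example #4 stops before compiling. Assuming the `CRF` layer comes from keras_contrib, a typical way to finish the model (not part of the original snippet) is:

# Hypothetical compile step, assuming keras_contrib's CRF implementation
from keras_contrib.losses import crf_loss
from keras_contrib.metrics import crf_viterbi_accuracy

model.compile(optimizer='adam', loss=crf_loss, metrics=[crf_viterbi_accuracy])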
Example #5
def build_csc_model(max_seq_len):
    # build detect model
    with open(paths.config, 'r') as reader:
        config = json.load(reader)
    if max_seq_len is not None:
        config['max_position_embeddings'] = min(
            max_seq_len, config['max_position_embeddings'])
    seq_len = config["max_position_embeddings"]
    inputs = get_inputs(seq_len)  # [input_ids, segment_ids, input_mask]
    token_num = len(token_dict)
    embed_dim = config["hidden_size"]
    # config["num_hidden_layers"] = 1

    token_embedding_lookup = TokenEmbedding(
        input_dim=token_num,
        output_dim=embed_dim,
        mask_zero=True,
        trainable=True,
        name='Embedding-Token',
    )
    segment_embedding_lookup = keras.layers.Embedding(
        input_dim=2,
        output_dim=embed_dim,
        trainable=True,
        name='Embedding-Segment',
    )
    position_embed_layer = PositionEmbedding(
        input_dim=seq_len,
        output_dim=embed_dim,
        mode=PositionEmbedding.MODE_ADD,
        trainable=True,
        name='Embedding-Position',
    )
    token_emb, embed_weights = token_embedding_lookup(inputs[0])
    seg_emb = segment_embedding_lookup(inputs[1])
    add = keras.layers.Add(name='Embedding-Token-Segment')
    embeddings = position_embed_layer(add([token_emb, seg_emb]))
    # embeddings = keras.layers.Embedding(input_dim=token_num, output_dim=embed_dim, mask_zero=True)(inputs[0])

    mask = K.cast(inputs[2], dtype='bool')
    x = keras.layers.Bidirectional(keras.layers.GRU(
        256, return_sequences=True))(embeddings, mask=mask)
    err_prob = keras.layers.Dense(1, activation='sigmoid', name="error_prob")(
        x)  # shape: (None, seq_len, 1)
    # detect_model = keras.Model(inputs, err_prob)
    # detect_model.summary()

    # build correct model
    num_classes = char_end_index - char_start_index + 2  # add extra id representing the oov original char

    mask_ids = K.constant(mask_id, shape=(1, max_seq_len))
    mask_emb, _ = token_embedding_lookup(mask_ids)
    soft_emb = err_prob * mask_emb + (
        1. - err_prob) * token_emb  # broadcast, shape(None, seq_len, emb_size)
    new_embeddings = position_embed_layer(add([soft_emb, seg_emb]))

    bert_output, bert = get_model_from_embedding(
        inputs,
        new_embeddings,
        transformer_num=config['num_hidden_layers'],
        head_num=config['num_attention_heads'],
        feed_forward_dim=config['intermediate_size'],
        feed_forward_activation=config['hidden_act'])
    load_model_weights_from_checkpoint(bert, config, paths.checkpoint)

    output = keras.layers.Dense(num_classes,
                                activation='softmax',
                                name="correct_prob")(bert_output + embeddings)
    error_prob = err_prob[:, :, 0]  # squeeze
    correct_model = keras.Model(inputs, [output, error_prob])
    # correct_model.summary()

    mistake_labels = keras.layers.Input(shape=(seq_len, ),
                                        dtype='float32',
                                        name="mistake_labels")
    char_labels = keras.layers.Input(shape=(seq_len, ),
                                     dtype='int32',
                                     name="char_labels")
    # training model
    train_model = keras.Model(inputs=inputs + [mistake_labels, char_labels],
                              outputs=[output, error_prob])

    # drop the leading [CLS] and trailing [SEP] from the mask
    mask_sum = K.sum(inputs[2], axis=-1)
    diff = K.one_hot(mask_sum - 1, seq_len) + K.one_hot(0, seq_len)
    mask_float = K.cast_to_floatx(inputs[2]) - diff
    args_for_loss = (mask_float, char_labels, mistake_labels, error_prob,
                     output)
    loss = keras.layers.Lambda(custom_loss)(args_for_loss)
    train_model.add_loss(loss)
    train_model.summary()
    train_model.compile(optimizer=keras.optimizers.Adam(learning_rate))
    return train_model, correct_model
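The Lambda layer above wraps a `custom_loss` that is not shown. A hedged sketch of what a joint detection/correction loss for this model could look like (the 0.2/0.8 weighting is a placeholder, not the original value):

# Hypothetical sketch of the undefined `custom_loss`; real masking/weighting may differ.
from keras import backend as K

def custom_loss(args):
    mask_float, char_labels, mistake_labels, error_prob, output = args
    # detection loss: binary cross-entropy on the per-token error probability
    det_loss = K.binary_crossentropy(mistake_labels, error_prob)
    # correction loss: sparse categorical cross-entropy on the predicted characters
    cor_loss = K.sparse_categorical_crossentropy(char_labels, output)
    # normalize by the [CLS]/[SEP]-stripped mask and combine the two terms
    det_loss = K.sum(det_loss * mask_float) / K.sum(mask_float)
    cor_loss = K.sum(cor_loss * mask_float) / K.sum(mask_float)
    return 0.2 * det_loss + 0.8 * cor_loss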
Example #6
 def on_train_begin(self, logs=None):
     load_model_weights_from_checkpoint(self.bert, config,
                                        self.checkpoint_path)
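Example #6 is a single callback hook. A minimal sketch of the class it might belong to (the class name and constructor are assumptions; only on_train_begin comes from the example):

import keras

class LoadPretrainedBert(keras.callbacks.Callback):
    def __init__(self, bert, checkpoint_path):
        super(LoadPretrainedBert, self).__init__()
        self.bert = bert
        self.checkpoint_path = checkpoint_path

    def on_train_begin(self, logs=None):
        # reload the pretrained weights before training starts; `config` is a module-level global
        load_model_weights_from_checkpoint(self.bert, config,
                                           self.checkpoint_path)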