Example #1
    def test_google_weights(self):
        albert_model_name = "albert_base"
        albert_dir = bert.fetch_tfhub_albert_model(albert_model_name,
                                                   ".models")

        albert_params = bert.albert_params(albert_model_name)
        l_bert = bert.BertModelLayer.from_params(albert_params, name="albert")

        l_input_ids = keras.layers.Input(shape=(128, ),
                                         dtype='int32',
                                         name="input_ids")
        l_token_type_ids = keras.layers.Input(shape=(128, ),
                                              dtype='int32',
                                              name="token_type_ids")
        output = l_bert([l_input_ids, l_token_type_ids])
        output = keras.layers.Lambda(lambda x: x[:, 0, :])(output)
        output = keras.layers.Dense(2)(output)
        model = keras.Model(inputs=[l_input_ids, l_token_type_ids],
                            outputs=output)

        model.build(input_shape=(None, 128))
        model.compile(
            optimizer=keras.optimizers.Adam(),
            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

        for weight in l_bert.weights:
            print(weight.name)

        bert.load_albert_weights(l_bert, albert_dir)

        model.summary()
Example #2
    def test_albert_load_base_google_weights(self):  # for coverage mainly
        albert_model_name = "albert_base"
        albert_dir = bert.fetch_tfhub_albert_model(albert_model_name,
                                                   ".models")
        model_params = bert.albert_params(albert_model_name)

        l_bert = bert.BertModelLayer.from_params(model_params, name="albert")

        model = keras.models.Sequential([
            keras.layers.InputLayer(input_shape=(8, ),
                                    dtype=tf.int32,
                                    name="input_ids"),
            l_bert,
            keras.layers.Lambda(lambda x: x[:, 0, :]),
            keras.layers.Dense(2),
        ])
        model.build(input_shape=(None, 8))
        model.compile(
            optimizer=keras.optimizers.Adam(),
            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

        bert.load_albert_weights(l_bert, albert_dir)

        model.summary()
Example #3
import tensorflow as tf
import bert
from tensorflow.keras.layers import (Activation, Conv1D, Dense, Input, LSTM,
                                     TimeDistributed)
from utils.model_utils import class_acc  # assumed location of the repo-local metric helper


def create_model(l_bert, model_ckpt, max_seq_len, num_labels,
                 label_threshold_less, model_type):
    """
    Wrapper function to return keras learning rate scheduler callback

    Args:
        l_bert (bert.model.BertModelLayer): BERT layer
        model_ckpt (str): path to best model checkpoint
        max_seq_length (int): maximum sequence length for training data
        num_labels (int): final output dimensionality per token
        label_threshold_less (int): all label IDs strictly less than this number
        will be ignored in class accuracy calculations
        model_type (str): type of model decoder to use, see
        './utils/model_utils.py'

    Returns:
        model (tensorflow.python.keras.engine.training.Model): final compiled
        model which can be used for fine-tuning
    """
    input_ids = Input(shape=(max_seq_len, ), dtype='int32')
    output = l_bert(input_ids)
    if model_type == "TD_Dense":
        output = TimeDistributed(Dense(512))(output)
        output = Activation("relu")(output)
        output = TimeDistributed(Dense(256))(output)
        output = Activation("relu")(output)
        output = TimeDistributed(Dense(128))(output)
        output = Activation("relu")(output)
        output = TimeDistributed(Dense(64))(output)
        output = Activation("relu")(output)
        output = TimeDistributed(Dense(num_labels))(output)
    elif model_type == "1D_CNN":
        output = Conv1D(512, 3, padding="same")(output)
        output = Activation("relu")(output)
        output = Conv1D(256, 3, padding="same")(output)
        output = Activation("relu")(output)
        output = Conv1D(128, 3, padding="same")(output)
        output = Activation("relu")(output)
        output = Conv1D(64, 3, padding="same")(output)
        output = Activation("relu")(output)
        output = Conv1D(num_labels, 3, padding="same")(output)
    elif model_type == "Stacked_LSTM":
        output = LSTM(512, return_sequences=True)(output)
        output = LSTM(256, return_sequences=True)(output)
        output = LSTM(128, return_sequences=True)(output)
        output = TimeDistributed(Dense(64))(output)
        output = Activation("relu")(output)
        output = TimeDistributed(Dense(num_labels))(output)
    else:
        raise ValueError(f"Unknown model_type: {model_type!r}")
    prob = Activation("softmax")(output)
    model = tf.keras.Model(inputs=input_ids, outputs=prob)
    model.build(input_shape=(None, max_seq_len))
    bert.load_albert_weights(l_bert, model_ckpt)
    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  metrics=[class_acc(label_threshold_less)])
    model.summary()
    return model
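
A hedged usage sketch for the function above: the ALBERT layer and checkpoint come from the TF Hub fetch helpers shown in the earlier examples, and the label counts below are illustrative.

import bert

albert_dir = bert.fetch_tfhub_albert_model("albert_base", ".models")
albert_params = bert.albert_params("albert_base")
l_bert = bert.BertModelLayer.from_params(albert_params, name="albert")

# For TF Hub ALBERT models, the fetched directory itself is passed as the checkpoint.
model = create_model(l_bert, model_ckpt=albert_dir, max_seq_len=128,
                     num_labels=10, label_threshold_less=2,
                     model_type="Stacked_LSTM")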
Example #4
import os

import bert
from tensorflow import keras


def create_model(
    model_dir, model_type, max_seq_len, n_classes, load_pretrained_weights=True, summary=False,
):
    """Creates keras model with pretrained BERT/ALBERT layer.

    Args:
        model_dir: String. Path to model.
        model_type: String. Expects either "albert" or "bert"
        max_seq_len: Int. Maximum length of a classification example.
        n_classes: Int. Number of training classes.
        load_pretrained_weights: Boolean. Load pretrained model weights.
        summary: Boolean. Print model summary.

    Returns:
        Keras model
    """
    if model_type == "albert":
        model_ckpt = os.path.join(model_dir, "model.ckpt-best")
        model_params = bert.albert_params(model_dir)
    elif model_type == "bert":
        model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
        model_params = bert.params_from_pretrained_ckpt(model_dir)
    else:
        raise ValueError(f'Unknown model_type: {model_type!r} (expected "albert" or "bert")')

    layer_bert = bert.BertModelLayer.from_params(model_params, name=model_type)

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids")
    output = layer_bert(input_ids)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=model_params["hidden_size"], activation="relu")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=n_classes)(logits)  # raw logits; the loss below uses from_logits=True

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    if load_pretrained_weights:
        if model_type == "albert":
            bert.load_albert_weights(layer_bert, model_ckpt)
        elif model_type == "bert":
            bert.load_bert_weights(layer_bert, model_ckpt)

    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
    )
    if summary:
        model.summary()
    return model
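
A short usage sketch, with a hypothetical `model_dir` pointing at an extracted ALBERT release that contains `model.ckpt-best`:

model = create_model(
    model_dir=".models/albert_base_v2",  # hypothetical local path
    model_type="albert",
    max_seq_len=128,
    n_classes=3,
    load_pretrained_weights=True,
    summary=True,
)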
Example #5
    def test_albert_params(self):
        albert_model_name = "albert_base"
        albert_dir = bert.fetch_tfhub_albert_model(albert_model_name,
                                                   ".models")
        dir_params = bert.albert_params(albert_dir)
        dir_params.attention_dropout = 0.1  # diff between README and assets/albert_config.json
        dir_params.hidden_dropout = 0.1
        name_params = bert.albert_params(albert_model_name)
        self.assertEqual(name_params, dir_params)

        # coverage
        model_params = dir_params
        model_params.vocab_size = model_params.vocab_size + 2
        model_params.adapter_size = 1
        l_bert = bert.BertModelLayer.from_params(model_params, name="albert")
        l_bert(tf.zeros((1, 128)))
        bert.load_albert_weights(l_bert, albert_dir)
Example #6
    def test_albert_google_weights(self):
        albert_model_name = "albert_base"
        albert_dir = bert.fetch_tfhub_albert_model(albert_model_name, ".models")

        albert_params = bert.albert_params(albert_model_name)
        model, l_bert = self.build_model(albert_params)

        skipped_weight_value_tuples = bert.load_albert_weights(l_bert, albert_dir)
        self.assertEqual(0, len(skipped_weight_value_tuples))
        model.summary()
Example #7
def Albert_model(max_seq_len):
    model_name = "albert_large"
    model_dir = bert.fetch_tfhub_albert_model(model_name, ".models")
    model_params = bert.albert_params(model_name)
    model_params.shared_layer = True
    model_params.embedding_size = 1024

    l_bert = bert.BertModelLayer.from_params(model_params, name="albert")

    l_input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32')

    # using the default token_type/segment id 0
    output = l_bert(l_input_ids)                              # output: [batch_size, max_seq_len, hidden_size]
    output = keras.layers.GlobalAveragePooling1D()(output)
    model = keras.Model(inputs=l_input_ids, outputs=output)
    model.build(input_shape=(None, max_seq_len))
    # load_albert_weights() must be called after model.build()
    bert.load_albert_weights(l_bert, model_dir)
    return model, model_dir
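
A minimal sketch of calling the helper above; the pooled output can be used directly as a fixed-size sentence embedding.

import tensorflow as tf

model, model_dir = Albert_model(max_seq_len=128)
# int32 token ids of shape (batch, 128) -> pooled (batch, hidden_size) vectors
embeddings = model(tf.zeros((2, 128), dtype=tf.int32))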
Example #8
    def test_albert_google_weights_non_tfhub(self):
        albert_model_name = "albert_base_v2"
        albert_dir = bert.fetch_google_albert_model(albert_model_name, ".models")
        model_ckpt = os.path.join(albert_dir, "model.ckpt-best")

        albert_params = bert.albert_params(albert_dir)
        model, l_bert = self.build_model(albert_params)

        skipped_weight_value_tuples = bert.load_albert_weights(l_bert, model_ckpt)
        self.assertEqual(0, len(skipped_weight_value_tuples))
        model.summary()
Example #9
    def test_albert_chinese_weights(self):
        albert_model_name = "albert_base"
        albert_dir = bert.fetch_brightmart_albert_model(albert_model_name, ".models")
        albert_ckpt = os.path.join(albert_dir, "albert_model.ckpt")

        albert_params = bert.params_from_pretrained_ckpt(albert_dir)
        model, l_bert = self.build_model(albert_params)

        skipped_weight_value_tuples = bert.load_albert_weights(l_bert, albert_ckpt)
        self.assertEqual(0, len(skipped_weight_value_tuples))
        model.summary()
Example #10
    def test_albert_zh_fetch_and_load(self):
        albert_model_name = "albert_tiny"
        albert_dir = bert.fetch_brightmart_albert_model(
            albert_model_name, ".models")

        model_params = bert.params_from_pretrained_ckpt(albert_dir)
        model_params.vocab_size = model_params.vocab_size + 2
        model_params.adapter_size = 1
        l_bert = bert.BertModelLayer.from_params(model_params, name="albert")
        l_bert(tf.zeros((1, 128)))
        res = bert.load_albert_weights(l_bert, albert_dir)
        self.assertTrue(len(res) > 0)
Example #11
    def test_chinese_weights(self):
        #bert_ckpt_dir = ".models/albert_base_zh/"
        #bert_ckpt_file = bert_ckpt_dir + "albert_model.ckpt"
        #bert_config_file = bert_ckpt_dir + "albert_config_base.json"

        print("Eager Execution:", tf.executing_eagerly())

        albert_model_name = "albert_base"
        albert_dir = bert.fetch_brightmart_albert_model(
            albert_model_name, ".models")
        albert_ckpt = os.path.join(albert_dir, "albert_model.ckpt")

        bert_params = bert.params_from_pretrained_ckpt(albert_dir)
        l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

        l_input_ids = keras.layers.Input(shape=(128, ),
                                         dtype='int32',
                                         name="input_ids")
        l_token_type_ids = keras.layers.Input(shape=(128, ),
                                              dtype='int32',
                                              name="token_type_ids")
        output = l_bert([l_input_ids, l_token_type_ids])
        output = keras.layers.Lambda(lambda x: x[:, 0, :])(output)
        output = keras.layers.Dense(2)(output)
        model = keras.Model(inputs=[l_input_ids, l_token_type_ids],
                            outputs=output)

        model.build(input_shape=(None, 128))
        model.compile(
            optimizer=keras.optimizers.Adam(),
            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

        for weight in l_bert.weights:
            print(weight.name)

        bert.load_albert_weights(l_bert, albert_ckpt)

        model.summary()
Example #12
def load_bert_model(name_model, max_seq_len, trainable=False):
    """
    models name supported, same as tf-2.0-bert
    """
    model_name = name_model
    model_dir = bert.fetch_tfhub_albert_model(model_name, ".models")
    model_params = bert.albert_params(model_name)

    l_bert = bert.BertModelLayer.from_params(model_params, name=name_model)

    l_input_ids = tf.keras.layers.Input(shape=(max_seq_len, ), dtype='int32')

    output = l_bert(
        l_input_ids)  # output: [batch_size, max_seq_len, hidden_size]

    model = tf.keras.Model(inputs=l_input_ids, outputs=output)
    model.build(input_shape=(None, max_seq_len))

    # load google albert original weights after the build
    bert.load_albert_weights(l_bert, model_dir)
    model.trainable = trainable

    return model
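
A hedged sketch of using the frozen encoder as a feature extractor; the model name follows the TF Hub ALBERT naming used above.

import tensorflow as tf

encoder = load_bert_model("albert_base", max_seq_len=128, trainable=False)
seq_output = encoder(tf.zeros((1, 128), dtype=tf.int32))  # (1, 128, hidden_size)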
Example #13
def build_transformer(transformer, max_seq_length=None, num_labels=None, tagging=True, tokenizer_only=False):
    spm_model_file = None
    if transformer in zh_albert_models_google:
        from bert.tokenization.albert_tokenization import FullTokenizer
        model_url = zh_albert_models_google[transformer]
        albert = True
    elif transformer in albert_models_tfhub:
        from edparser.layers.transformers.albert_tokenization import FullTokenizer
        with stdout_redirected(to=os.devnull):
            model_url = fetch_tfhub_albert_model(transformer,
                                                 os.path.join(hanlp_home(), 'thirdparty', 'tfhub.dev', 'google',
                                                              transformer))
        albert = True
        spm_model_file = glob.glob(os.path.join(model_url, 'assets', '*.model'))
        assert len(spm_model_file) == 1, 'No SentencePiece model found, or more than one found'
        spm_model_file = spm_model_file[0]
    elif transformer in bert_models_google:
        from bert.tokenization.bert_tokenization import FullTokenizer
        model_url = bert_models_google[transformer]
        albert = False
    else:
        raise ValueError(
            f'Unknown model {transformer}, available ones: {list(bert_models_google.keys()) + list(zh_albert_models_google.keys()) + list(albert_models_tfhub.keys())}')
    bert_dir = get_resource(model_url)
    if spm_model_file:
        vocab = glob.glob(os.path.join(bert_dir, 'assets', '*.vocab'))
    else:
        vocab = glob.glob(os.path.join(bert_dir, '*vocab*.txt'))
    assert len(vocab) == 1, 'No vocab found, or more than one found'
    vocab = vocab[0]
    lower_case = any(key in transformer for key in ['uncased', 'multilingual', 'chinese', 'albert'])
    if spm_model_file:
        # noinspection PyTypeChecker
        tokenizer = FullTokenizer(vocab_file=vocab, spm_model_file=spm_model_file, do_lower_case=lower_case)
    else:
        tokenizer = FullTokenizer(vocab_file=vocab, do_lower_case=lower_case)
    if tokenizer_only:
        return tokenizer
    if spm_model_file:
        bert_params = albert_params(bert_dir)
    else:
        bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name='albert' if albert else "bert")
    if not max_seq_length:
        return l_bert, tokenizer, bert_dir
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids], mask=l_mask_ids)
    if not tagging:
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    if bert_params.hidden_dropout:
        output = tf.keras.layers.Dropout(bert_params.hidden_dropout, name='hidden_dropout')(output)
    logits = tf.keras.layers.Dense(num_labels, kernel_initializer=tf.keras.initializers.TruncatedNormal(
        bert_params.initializer_range))(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids], outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    if not spm_model_file:
        ckpt = glob.glob(os.path.join(bert_dir, '*.index'))
        assert ckpt, f'No checkpoint found under {bert_dir}'
        ckpt, _ = os.path.splitext(ckpt[0])
    with stdout_redirected(to=os.devnull):
        if albert:
            if spm_model_file:
                skipped_weight_value_tuples = bert.load_albert_weights(l_bert, bert_dir)
            else:
                # noinspection PyUnboundLocalVariable
                skipped_weight_value_tuples = load_stock_weights(l_bert, ckpt)
        else:
            # noinspection PyUnboundLocalVariable
            skipped_weight_value_tuples = bert.load_bert_weights(l_bert, ckpt)
    assert 0 == len(skipped_weight_value_tuples), f'failed to load pretrained {transformer}'
    return model, tokenizer
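
Two hedged call patterns for `build_transformer`, assuming 'albert_base' appears in the `albert_models_tfhub` dictionary checked at the top of the function; `num_labels` sizes the final Dense head.

# Tokenizer only:
tokenizer = build_transformer('albert_base', tokenizer_only=True)

# Full token-tagging model over 128-token windows with 7 labels:
model, tokenizer = build_transformer('albert_base', max_seq_length=128,
                                     num_labels=7, tagging=True)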
Example #14
    def load_pretrained(self, pretrained_ckpt):
        self.build(input_shape=(None, 2, self._max_seq_len))
        bert.load_albert_weights(self.l_bert, pretrained_ckpt)
Example #15
    def load_pretrained(self, pretrained_ckpt):
        # self.call(np.zeros((16, 2, self.max_seq_len)))
        self.build(input_shape=(None, 2, self._max_seq_len))
        bert.load_albert_weights(self.l_bert, pretrained_ckpt)
Example #16
# TODO: Try with more regularisation
# cls_out = keras.layers.Dropout(0.5)(cls_out)
logits = keras.layers.Dense(units=256, activation='relu')(cls_out)
logits = keras.layers.Dropout(0.5)(logits)
# NOTE: Alternative to the Lambda layer
# bgru_layer = keras.layers.Bidirectional(keras.layers.GRU(64))(output)
output = keras.layers.Dense(units=1, activation='sigmoid')(logits)
model = keras.Model(inputs=input_ids, outputs=output)

# Freeze all BERT sub-layers except LayerNorm
freeze_layers(bert_layer, exclude=['LayerNorm'])
# Originally from tutorial: ['LayerNorm', 'adapter-down', 'adapter-up']

# Build model and load pre-trained weights
model.build(input_shape=(None, MAX_SEQ_LEN))
bert.load_albert_weights(bert_layer, MODEL_DIR)

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy', f1_score],
)

model.summary()


# Alternative for loading weights from checkpoint file

# from bert.loader import (StockBertConfig, map_stock_config_to_params,
#                          load_stock_weights)
# bert_ckpt_dir="gs://bert_models/2018_10_18/uncased_L-12_H-768_A-12/"
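
For completeness, a hedged sketch of that stock-checkpoint route using the bert-for-tf2 loader API (`StockBertConfig`, `map_stock_config_to_params`, `load_stock_weights`); the local paths are illustrative.

import os

import tensorflow as tf
import bert
from bert.loader import (StockBertConfig, map_stock_config_to_params,
                         load_stock_weights)

bert_ckpt_dir = ".models/uncased_L-12_H-768_A-12"  # illustrative local copy
bert_ckpt_file = os.path.join(bert_ckpt_dir, "bert_model.ckpt")
bert_config_file = os.path.join(bert_ckpt_dir, "bert_config.json")

with tf.io.gfile.GFile(bert_config_file, "r") as reader:
    stock_params = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(stock_params)
l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
# ...wrap l_bert in a keras.Model and call model.build() as in the examples above...
load_stock_weights(l_bert, bert_ckpt_file)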