def test_bert_freeze(self):
        model_dir = tempfile.TemporaryDirectory().name
        os.makedirs(model_dir)
        save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
        tokenizer = bert.bert_tokenization.FullTokenizer(vocab_file=os.path.join(model_dir, "vocab.txt"), do_lower_case=True)

        # prepare input
        max_seq_len  = 24
        input_str_batch    = ["hello, bert!", "how are you doing!"]

        input_ids, token_type_ids = self.prepare_input_batch(input_str_batch, tokenizer, max_seq_len)

        bert_ckpt_file   = os.path.join(model_dir, "bert_model.ckpt")

        bert_params = bert.params_from_pretrained_ckpt(model_dir)
        bert_params.adapter_size = 4
        l_bert = bert.BertModelLayer.from_params(bert_params)

        model = keras.models.Sequential([
            l_bert,
        ])

        model.build(input_shape=(None, max_seq_len))

        model.summary()
        l_bert.apply_adapter_freeze()
        model.summary()

        bert.load_stock_weights(l_bert, bert_ckpt_file)
        #l_bert.embeddings_layer.trainable = False

        model.summary()

        orig_weight_values = []
        for weight in l_bert.weights:
            orig_weight_values.append(weight.numpy())

        model.compile(optimizer=keras.optimizers.Adam(),
                      loss=keras.losses.mean_squared_error,
                      run_eagerly=True)

        trainable_count = len(l_bert.trainable_weights)

        orig_pred = model.predict(input_ids)
        model.fit(x=input_ids, y=np.zeros_like(orig_pred),
          batch_size=2,
          epochs=4)

        trained_count = 0
        for ndx, weight in enumerate(l_bert.weights):
            weight_equal = np.array_equal(weight.numpy(), orig_weight_values[ndx])
            print("{}: {}".format(weight_equal, weight.name))
            if not weight_equal:
                trained_count += 1

        print("  trained weights:", trained_count)
        print("trainable weights:", trainable_count)
        self.assertEqual(trained_count, trainable_count)

        model.summary()
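
    # The test above calls self.prepare_input_batch(...), which is not included
    # in this snippet. A minimal sketch of such a helper (an assumption, following
    # standard BERT preprocessing: [CLS]/[SEP] wrapping, id conversion, zero
    # padding, and all-zero segment ids for single-segment input) might look like:
    def prepare_input_batch(self, input_str_batch, tokenizer, max_seq_len):
        input_ids_batch, token_type_ids_batch = [], []
        for text in input_str_batch:
            tokens = ["[CLS]"] + tokenizer.tokenize(text) + ["[SEP]"]
            token_ids = tokenizer.convert_tokens_to_ids(tokens)[:max_seq_len]
            token_ids += [0] * (max_seq_len - len(token_ids))       # zero-pad to max_seq_len
            input_ids_batch.append(token_ids)
            token_type_ids_batch.append([0] * max_seq_len)          # single segment: all zeros
        return (np.array(input_ids_batch, dtype=np.int32),
                np.array(token_type_ids_batch, dtype=np.int32))
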
    def test_load_pretrained(self):
        print("Eager Execution:", tf.executing_eagerly())

        bert_params = bert.loader.params_from_pretrained_ckpt(
            self.bert_ckpt_dir)
        bert_params.adapter_size = 32
        l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

        model = keras.models.Sequential([
            keras.layers.InputLayer(input_shape=(128, )), l_bert,
            keras.layers.Lambda(lambda x: x[:, 0, :]),
            keras.layers.Dense(2)
        ])

        # we need to freeze before build/compile - otherwise keras counts the params twice
        if bert_params.adapter_size is not None:
            freeze_bert_layers(l_bert)

        model.build(input_shape=(None, 128))
        model.compile(
            optimizer=keras.optimizers.Adam(),
            loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

        bert.load_stock_weights(l_bert, self.bert_ckpt_file)

        model.summary()
    def predict_on_keras_model(self, input_ids, input_mask, token_type_ids):
        max_seq_len = input_ids.shape[-1]
        model, l_bert, k_inputs = self.create_bert_model(max_seq_len)
        model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])
        bert.load_stock_weights(l_bert, self.bert_ckpt_file)
        k_res = model.predict([input_ids, token_type_ids])
        return k_res
Example #4
def create_model(max_seq_len,
                 model_dir,
                 model_ckpt,
                 freeze=True,
                 adapter_size=4):
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    print(f'bert params: {bert_params}')
    bert_params.adapter_size = adapter_size
    bert_params.adapter_init_scale = 1e-5
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len, ),
                                   dtype='int32',
                                   name="input_ids")
    bert_output = l_bert(input_ids)

    print("bert shape", bert_output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :],
                                  name='lambda')(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(name='dense_sin',
                                units=768,
                                activation=tf.math.sin)(cls_out)
    # logits = keras.layers.Dense(name='dense_tanh', units=768, activation="tanh")(cls_out)
    # logits = keras.layers.Dense(name='dense_relu', units=256, activation="relu")(cls_out)
    # logits = keras.layers.Dense(name='dense_gelu', units=256, activation="gelu")(cls_out)
    logits = keras.layers.BatchNormalization()(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(name='initial_predictions',
                                units=len(classes),
                                activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    model.summary()
    if freeze:
        l_bert.apply_adapter_freeze()
        l_bert.embeddings_layer.trainable = False
        model.summary()

    # More details: https://arxiv.org/abs/1902.00751
    # apply global regularization on all trainable dense layers
    pf.utils.add_dense_layer_loss(
        model,
        kernel_regularizer=keras.regularizers.l2(0.01),
        bias_regularizer=keras.regularizers.l2(0.01))

    model.compile(
        optimizer=pf.optimizers.RAdam(),
        # loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # for some reason it does not work with logits at all
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    bert.load_stock_weights(l_bert, model_ckpt)
    # bert.load_bert_weights(l_bert, model_ckpt)

    return model
Example #5
    def create_bert_model(self):
        bert_params = bert.params_from_pretrained_ckpt(self.model_dir)
        l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

        l_input_ids = tf.keras.layers.Input(shape=(self.max_seq_length, ),
                                            dtype='int32')
        output = l_bert(l_input_ids)
        model = tf.keras.Model(inputs=l_input_ids, outputs=output)
        model.build(input_shape=(None, self.max_seq_length))

        bert.load_stock_weights(l_bert, self.model_ckpt)

        return model
Example #6
def Bert_feature_extraction(ids, texts, max_seq_len, feature_file_name):
    #https://github.com/kpe/bert-for-tf2
    model_dir = ".models/uncased_L-12_H-768_A-12/uncased_L-12_H-768_A-12"
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

    l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')
    l_token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')

    # using the default token_type/segment id 0
    output = l_bert(l_input_ids)  # output: [batch_size, max_seq_len, hidden_size]

    output = keras.layers.GlobalAveragePooling1D()(output)
    model = keras.Model(inputs=l_input_ids, outputs=output)
    model.build(input_shape=(None, max_seq_len))

    bert.load_stock_weights(l_bert, bert_ckpt_file)

    do_lower_case = not (model_dir.find("cased") == 0 or model_dir.find("multi_cased") == 0)
    bert.bert_tokenization.validate_case_matches_checkpoint(do_lower_case, bert_ckpt_file)
    vocab_file = os.path.join(model_dir, "vocab.txt")
    tokenizer = bert.bert_tokenization.FullTokenizer(vocab_file, do_lower_case)

    feature_dict = {}
    for i in range(len(ids)):
        id = ids[i]
        print(id)
        title = texts[i]
        tokens = tokenizer.tokenize(title)
        print(tokens)
        tokens = ["[CLS]"] + tokens + ["[SEP]"]
        token_ids = tokenizer.convert_tokens_to_ids(tokens)

        while len(token_ids) < max_seq_len:
            token_ids.append(0)
        if len(token_ids) > max_seq_len:
            token_ids = token_ids[:max_seq_len]
        print(token_ids)

        token_ids = np.array([token_ids], dtype=np.int32)

        feature = model.predict(token_ids)

        feature_dict[id] = feature.tolist()[0]

    np.save(feature_file_name, feature_dict)
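
# Illustrative usage note (an assumption, not part of the original snippet):
# np.save stores the Python dict as a 0-d object array, so the features have to
# be read back with allow_pickle=True and .item(), e.g.
#
#     features = np.load(feature_file_name + ".npy", allow_pickle=True).item()
#     vector = features[some_id]   # feature vector for that text (some_id is hypothetical)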
Example #7
    def test_extend_pretrained_tokens(self):
        model_dir = tempfile.TemporaryDirectory().name
        os.makedirs(model_dir)
        save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
        tokenizer = bert.FullTokenizer(vocab_file=os.path.join(
            model_dir, "vocab.txt"),
                                       do_lower_case=True)

        ckpt_dir = os.path.dirname(save_path)
        bert_params = bert.params_from_pretrained_ckpt(ckpt_dir)

        self.assertEqual(bert_params.token_type_vocab_size, 2)
        bert_params.extra_tokens_vocab_size = 3

        l_bert = bert.BertModelLayer.from_params(bert_params)
        # call the layer once on dummy inputs in order to instantiate its weights
        l_bert([np.array([[1, 1, 0]]),
                np.array([[1, 0, 0]])],
               mask=[[True, True, False]])

        mismatched = bert.load_stock_weights(l_bert, save_path)
        self.assertEqual(0, len(mismatched),
                         "no weights should have mismatched shapes")

        l_bert([np.array([[1, -3, 0]]),
                np.array([[1, 0, 0]])],
               mask=[[True, True, False]])
def get_bert_model(max_length: int,
                   freeze_bert_layers: bool = False,
                   load_bert_weights: bool = True) -> tf.keras.Model:
    """
    Requires a BERT folder downloaded from https://github.com/google-research/bert
    :param max_length: maximum length of a tokenized sentence
    :param freeze_bert_layers: if True, freeze the BERT weights and train only the task head
    :param load_bert_weights: if True, load the pre-trained weights from the BERT checkpoint
    :return: tensorflow model object
    """
    bert_params: BertModelLayer.Params = params_from_pretrained_ckpt(model_dir)

    l_bert: BertModelLayer = BertModelLayer.from_params(bert_params,
                                                        name='bert')

    if freeze_bert_layers:
        # With all bert weights frozen, the performance is not very good
        l_bert.apply_adapter_freeze()
        l_bert.trainable = False

    l_input_ids: tf.Tensor = tf.keras.layers.Input(shape=(max_length, ),
                                                   dtype='int32')
    # If needed, usage of token_type_ids is described here: https://github.com/kpe/bert-for-tf2/blob/master/examples/gpu_movie_reviews.ipynb

    output: tf.Tensor = l_bert(
        l_input_ids)  # [batch_size, max_seq_len, hidden_size]
    output = tf.keras.layers.GlobalAveragePooling1D()(
        output)  # [batch_size, hidden_size]

    # Fine-tune for task
    output = tf.keras.layers.Dense(class_count, activation='softmax')(
        output)  # [batch_size, class_count]

    model: tf.keras.Model = tf.keras.Model(inputs=[l_input_ids],
                                           outputs=output)
    # Comment from bert repo: The learning rate we used in the paper was 1e-4.
    # However, if you are doing additional steps of pre-training starting from an existing BERT checkpoint, you should use a smaller learning rate (e.g., 2e-5)
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                  metrics=['accuracy'])
    model.summary()

    if load_bert_weights:
        bert_ckpt_file: str = os.path.join(model_dir, "bert_model.ckpt")
        load_stock_weights(l_bert, bert_ckpt_file)

    return model
    def test_extend_pretrained_segments(self):

        model_dir = tempfile.TemporaryDirectory().name
        os.makedirs(model_dir)
        save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
        tokenizer = bert.FullTokenizer(vocab_file=os.path.join(
            model_dir, "vocab.txt"),
                                       do_lower_case=True)

        ckpt_dir = os.path.dirname(save_path)
        bert_params = bert.params_from_pretrained_ckpt(ckpt_dir)

        self.assertEqual(bert_params.token_type_vocab_size, 2)
        bert_params.token_type_vocab_size = 4

        l_bert = bert.BertModelLayer.from_params(bert_params)

        # call the layer once on dummy inputs in order to instantiate its weights
        l_bert([np.array([[1, 1, 0]]),
                np.array([[1, 0, 0]])])  #, mask=[[True, True, False]])

        #
        # - load the weights from a pre-trained model,
        # - expect a mismatch for the token_type embeddings
        # - use the segment/token type id=0 embedding for the missing token types
        #
        mismatched = bert.load_stock_weights(l_bert, save_path)

        self.assertEqual(1, len(mismatched),
                         "token_type embeddings should have mismatched shape")

        for weight, value in mismatched:
            if re.match("(.*)embeddings/token_type_embeddings/embeddings:0",
                        weight.name):
                seg0_emb = value[:1, :]
                new_segment_embeddings = np.repeat(
                    seg0_emb, (weight.shape[0] - value.shape[0]), axis=0)
                new_value = np.concatenate([value, new_segment_embeddings],
                                           axis=0)
                keras.backend.batch_set_value([(weight, new_value)])

        tte = l_bert.embeddings_layer.token_type_embeddings_layer.weights[0]

        if not tf.executing_eagerly():
            with tf.keras.backend.get_session() as sess:
                tte, = sess.run((tte, ))

        self.assertTrue(np.allclose(seg0_emb, tte[0], 1e-6))
        self.assertFalse(np.allclose(seg0_emb, tte[1], 1e-6))
        self.assertTrue(np.allclose(seg0_emb, tte[2], 1e-6))
        self.assertTrue(np.allclose(seg0_emb, tte[3], 1e-6))

        bert_params.token_type_vocab_size = 4
        print("token_type_vocab_size", bert_params.token_type_vocab_size)
        print(l_bert.embeddings_layer.trainable_weights[1])
Example #10
    def __init__(self, model_dir, max_length, bert_params, num_layers,
                 trainable):
        super(EncoderBert, self).__init__()

        assert isinstance(max_length, int)
        assert bert_params is not None or model_dir is not None

        if bert_params is None:
            assert os.path.exists(model_dir)
            bert_params = params_from_pretrained_ckpt(model_dir)
        if isinstance(num_layers, int):
            bert_params.num_layers = num_layers

        if bert_params.max_position_embeddings < max_length:
            bert_params.max_position_embeddings = max_length

        l_bert = BertModelLayer.from_params(bert_params, name="bert")

        l_input_ids = tf.keras.layers.Input(shape=(max_length, ),
                                            dtype='int32')

        output = l_bert(l_input_ids)
        model = tf.keras.Model(inputs=l_input_ids, outputs=output)
        model.build(input_shape=(None, max_length))

        def flatten_layers(root_layer):
            if isinstance(root_layer, tf.keras.layers.Layer):
                yield root_layer
            for layer in root_layer._layers:
                for sub_layer in flatten_layers(layer):
                    yield sub_layer

        if not trainable:
            for layer in flatten_layers(l_bert):
                layer.trainable = False

        self.model = model

        if model_dir is not None:
            bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")
            load_stock_weights(l_bert, bert_ckpt_file)
Example #11
def BertModel(bertTokensShape):
    config = configparser.ConfigParser()
    config.read('conf.txt')
    bert_model_dir = config['GENERAL']['BERT_MODEL_DIR']
    bert_ckpt = config['GENERAL']['BERT_CKPT']

    current_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
    bert_model_dir = os.path.join(current_dir, "bert_model", bert_model_dir)

    inputs = keras.Input(shape=bertTokensShape, name='bert_token_ids')

    bert_layer = get_bert_layer(bert_model_dir)
    bert_vectors = bert_layer(inputs)
    bert_vectors = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_vectors)

    model = keras.Model(inputs=inputs, outputs=bert_vectors, name="bert_vectors")

    bert_ckpt_file = os.path.join(bert_model_dir, bert_ckpt)
    bert.load_stock_weights(bert_layer, bert_ckpt_file)

    return model
Example #12
def create_model() -> k.Sequential:
    bert_layer = create_bert_layer()

    model = k.Sequential([
        k.layers.Input(shape=(MAX_LEN, ), dtype='int32', name='input_ids'),
        bert_layer,
        k.layers.TimeDistributed(k.layers.Dense(768 * 3,
                                                activation=tf.nn.relu)),
        k.layers.TimeDistributed(
            k.layers.Dense(len(CLASSES), activation=tf.nn.softmax))
    ])

    model.build()

    bert_layer.apply_adapter_freeze()
    bert.load_stock_weights(bert_layer, BERT_WEIGHTS_PATH)

    model.compile(loss='categorical_crossentropy',
                  optimizer=tf.optimizers.Adam(learning_rate=1e-4),
                  metrics=['categorical_accuracy'])

    return model
Example #13
    def test_multi(self):
        print(self.bert_ckpt_dir)
        bert_params = bert.loader.params_from_pretrained_ckpt(
            self.bert_ckpt_dir)
        bert_params.adapter_size = 32
        l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

        max_seq_len = 128
        l_input_ids = tf.keras.layers.Input(shape=(max_seq_len, ),
                                            dtype='int32',
                                            name="input_ids")
        l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_len, ),
                                                 dtype='int32',
                                                 name="token_type_ids")
        output = l_bert([l_input_ids, l_token_type_ids])

        model = tf.keras.Model(inputs=[l_input_ids, l_token_type_ids],
                               outputs=output)
        model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

        bert.load_stock_weights(l_bert, self.bert_ckpt_file)

        model.summary()
Example #14
def get_bert_model():
    bert_params = params_from_pretrained_ckpt(model_dir)

    l_bert = BertModelLayer.from_params(bert_params, name='bert')
    # Freeze bert layers
    l_bert.apply_adapter_freeze()
    l_bert.trainable = False

    l_input_ids = tf.keras.layers.Input(shape=(max_length, ), dtype='int32')
    l_token_type_ids = tf.keras.layers.Input(shape=(max_length, ),
                                             dtype='int32')

    # provide a custom token_type/segment id as a layer input
    intermediate_output = l_bert([l_input_ids, l_token_type_ids
                                  ])  # [batch_size, max_seq_len, hidden_size]

    averaged_output = tf.keras.layers.GlobalAveragePooling1D()(
        intermediate_output)
    l_middle_output = tf.keras.layers.Dense(16, activation='relu')
    l_output = tf.keras.layers.Dense(1, activation='sigmoid')
    m_output = l_middle_output(averaged_output)
    output = l_output(m_output)

    model = tf.keras.Model(inputs=[l_input_ids, l_token_type_ids],
                           outputs=output)

    optimizer = tf.keras.optimizers.Adam()
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()

    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")
    load_stock_weights(l_bert, bert_ckpt_file)

    return model
    def test_finetuning_workflow(self):
        # create a BERT layer with config from the checkpoint
        bert_params = bert.params_from_pretrained_ckpt(self.ckpt_dir)

        max_seq_len = 12

        model, l_bert = self.build_model(bert_params, max_seq_len=max_seq_len)
        model.summary()

        # freeze non-adapter weights
        l_bert.apply_adapter_freeze()
        model.summary()

        # load the BERT weights from the pre-trained model
        bert.load_stock_weights(l_bert, self.ckpt_path)

        # prepare the data
        inputs, targets = ["hello world", "goodbye"], [1, 2]
        tokens = [self.tokenizer.tokenize(toks) for toks in inputs]
        tokens = [
            self.tokenizer.convert_tokens_to_ids(toks) for toks in tokens
        ]
        tokens = [toks + [0] * (max_seq_len - len(toks)) for toks in tokens]
        x = np.array(tokens)
        y = np.array(targets)

        # fine tune
        model.fit(x, y, epochs=3)

        # preserve the logits for comparison before and after restoring the fine-tuned model
        logits = model.predict(x)

        # now store the adapter weights only

        # old fashion - using saver
        #  finetuned_weights = {w.name: w.value() for w in model.trainable_weights}
        #  saver = tf.compat.v1.train.Saver(finetuned_weights)
        #  fine_path = saver.save(tf.compat.v1.keras.backend.get_session(), fine_ckpt)

        fine_ckpt = os.path.join(self.ckpt_dir, "fine-tuned.ckpt")
        finetuned_weights = {w.name: w for w in model.trainable_weights}
        checkpoint = tf.train.Checkpoint(**finetuned_weights)
        fine_path = checkpoint.save(file_prefix=fine_ckpt)
        print("fine tuned ckpt:", fine_path)

        # build new model
        tf.compat.v1.keras.backend.clear_session()
        model, l_bert = self.build_model(bert_params, max_seq_len=max_seq_len)
        l_bert.apply_adapter_freeze()

        # load the BERT weights from the pre-trained checkpoint
        bert.load_stock_weights(l_bert, self.ckpt_path)

        # load the fine tuned classifier model weights
        finetuned_weights = {w.name: w for w in model.trainable_weights}
        checkpoint = tf.train.Checkpoint(**finetuned_weights)
        load_status = checkpoint.restore(fine_path)
        load_status.assert_consumed().run_restore_ops()

        logits_restored = model.predict(x)

        # check the predictions of the restored model
        self.assertTrue(np.allclose(logits_restored, logits, 1e-6))
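
    # The workflow above relies on self.build_model(...), which is not included in
    # this snippet. A minimal sketch of such a helper (an assumption, mirroring the
    # other examples in this collection: token-id input, BERT layer, [CLS] vector,
    # small softmax classifier) might look like:
    def build_model(self, bert_params, max_seq_len):
        l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
        l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
        output = l_bert(l_input_ids)                                     # [batch_size, max_seq_len, hidden_size]
        cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)  # [CLS] token vector
        probs = keras.layers.Dense(units=3, activation="softmax")(cls_out)
        model = keras.Model(inputs=l_input_ids, outputs=probs)
        model.build(input_shape=(None, max_seq_len))
        model.compile(optimizer=keras.optimizers.Adam(),
                      loss=keras.losses.SparseCategoricalCrossentropy(),
                      metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
        return model, l_bert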
Example #16
def get_model(
    lang,
    model_type,
    bert_model_path,
    max_length=300,
    num_feature=2,
    saved_epoch_path=None,
    configs=None,
):

    if model_type == "vi_attentive_reader":
        question_size = configs["question_size"]
        text_size = configs["text_size"]

        question_input = tf.keras.layers.Input(shape=(question_size,))
        text_input = tf.keras.layers.Input(shape=(text_size,))
        inputs = [question_input, text_input]

        attentive_reader = AttentiveReader(
            vocab_size=configs["vocab_size"],
            embedding_dim=200,
            q_units=200,
            p_units=200,
            num_rnn_layer=2,
        )
        output = attentive_reader(inputs)

        model = tf.keras.Model(inputs=inputs, outputs=output)

        if saved_epoch_path:
            # load the saved model
            # TODO: we will not save bert weights later
            print("Loading saved_epoch_path: {}".format(saved_epoch_path))
            model.load_weights(saved_epoch_path)

        return model

    elif model_type == "en_bert_bidaf":
        input_features = [
            tf.keras.layers.Input(shape=(num_feature, max_length))
        ]

        bert_bidaf = EnBertBidaf(
            bert_model_path=bert_model_path,
            max_length=max_length,
        )
        output = bert_bidaf(input_features)

        model = tf.keras.Model(inputs=input_features, outputs=output)

        if saved_epoch_path:
            # load the saved model
            # TODO: we will not save bert weights later
            print("Loading saved_epoch_path: {}".format(saved_epoch_path))
            model.load_weights(saved_epoch_path)
        else:
            # load weights for bert model
            weights_file = "{}/bert_model.ckpt".format(bert_model_path)
            print("Loading bert weights_file: {}".format(weights_file))
            load_stock_weights(bert_bidaf.bert_layer, weights_file)

        return model
Example #17
#l_token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')

# using the default token_type/segment id 0
#output = l_bert([l_input_ids, l_token_type_ids])                              # output: [batch_size, max_seq_len, hidden_size]
output = l_bert(l_input_ids)  # output: [batch_size, max_seq_len, hidden_size]

cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
#cls_out = keras.layers.Dropout(0.5)(cls_out)
#logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
#logits = keras.layers.Dropout(0.5)(logits)
logits = keras.layers.Dense(units=3, activation="softmax")(cls_out)
model = keras.Model(inputs=l_input_ids, outputs=logits)
model.build(input_shape=(None, max_seq_len))

bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")
bert.load_stock_weights(l_bert, bert_ckpt_file)

model.compile(
    optimizer=keras.optimizers.Adam(),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),  # the final Dense layer already applies softmax
    metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

model.summary()


def create_learning_rate_scheduler(max_learn_rate=5e-5,
                                   end_learn_rate=1e-7,
                                   warmup_epoch_count=10,
                                   total_epoch_count=90):
    def lr_scheduler(epoch):
        if epoch < warmup_epoch_count:
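
# The example above is truncated at this point. A minimal sketch of how such a
# warm-up plus exponential-decay scheduler is typically completed (an assumption,
# not the original code):
#
#     def lr_scheduler(epoch):
#         if epoch < warmup_epoch_count:
#             lr = (max_learn_rate / warmup_epoch_count) * (epoch + 1)    # linear warm-up
#         else:
#             progress = (epoch - warmup_epoch_count + 1) / (total_epoch_count - warmup_epoch_count + 1)
#             lr = max_learn_rate * math.exp(math.log(end_learn_rate / max_learn_rate) * progress)
#         return float(lr)
#
#     return keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=1)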
Example #18
def _load_bert(config):
    """
    Loads a BERT model using bert-for-tf2.

    Args:
        config: configuration dict providing config["embedder"]["bert"]["model_dir"],
            ["model_ckpt"] and ["max_seq_len"]

    Returns:
        (model, pooled_model): the full sequence-output model and the pooled [CLS]-based model
    """

    model_ckpt = config["embedder"]["bert"]["model_ckpt"]
    bert_params = bert_for_tf2.params_from_pretrained_ckpt(
        config["embedder"]["bert"]["model_dir"])
    max_seq_len = config["embedder"]["bert"]["max_seq_len"]
    # max_seq_len = bert_params.max_position_embeddings

    l_bert = bert_for_tf2.BertModelLayer.from_params(bert_params)
    l_input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32')
    # l_token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')

    # (1 segment) using the default token_type/segment id 0
    bert_output = l_bert(
        l_input_ids)  # output: [batch_size, max_seq_len, hidden_size]

    # Pooling layer for sentence vector
    # if pooling == "default":  # First token ([CLS]) "This output is usually not a good summary of the semantic content ..."
    #     first_token_tensor = tf.squeeze(bert_output[:, 0:1, :], axis=1)
    #     output = tf.keras.layers.Dense(bert_params.hidden_size,
    #                                    activation=tf.tanh,
    #                                    kernel_initializer=tf.keras.initializers.TruncatedNormal(
    #                                        stddev=bert_params.initializer_range))(first_token_tensor)
    # if pooling == "average":
    #     output = tf.squeeze(
    #         tf.keras.layers.AveragePooling1D(pool_size=max_seq_len, data_format='channels_last')(bert_output),
    #         axis=1)
    # elif pooling == "max":
    #     output = tf.squeeze(
    #         tf.keras.layers.MaxPool1D(pool_size=self.max_seq_len, data_format='channels_last')(bert_output),
    #         axis=1)
    # # else if pooling == "median" : # remove zeros and do something
    # elif pooling == "none":
    #     output = bert_output
    #
    # model = keras.Model(inputs=l_input_ids, outputs=output)
    # model.build(input_shape=(None, max_seq_len))

    first_token_tensor = tf.squeeze(bert_output[:, 0:1, :], axis=1)
    pooled_output = tf.keras.layers.Dense(
        bert_params.hidden_size,
        activation=tf.tanh,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            stddev=bert_params.initializer_range))(first_token_tensor)
    pooled_model = keras.Model(inputs=l_input_ids, outputs=pooled_output)
    pooled_model.build(input_shape=(None, max_seq_len))
    model = keras.Model(inputs=l_input_ids, outputs=bert_output)
    model.build(input_shape=(None, max_seq_len))

    l_bert.apply_adapter_freeze()
    bert_for_tf2.load_stock_weights(l_bert, model_ckpt)

    return model, pooled_model
def loadBertCheckpoint():
    modelsFolder = os.path.join(modelBertDir, "uncased_L-2_H-128_A-2")
    checkpointName = os.path.join(modelsFolder, "bert_model.ckpt")
    bert.load_stock_weights(bert_layer, checkpointName)
def load_bert_checkpoint():
    # checkpoint_name = os.path.join(models_folder, "bert_model.ckpt")

    bert.load_stock_weights(bert_layer, checkpoint_name)
Example #21
def create_estimator(steps=None, warmup_steps=None, model_dir=args.model_dir, num_labels=args.num_labels,
                     max_seq_len=args.max_seq_len, learning_rate=args.learning_rate, name='bert'):
    def my_auc(labels, predictions):
        auc_metric = tf.keras.metrics.AUC(name="my_auc")
        auc_metric.update_state(y_true=labels, y_pred=tf.argmax(predictions, 1))
        return {'auc': auc_metric}

    if name == 'bert':
        if warmup_steps is None:
            custom_objects = {
                'BertModelLayer': bert.BertModelLayer,
                'AdamW': AdamW,
                'PruneLowMagnitude': PruneLowMagnitude
            }
            if args.prune_enabled:
                with sparsity.prune_scope():
                    model = tf.keras.models.load_model(h5py.File(args.keras_model_path), custom_objects=custom_objects)
            else:
                model = tf.keras.models.load_model(h5py.File(args.keras_model_path), custom_objects=custom_objects)
            estimator = tf.keras.estimator.model_to_estimator(model, model_dir=args.output_dir)
            return estimator, model
        input_token_ids = tf.keras.Input((max_seq_len,), dtype=tf.int32, name='input_ids')
        input_segment_ids = tf.keras.Input((max_seq_len,), dtype=tf.int32, name='segment_ids')
        input_mask = tf.keras.Input((max_seq_len,), dtype=tf.int32, name='input_mask')
        bert_params = bert.params_from_pretrained_ckpt(model_dir)
        l_bert = bert.BertModelLayer.from_params(bert_params)
        bert_output = l_bert(inputs=[input_token_ids, input_segment_ids], mask=input_mask)
        if args.pool_strategy == 'cls':
            first_token = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
            pooled_output = tf.keras.layers.Dense(units=first_token.shape[-1], activation=tf.math.tanh)(first_token)
            dropout = tf.keras.layers.Dropout(rate=0.1)(pooled_output)
        elif args.pool_strategy == 'avg':
            seq1_tokens = tf.keras.layers.Lambda(lambda seq: seq[:,1:args.max_seq_len-1,:])(bert_output)
            seq2_tokens = tf.keras.layers.Lambda(lambda seq: seq[:,args.max_seq_len:2*args.max_seq_len])
        pruning_params = {
            'pruning_schedule': sparsity.PolynomialDecay(initial_sparsity=0.50,
                                                         final_sparsity=0.90,
                                                         begin_step=1000,
                                                         end_step=2000,
                                                         frequency=100)
        }
        dense = tf.keras.layers.Dense(units=num_labels, name='label_ids')
        if args.prune_enabled:
            pruned_dense = sparsity.prune_low_magnitude(
                dense,
                **pruning_params)
            logits = pruned_dense(dropout)
        else:
            logits = dense(dropout)
        output_prob = tf.keras.layers.Softmax(name='output_prob')(logits)
        model = tf.keras.Model(inputs=[input_token_ids, input_segment_ids, input_mask], outputs=[logits])
        model.build(input_shape=[(None, max_seq_len,), (None, max_seq_len,), (None, max_seq_len,)])
        # freeze_bert_layers(l_bert)
        bert.load_stock_weights(l_bert, op.join(model_dir, 'bert_model.ckpt'))
        weight_decays = get_weight_decays(model)
        for k, v in list(weight_decays.items()):  # iterate over a copy: entries may be deleted below
            if use_weight_decay(k):
                weight_decays[k] = 0.01
            else:
                del weight_decays[k]
        opt = create_optimizer(
            init_lr=learning_rate,
            steps=steps,
            weight_decays=weight_decays,
            warmup_steps=warmup_steps,
        )
        model.compile(
            optimizer=opt,
            loss={"{}label_ids".format(
                'prune_low_magnitude_' if args.prune_enabled else ''): tf.keras.losses.SparseCategoricalCrossentropy(
                from_logits=True)},
            # for numerical stability
            metrics=[tf.keras.metrics.SparseCategoricalAccuracy()]
        )
        model.summary()
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = args.gpu_memory_fraction
        config.log_device_placement = False
        exclude_optimizer_variables = r'^((?!(iter_updates|eta_t)).)*$'
        ws = tf.estimator.WarmStartSettings(
            ckpt_to_initialize_from=op.join(args.output_dir, 'keras'),
            vars_to_warm_start=exclude_optimizer_variables
        )
        estimator = tf.keras.estimator.model_to_estimator(keras_model=model,
                                                          config=tf.estimator.RunConfig(
                                                              model_dir=args.output_dir,
                                                              session_config=config,
                                                          ))
        estimator._warm_start_settings = ws
        return estimator, model
    raise NotImplementedError("* available models: [ bert, ]")
Example #22
def compile_model(max_seq_len=max_seq_len, adapter_size=adapter_size,
						batch_size=None, init_ckpt_file=None,
						init_bert_ckpt_file=bert_ckpt_file):
	"""

	:rtype: keras sequential model
	:param init_ckpt_file:
	:param max_seq_len:
	:param init_bert_ckpt_file:
	:param adapter_size:
	:type batch_size: integer
	"""
	# initializing Sequential model
	model = Sequential()
	# adding input_layer
	model.add(InputLayer(input_shape=(max_seq_len,), batch_size=batch_size, dtype="int32", name="input_ids"))
	# adding BERT layer
	bert_params = params_from_pretrained_ckpt(dirname(join(model_dir, 'bert_model.ckpt')))

	# create the bert layer
	bert_params.adapter_size = adapter_size
	bert_params.adapter_init_scale = 1e-5
	bert_layer = BertModelLayer.from_params(bert_params, name="bert")

	model.add(bert_layer)
	# adding temporal Dense, Normalization and Activation layers
	model.add(TimeDistributed(Dense(bert_params.hidden_size // 32)))
	model.add(TimeDistributed(LayerNormalization()))
	model.add(TimeDistributed(Activation("tanh")))
	model.add(Concat([
		Lambda(lambda x: tf.math.reduce_max(x, axis=1, keepdims=False)),
		GlobalAveragePooling1D()])
	)
	# dense_hidden_layer
	model.add(Dense(units=bert_params.hidden_size // 16))
	# normalization_layer
	model.add(LayerNormalization())
	# activation_layer
	model.add(Activation("tanh"))
	# dense_layer
	model.add(Dense(units=2))
	model.build(input_shape=(batch_size, max_seq_len))

	# freeze non-adapter-BERT layers for the case adapter_size is set
	bert_layer.apply_adapter_freeze()
	bert_layer.embeddings_layer.trainable = False  # True for unfreezing emb LayerNorms

	# apply global regularization on all trainable dense layers
	pf.utils.add_dense_layer_loss(model,
									kernel_regularizer=regularizers.l2(0.01),
									bias_regularizer=regularizers.l2(0.01))

	model.compile(optimizer=RAdam(),
					loss=SparseCategoricalCrossentropy(from_logits=True),
					metrics=[SparseCategoricalAccuracy(name="acc")])
	# load the pre-trained model weights (once the input_shape is known)
	if init_ckpt_file:
		print("Loading model weights from:", init_ckpt_file)
		model.load_weights(init_ckpt_file)
	elif init_bert_ckpt_file:
		print("Loading pre-trained BERT layer from:", init_bert_ckpt_file)
		load_stock_weights(bert_layer, init_bert_ckpt_file)

	return model
def loadBertCheckpoint():
    modelsFolder = os.path.join('./model/', "multi_cased_L-12_H-768_A-12")
    checkpointName = os.path.join(modelsFolder, "bert_model.ckpt")

    bert.load_stock_weights(bert_layer, checkpointName)
Example #24
def loadBertCheckpoint():
    checkpointName = os.path.join(pTrain_dir, "bert_model.ckpt")

    bert.load_stock_weights(bert_layer, checkpointName)
Example #25
def build_transformer(transformer, max_seq_length, num_labels, tagging=True, tokenizer_only=False):
    spm_model_file = None
    if transformer in zh_albert_models_google:
        from bert.tokenization.albert_tokenization import FullTokenizer
        model_url = zh_albert_models_google[transformer]
        albert = True
    elif transformer in albert_models_tfhub:
        from bert.tokenization.albert_tokenization import FullTokenizer
        with stdout_redirected(to=os.devnull):
            model_url = fetch_tfhub_albert_model(transformer,
                                                 os.path.join(hanlp_home(), 'thirdparty', 'tfhub.dev', 'google',
                                                              transformer))
        albert = True
        spm_model_file = glob.glob(os.path.join(model_url, 'assets', '*.model'))
        assert len(spm_model_file) == 1, 'No spm model found or multiple spm models found'
        spm_model_file = spm_model_file[0]
    elif transformer in bert_models_google:
        from bert.tokenization.bert_tokenization import FullTokenizer
        model_url = bert_models_google[transformer]
        albert = False
    else:
        raise ValueError(
            f'Unknown model {transformer}, available ones: {list(bert_models_google.keys()) + list(zh_albert_models_google.keys()) + list(albert_models_tfhub.keys())}')
    bert_dir = get_resource(model_url)
    if spm_model_file:
        vocab = glob.glob(os.path.join(bert_dir, 'assets', '*.vocab'))
    else:
        vocab = glob.glob(os.path.join(bert_dir, '*vocab*.txt'))
    assert len(vocab) == 1, 'No vocab found or multiple vocabs found'
    vocab = vocab[0]
    lower_case = any(key in transformer for key in ['uncased', 'multilingual', 'chinese', 'albert'])
    if spm_model_file:
        # noinspection PyTypeChecker
        tokenizer = FullTokenizer(vocab_file=vocab, spm_model_file=spm_model_file, do_lower_case=lower_case)
    else:
        tokenizer = FullTokenizer(vocab_file=vocab, do_lower_case=lower_case)
    if tokenizer_only:
        return tokenizer
    if spm_model_file:
        bert_params = albert_params(bert_dir)
    else:
        bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name='albert' if albert else "bert")
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids], mask=l_mask_ids)
    if not tagging:
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    if bert_params.hidden_dropout:
        output = tf.keras.layers.Dropout(bert_params.hidden_dropout, name='hidden_dropout')(output)
    logits = tf.keras.layers.Dense(num_labels, kernel_initializer=tf.keras.initializers.TruncatedNormal(
        bert_params.initializer_range))(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids], outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    if not spm_model_file:
        ckpt = glob.glob(os.path.join(bert_dir, '*.index'))
        assert ckpt, f'No checkpoint found under {bert_dir}'
        ckpt, _ = os.path.splitext(ckpt[0])
    with stdout_redirected(to=os.devnull):
        if albert:
            if spm_model_file:
                skipped_weight_value_tuples = bert.load_albert_weights(l_bert, bert_dir)
            else:
                # noinspection PyUnboundLocalVariable
                skipped_weight_value_tuples = load_stock_weights(l_bert, ckpt)
        else:
            # noinspection PyUnboundLocalVariable
            skipped_weight_value_tuples = bert.load_bert_weights(l_bert, ckpt)
    assert 0 == len(skipped_weight_value_tuples), f'failed to load pretrained {transformer}'
    return model, tokenizer