Example #1
    def __init__(self, model_dir: str, tvu: TargetVocabUtil, max_seq_len: int,
                 freeze_bert_layer: bool):
        bert_params = bert.params_from_pretrained_ckpt(model_dir)
        self.l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
        self.vu = tvu
        self.max_seq_len = max_seq_len
        self.freeze_bert_layer = freeze_bert_layer
Example #2
    def bert_2(self, bert_config_file=None, bert_ckpt_file=None):
        with tf.io.gfile.GFile(bert_config_file, "r") as reader:
            bert_params = params_from_pretrained_ckpt(bert_ckpt_file)
            l_bert = BertModelLayer.from_params(bert_params, name="bert")
            #l_bert.apply_adapter_freeze()
            #l_bert.embeddings_layer.trainable = False

        in_sentence = Input(shape=(150, ), dtype='int64', name="Input1")

        bert_output = l_bert(in_sentence)

        lstm_output = GlobalAveragePooling1D()(bert_output)
        sentence_model = Model(in_sentence, lstm_output)

        section_input = Input(shape=(300, 150), dtype='int64', name="Input2")
        section_encoded = TimeDistributed(sentence_model)(section_input)
        section_encoded = LSTM(300)(section_encoded)
        section_encoded = Dense(21)(section_encoded)
        section_model = Model(section_input, section_encoded)

        section_model.compile(optimizer="adam", loss="binary_crossentropy")

        sentence_model.summary()
        section_model.summary()

        return section_model
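A quick shape sketch for the hierarchical model above, using dummy data; the batch size and the way the model object is obtained are assumptions, not part of the original example.

import numpy as np

# 4 sections, each padded to 300 sentences of 150 token ids (matching Input2 above)
sections = np.zeros((4, 300, 150), dtype="int64")
labels = np.zeros((4, 21), dtype="float32")  # one target vector per section, matching Dense(21)

# section_model = ...          # the model returned by bert_2() above
# section_model.fit(sections, labels, batch_size=2, epochs=1)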
Example #3
    def _create_bert_layer(self):
        # Loads a BERT Keras layer from a downloaded pretrained module.
        bert_params = bert.params_from_pretrained_ckpt(self.bert_dir)
        bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")
        bert_layer.apply_adapter_freeze()
        checkpoint_name = os.path.join(self.bert_dir, "bert_model.ckpt.data-00000-of-00001")
        return bert_layer
Example #4
    def test_regularization(self):
        # create a BERT layer with config from the checkpoint
        bert_params = bert.params_from_pretrained_ckpt(self.ckpt_dir)

        max_seq_len = 12

        model, l_bert = self.build_model(bert_params, max_seq_len=max_seq_len)
        l_bert.apply_adapter_freeze()
        model.summary()

        kernel_regularizer = keras.regularizers.l2(0.01)
        bias_regularizer = keras.regularizers.l2(0.01)

        pf.utils.add_dense_layer_loss(model,
                                      kernel_regularizer=kernel_regularizer,
                                      bias_regularizer=bias_regularizer)
        # prepare the data
        inputs, targets = ["hello world", "goodbye"], [1, 2]
        tokens = [self.tokenizer.tokenize(toks) for toks in inputs]
        tokens = [
            self.tokenizer.convert_tokens_to_ids(toks) for toks in tokens
        ]
        tokens = [toks + [0] * (max_seq_len - len(toks)) for toks in tokens]
        x = np.array(tokens)
        y = np.array(targets)
        # fine tune
        model.fit(x, y, epochs=3)
Example #5
def make_entity_start_model(bert_path, ckpt_file, max_seq_len, bert_dim):
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params,
                                            name="bert",
                                            trainable=True)
    slice_fn = make_gather_entity_start_fn(bert_dim)

    input_ids = Input(shape=(max_seq_len, ), dtype='int32')
    index_ent1 = Input(shape=(2, ), dtype='int32')
    index_ent2 = Input(shape=(2, ), dtype='int32')
    bert_emb = bert_layer(input_ids)
    ent1_start = Lambda(lambda x: slice_fn(x))([bert_emb, index_ent1])
    ent2_start = Lambda(lambda x: slice_fn(x))([bert_emb, index_ent2])
    concat = concatenate([ent1_start, ent2_start])
    output = Dense(2, activation='softmax')(concat)
    model = Model(inputs=[input_ids, index_ent1, index_ent2], outputs=output)
    model.build(input_shape=(None, max_seq_len))

    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model
Example #6
def construct_bert(model_dir,
                   timesteps,
                   classes,
                   dense_dropout=0.5,
                   attention_dropout=0.3,
                   hidden_dropout=0.3,
                   adapter_size=8):
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")
    bert_config_file = os.path.join(model_dir, "bert_config.json")
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    bert_model = bert.BertModelLayer.from_params(bert_params, name="bert")

    input_ids = Input(shape=(timesteps, ), dtype='int32', name="input_ids_1")
    token_type_ids = Input(shape=(timesteps, ),
                           dtype='int32',
                           name="token_type_ids_1")

    dense = Dense(units=768, activation="tanh", name="dense")
    output = bert_model([input_ids, token_type_ids])  # output: [batch_size, max_seq_len, hidden_size]

    print("bert shape", output.shape)
    cls_out = Lambda(lambda seq: seq[:, 0:1, :])(output)
    cls_out = Dropout(dense_dropout)(cls_out)
    logits = dense(cls_out)
    logits = Dropout(dense_dropout)(logits)
    logits = Dense(units=classes, activation="softmax",
                   name="output_1")(logits)

    model = Model(inputs=[input_ids, token_type_ids], outputs=logits)
    model.build(input_shape=(None, timesteps))

    # load the pre-trained model weights
    load_stock_weights(bert_model, bert_ckpt_file)
    return model
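A hedged usage sketch for construct_bert; the checkpoint directory, sequence length and class count are assumptions. Note that the seq[:, 0:1, :] slice above keeps the time axis, so predictions come back with an extra length-1 dimension.

import numpy as np

model = construct_bert(model_dir="uncased_L-12_H-768_A-12",  # assumed download location
                       timesteps=128,
                       classes=5)

ids = np.zeros((2, 128), dtype=np.int32)       # padded token ids
segments = np.zeros((2, 128), dtype=np.int32)  # single-segment inputs
probs = model.predict([ids, segments])         # shape (2, 1, 5) because of the 0:1 slice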
Example #7
def get_bert_config(model_dir):
    """Function to get the bert config params
    
    Arguments:
        model_dir {String} -- Path to the directory containing bert_config.json
    """
    return params_from_pretrained_ckpt(model_dir)
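A minimal sketch of how the returned params object is typically used; the directory name is an assumption and must contain bert_config.json. Individual fields can be overridden before the layer is built.

import bert

bert_params = get_bert_config("uncased_L-12_H-768_A-12")
bert_params.hidden_dropout = 0.1   # override selected fields before building the layer
bert_params.adapter_size = 4
l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")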
Example #8
def build_transformer(transformer,
                      max_seq_length,
                      num_labels,
                      tagging=True,
                      tokenizer_only=False):
    if transformer in albert_models_google:
        from bert.tokenization.albert_tokenization import FullTokenizer
        model_url = albert_models_google[transformer]
        albert = True
    elif transformer in bert_models_google:
        from bert.tokenization.bert_tokenization import FullTokenizer
        model_url = bert_models_google[transformer]
        albert = False
    else:
        raise ValueError(
            f'Unknown model {transformer}, available ones: {list(bert_models_google.keys()) + list(albert_models_google.keys())}'
        )
    bert_dir = get_resource(model_url)
    vocab = glob.glob(os.path.join(bert_dir, '*vocab*.txt'))
    assert len(vocab) == 1, 'No vocab found or ambiguous vocabs found'
    vocab = vocab[0]
    # noinspection PyTypeChecker
    tokenizer = FullTokenizer(vocab_file=vocab)
    if tokenizer_only:
        return tokenizer
    bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                        dtype='int32',
                                        name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                       dtype='int32',
                                       name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                             dtype='int32',
                                             name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids], mask=l_mask_ids)
    if not tagging:
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    if bert_params.hidden_dropout:
        output = tf.keras.layers.Dropout(bert_params.hidden_dropout,
                                         name='hidden_dropout')(output)
    logits = tf.keras.layers.Dense(
        num_labels,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(
            bert_params.initializer_range))(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids],
                           outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    ckpt = glob.glob(os.path.join(bert_dir, '*.index'))
    assert ckpt, f'No checkpoint found under {bert_dir}'
    ckpt, _ = os.path.splitext(ckpt[0])
    with stdout_redirected(to=os.devnull):
        if albert:
            skipped_weight_value_tuples = load_stock_weights(l_bert, ckpt)
        else:
            skipped_weight_value_tuples = bert.load_bert_weights(l_bert, ckpt)
    assert 0 == len(skipped_weight_value_tuples), f'failed to load pretrained {transformer}'
    return model, tokenizer
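A hedged call sketch for build_transformer; the model key is an assumption and must be one of the entries in bert_models_google or albert_models_google.

model, tokenizer = build_transformer("uncased_L-12_H-768_A-12",
                                     max_seq_length=128,
                                     num_labels=10,
                                     tagging=False)
token_ids = tokenizer.convert_tokens_to_ids(
    ["[CLS]"] + tokenizer.tokenize("hello world") + ["[SEP]"])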
Example #9
    def test_extend_pretrained_tokens(self):
        model_dir = tempfile.TemporaryDirectory().name
        os.makedirs(model_dir)
        save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
        tokenizer = bert.FullTokenizer(vocab_file=os.path.join(
            model_dir, "vocab.txt"),
                                       do_lower_case=True)

        ckpt_dir = os.path.dirname(save_path)
        bert_params = bert.params_from_pretrained_ckpt(ckpt_dir)

        self.assertEqual(bert_params.token_type_vocab_size, 2)
        bert_params.extra_tokens_vocab_size = 3

        l_bert = bert.BertModelLayer.from_params(bert_params)
        # call the layer once on dummy input in order to instantiate the weights
        l_bert([np.array([[1, 1, 0]]),
                np.array([[1, 0, 0]])],
               mask=[[True, True, False]])

        mismatched = bert.load_stock_weights(l_bert, save_path)
        self.assertEqual(0, len(mismatched),
                         "no weights should have a mismatched shape")

        l_bert([np.array([[1, -3, 0]]),
                np.array([[1, 0, 0]])],
               mask=[[True, True, False]])
Example #10
def make_entity_border_encoder(bert_path, ckpt_file, max_seq_len, bert_dim):
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params,
                                            name="bert",
                                            trainable=False)
    gather_fn = make_gather_entity_border_fn(bert_dim)

    input_ids = Input(shape=(max_seq_len, ), dtype='int32')
    index_border_ent1 = Input(shape=(2, ), dtype='int32')
    index_border_ent2 = Input(shape=(2, ), dtype='int32')
    bert_emb = bert_layer(input_ids)
    ent1_avg_emb = Lambda(lambda x: gather_fn(x))(
        [bert_emb, index_border_ent1])
    ent2_avg_emb = Lambda(lambda x: gather_fn(x))(
        [bert_emb, index_border_ent2])
    ent1_flatten = Flatten()(ent1_avg_emb)
    ent2_flatten = Flatten()(ent2_avg_emb)
    output = concatenate([ent1_flatten, ent2_flatten])

    model = Model(inputs=[input_ids, index_border_ent1, index_border_ent2],
                  outputs=output)
    model.build(input_shape=(None, max_seq_len))

    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

    return model
Example #11
    def test_bert_freeze(self):
        model_dir = tempfile.TemporaryDirectory().name
        os.makedirs(model_dir)
        save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
        tokenizer = bert.bert_tokenization.FullTokenizer(vocab_file=os.path.join(model_dir, "vocab.txt"), do_lower_case=True)

        # prepare input
        max_seq_len  = 24
        input_str_batch    = ["hello, bert!", "how are you doing!"]

        input_ids, token_type_ids = self.prepare_input_batch(input_str_batch, tokenizer, max_seq_len)

        bert_ckpt_file   = os.path.join(model_dir, "bert_model.ckpt")

        bert_params = bert.params_from_pretrained_ckpt(model_dir)
        bert_params.adapter_size = 4
        l_bert = bert.BertModelLayer.from_params(bert_params)

        model = keras.models.Sequential([
            l_bert,
        ])

        model.build(input_shape=(None, max_seq_len))

        model.summary()
        l_bert.apply_adapter_freeze()
        model.summary()

        bert.load_stock_weights(l_bert, bert_ckpt_file)
        #l_bert.embeddings_layer.trainable = False

        model.summary()

        orig_weight_values = []
        for weight in l_bert.weights:
            orig_weight_values.append(weight.numpy())

        model.compile(optimizer=keras.optimizers.Adam(),
                      loss=keras.losses.mean_squared_error,
                      run_eagerly=True)

        trainable_count = len(l_bert.trainable_weights)

        orig_pred = model.predict(input_ids)
        model.fit(x=input_ids, y=np.zeros_like(orig_pred),
                  batch_size=2,
                  epochs=4)

        trained_count = 0
        for ndx, weight in enumerate(l_bert.weights):
            weight_equal = np.array_equal(weight.numpy(), orig_weight_values[ndx])
            print("{}: {}".format(weight_equal, weight.name))
            if not weight_equal:
                trained_count += 1

        print("  trained weights:", trained_count)
        print("trainable weights:", trainable_count)
        self.assertEqual(trained_count, trainable_count)

        model.summary()
Example #12
    def build_transformer():
        bert_params = params_from_pretrained_ckpt(FLAGS.cs_model_loc)
        bert_params.hidden_dropout = 1 - FLAGS.cs_kp_tfm_hidden
        bert_params.attention_dropout = 1 - FLAGS.cs_kp_tfm_atten

        return AdvBertModelLayer.from_params(bert_params,
                                             name=FLAGS.cs_tfm_type)
Example #13
def run_entity_marker_cls(bert_model_dir, do_lower_case):
    vocab_file = os.path.join(bert_model_dir, "vocab.txt")
    processor = input_processors.EntityProcessor(vocab_file=vocab_file,
                                                 do_lower_case=do_lower_case,
                                                 max_seq_length=128)

    head = heads.CLSHead(n_classes=1,
                         out_activation="sigmoid",
                         bias_initializer="zeros",
                         dropout_rate=0.0)
    inputs = processor.get_input_placeholders()
    bert_params = bert.params_from_pretrained_ckpt(bert_model_dir)
    bert_params["vocab_size"] = processor.vocab_size
    model_ckpt = os.path.join(bert_model_dir, "bert_model.ckpt")
    # Calls model.build()
    model = get_bert_classifier(inputs, bert_params, model_ckpt, head)

    opt = tf.keras.optimizers.Adam(learning_rate=3e-5)
    loss_fn = "binary_crossentropy"
    model.compile(optimizer=opt, loss=loss_fn)

    bert_inputs, tokenized_docs = processor.process(docs["entity"])
    loss = model.evaluate(bert_inputs, np.array([1]))

    print()
    print("=== Entity CLS ===")
    print(docs["entity"])
    print(tokenized_docs)
    print(bert_inputs)
    print()
    model.summary()
    print(f"Loss: {loss}")
Example #14
def create_model(max_seq_len,
                 model_dir,
                 model_ckpt,
                 freeze=True,
                 adapter_size=4):
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    print(f'bert params: {bert_params}')
    bert_params.adapter_size = adapter_size
    bert_params.adapter_init_scale = 1e-5
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len, ),
                                   dtype='int32',
                                   name="input_ids")
    bert_output = l_bert(input_ids)

    print("bert shape", bert_output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :],
                                  name='lambda')(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(name='dense_sin',
                                units=768,
                                activation=tf.math.sin)(cls_out)
    # logits = keras.layers.Dense(name='dense_tanh', units=768, activation="tanh")(cls_out)
    # logits = keras.layers.Dense(name='dense_relu', units=256, activation="relu")(cls_out)
    # logits = keras.layers.Dense(name='dense_gelu', units=256, activation="gelu")(cls_out)
    logits = keras.layers.BatchNormalization()(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(name='initial_predictions',
                                units=len(classes),
                                activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    model.summary()
    if freeze:
        l_bert.apply_adapter_freeze()
        l_bert.embeddings_layer.trainable = False
        model.summary()

    # Additional info: https://arxiv.org/abs/1902.00751
    # apply global regularization on all trainable dense layers
    pf.utils.add_dense_layer_loss(
        model,
        kernel_regularizer=keras.regularizers.l2(0.01),
        bias_regularizer=keras.regularizers.l2(0.01))

    model.compile(
        optimizer=pf.optimizers.RAdam(),
        # loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # for some reason it does not work at all with logits
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    bert.load_stock_weights(l_bert, model_ckpt)
    # bert.load_bert_weights(l_bert, model_ckpt)

    return model
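A hedged call sketch for create_model above. The paths are assumptions, and the global classes list has to exist before the call because the output layer sizes itself with len(classes).

classes = ["negative", "neutral", "positive"]   # assumed global used inside create_model

model = create_model(max_seq_len=128,
                     model_dir="multi_cased_L-12_H-768_A-12",
                     model_ckpt="multi_cased_L-12_H-768_A-12/bert_model.ckpt",
                     freeze=True,
                     adapter_size=4)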
Example #15
def create_bert_layer():
    global bert_layer

    bert_params = bert.params_from_pretrained_ckpt(models_folder)

    bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")

    # with adapter
    bert_layer.apply_adapter_freeze()
Example #16
def create_bert_layer() -> bert.BertModelLayer:
    bert_params = bert.params_from_pretrained_ckpt(BERT_DIR)
    bert_params.mask_zero = True

    bert_layer = bert.BertModelLayer.from_params(bert_params, name='bert')

    bert_layer.apply_adapter_freeze()

    return bert_layer
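A sketch of wiring the returned layer into a Keras model and loading the pretrained weights; the sequence length and the checkpoint file name under BERT_DIR are assumptions.

import os
import bert
from tensorflow import keras

max_seq_len = 128
l_bert = create_bert_layer()

input_ids = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids")
seq_output = l_bert(input_ids)                 # [batch_size, max_seq_len, hidden_size]
model = keras.Model(inputs=input_ids, outputs=seq_output)
model.build(input_shape=(None, max_seq_len))

# load the pretrained weights into the layer
bert.load_stock_weights(l_bert, os.path.join(BERT_DIR, "bert_model.ckpt"))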
Example #17
    def test_extend_pretrained_segments(self):

        model_dir = tempfile.TemporaryDirectory().name
        os.makedirs(model_dir)
        save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
        tokenizer = bert.FullTokenizer(vocab_file=os.path.join(
            model_dir, "vocab.txt"),
                                       do_lower_case=True)

        ckpt_dir = os.path.dirname(save_path)
        bert_params = bert.params_from_pretrained_ckpt(ckpt_dir)

        self.assertEqual(bert_params.token_type_vocab_size, 2)
        bert_params.token_type_vocab_size = 4

        l_bert = bert.BertModelLayer.from_params(bert_params)

        # call the layer once on dummy input in order to instantiate the weights
        l_bert([np.array([[1, 1, 0]]),
                np.array([[1, 0, 0]])])  #, mask=[[True, True, False]])

        #
        # - load the weights from a pre-trained model,
        # - expect a mismatch for the token_type embeddings
        # - use the segment/token type id=0 embedding for the missing token types
        #
        mismatched = bert.load_stock_weights(l_bert, save_path)

        self.assertEqual(1, len(mismatched),
                         "token_type embeddings should have mismatched shape")

        for weight, value in mismatched:
            if re.match("(.*)embeddings/token_type_embeddings/embeddings:0",
                        weight.name):
                seg0_emb = value[:1, :]
                new_segment_embeddings = np.repeat(
                    seg0_emb, (weight.shape[0] - value.shape[0]), axis=0)
                new_value = np.concatenate([value, new_segment_embeddings],
                                           axis=0)
                keras.backend.batch_set_value([(weight, new_value)])

        tte = l_bert.embeddings_layer.token_type_embeddings_layer.weights[0]

        if not tf.executing_eagerly():
            with tf.keras.backend.get_session() as sess:
                tte, = sess.run((tte, ))

        self.assertTrue(np.allclose(seg0_emb, tte[0], 1e-6))
        self.assertFalse(np.allclose(seg0_emb, tte[1], 1e-6))
        self.assertTrue(np.allclose(seg0_emb, tte[2], 1e-6))
        self.assertTrue(np.allclose(seg0_emb, tte[3], 1e-6))

        bert_params.token_type_vocab_size = 4
        print("token_type_vocab_size", bert_params.token_type_vocab_size)
        print(l_bert.embeddings_layer.trainable_weights[1])
Example #18
    def test_albert_chinese_weights(self):
        albert_model_name = "albert_base"
        albert_dir = bert.fetch_brightmart_albert_model(albert_model_name, ".models")
        albert_ckpt = os.path.join(albert_dir, "albert_model.ckpt")

        albert_params = bert.params_from_pretrained_ckpt(albert_dir)
        model, l_bert = self.build_model(albert_params)

        skipped_weight_value_tuples = bert.load_albert_weights(l_bert, albert_ckpt)
        self.assertEqual(0, len(skipped_weight_value_tuples))
        model.summary()
Example #19
    def buildModel(self):

        bert_params = bert.params_from_pretrained_ckpt(self.preModelPath)

        inputLayer1 = keras.layers.Input(shape=(self.maxLen, ), dtype='int32')
        embeddingLayer1 = keras.layers.Embedding(
            input_dim=self.vocabSize + 1,
            output_dim=self.hiddenSize,
            input_length=self.maxLen,
        )(inputLayer1)
        reshapeLayer1 = keras.layers.Reshape(
            (self.maxLen, self.hiddenSize, 1))(embeddingLayer1)

        inputLayer2 = keras.layers.Input(shape=(self.maxLen, ), dtype='int32')
        embeddingLayer2 = keras.layers.Embedding(
            input_dim=self.vocabSize + 1,
            output_dim=self.hiddenSize,
            input_length=self.maxLen)(inputLayer2)
        reshapeLayer2 = keras.layers.Reshape(
            (self.maxLen, self.hiddenSize, 1))(embeddingLayer2)

        cnnLayer1 = keras.layers.Conv2D(
            3, kernel_size=(3, self.hiddenSize))(reshapeLayer1)
        poolingLayer1 = keras.layers.MaxPool2D(pool_size=(3, 1))(cnnLayer1)
        flattenLayer1 = keras.layers.Flatten()(poolingLayer1)
        denseLayer1 = keras.layers.Dense(128, activation="tanh")(flattenLayer1)
        denseLayer1 = keras.layers.Dense(64, activation="tanh")(denseLayer1)
        denseLayer1 = keras.layers.Dense(32, activation="tanh")(denseLayer1)

        cnnLayer2 = keras.layers.Conv2D(
            3, kernel_size=(3, self.hiddenSize))(reshapeLayer2)
        poolingLayer2 = keras.layers.MaxPool2D(pool_size=(3, 1))(cnnLayer2)
        flattenLayer2 = keras.layers.Flatten()(poolingLayer2)
        denseLayer2 = keras.layers.Dense(128, activation="tanh")(flattenLayer2)
        denseLayer2 = keras.layers.Dense(64, activation="tanh")(denseLayer2)
        denseLayer2 = keras.layers.Dense(32, activation="tanh")(denseLayer2)

        BLC1 = keras.layers.LayerNormalization()(denseLayer1)
        BLC2 = keras.layers.LayerNormalization()(denseLayer2)
        multLayer = keras.layers.Dot(axes=1, normalize=True)([BLC1, BLC2])
        nlLayer = 1 - multLayer
        concatLayer = keras.layers.concatenate([nlLayer, multLayer], axis=-1)

        # denseLayer=keras.layers.Dense(64,activation="tanh")(multLayer)
        # denseLayer=keras.layers.Dense(32,activation="tanh")(denseLayer)
        # denseLayer=keras.layers.Dense(16,activation="tanh")(denseLayer)

        # outputLayer=keras.layers.Dense(2,name="classifier",activation="softmax")(denseLayer)

        self.model = keras.models.Model([inputLayer1, inputLayer2],
                                        concatLayer)
        self.model.compile(loss=self.amsoftmax_loss,
                           optimizer=tf.keras.optimizers.Adam(
                               learning_rate=self.learning_rate))
Example #20
    def test_bert_google_weights(self):
        bert_model_name = "uncased_L-12_H-768_A-12"
        bert_dir = bert.fetch_google_bert_model(bert_model_name, ".models")
        bert_ckpt = os.path.join(bert_dir, "bert_model.ckpt")

        bert_params = bert.params_from_pretrained_ckpt(bert_dir)
        model, l_bert = self.build_model(bert_params)

        skipped_weight_value_tuples = bert.load_bert_weights(l_bert, bert_ckpt)
        self.assertEqual(0, len(skipped_weight_value_tuples))
        model.summary()
Example #21
def bLayer():
    global bert_layer

    pTrain_dir = 'cased_L-12_H-768_A-12'

    bert_params = bert.params_from_pretrained_ckpt(pTrain_dir)

    bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")

    bert_layer.apply_adapter_freeze()

    bert_layer.trainable = True
Example #22
    def test_albert_zh_fetch_and_load(self):
        albert_model_name = "albert_tiny"
        albert_dir = bert.fetch_brightmart_albert_model(
            albert_model_name, ".models")

        model_params = bert.params_from_pretrained_ckpt(albert_dir)
        model_params.vocab_size = model_params.vocab_size + 2
        model_params.adapter_size = 1
        l_bert = bert.BertModelLayer.from_params(model_params, name="albert")
        l_bert(tf.zeros((1, 128)))
        res = bert.load_albert_weights(l_bert, albert_dir)
        self.assertTrue(len(res) > 0)
Example #23
    def test_coverage_improve(self):
        bert_params = bert.params_from_pretrained_ckpt(self.ckpt_dir)
        model, l_bert = self.build_model(bert_params, 1)
        for weight in model.weights:
            try:
                name = bert.loader.map_to_stock_variable_name(
                    weight.name,
                    weight.name.split("/")[0])
                stock_name = bert.loader.map_from_stock_variale_name(name)
                self.assertEqual(name, stock_name)
            except:
                print(weight.name)
Example #24
    def create_bert_model(self):
        bert_params = bert.params_from_pretrained_ckpt(self.model_dir)
        l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

        l_input_ids = tf.keras.layers.Input(shape=(self.max_seq_length, ),
                                            dtype='int32')
        output = l_bert(l_input_ids)
        model = tf.keras.Model(inputs=l_input_ids, outputs=output)
        model.build(input_shape=(None, self.max_seq_length))

        bert.load_stock_weights(l_bert, self.model_ckpt)

        return model
Example #25
	def __init__(self, lang='en', adapter_size=None):
		model_name = lang_models[lang]
		model_dir = os.path.join(root_dir, model_name)
		self.bert_params = bert.params_from_pretrained_ckpt(model_dir)
		self.bert_params.adapter_size = adapter_size
		self.model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
		self.l_bert = bert.BertModelLayer.from_params(self.bert_params, name="bert")

		do_lower_case = not (model_name.find("cased") == 0 or model_name.find("multi_cased") == 0)
		bert.bert_tokenization.validate_case_matches_checkpoint(do_lower_case, self.model_ckpt)
		vocab_file = os.path.join(model_dir, "vocab.txt")

		self.tokenizer = bert.bert_tokenization.FullTokenizer(vocab_file, do_lower_case)
Example #26
def create_model(
    model_dir, model_type, max_seq_len, n_classes, load_pretrained_weights=True, summary=False,
):
    """Creates keras model with pretrained BERT/ALBERT layer.

    Args:
        model_dir: String. Path to model.
        model_type: String. Expects either "albert" or "bert"
        max_seq_len: Int. Maximum length of a classification example.
        n_classes: Int. Number of training classes.
        load_pretrained_weights: Boolean. Load pretrained model weights.
        summary: Boolean. Print model summary.

    Returns:
        Keras model
    """
    if model_type == "albert":
        model_ckpt = os.path.join(model_dir, "model.ckpt-best")
        model_params = bert.albert_params(model_dir)
    elif model_type == "bert":
        model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
        model_params = bert.params_from_pretrained_ckpt(model_dir)

    layer_bert = bert.BertModelLayer.from_params(model_params, name=model_type)

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids")
    output = layer_bert(input_ids)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=model_params["hidden_size"], activation="relu")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=n_classes, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    if load_pretrained_weights:
        if model_type == "albert":
            bert.load_albert_weights(layer_bert, model_ckpt)
        elif model_type == "bert":
            bert.load_bert_weights(layer_bert, model_ckpt)

    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
    )
    if summary:
        model.summary()
    return model
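A hedged call sketch for create_model; the directory is an assumption and must contain bert_config.json, vocab.txt and the bert_model.ckpt files.

model = create_model(
    model_dir="uncased_L-12_H-768_A-12",
    model_type="bert",
    max_seq_len=128,
    n_classes=3,
    load_pretrained_weights=True,
    summary=True,
)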
Example #27
    def buildModel(self):
        
        inputLayer = keras.layers.Input(shape=(self.maxLen,), dtype='int32')

        bert_params = bert.params_from_pretrained_ckpt(self.preModelPath)
        bertLayer = bert.BertModelLayer.from_params(bert_params, name="bert")(inputLayer)

        flattenLayer = keras.layers.Flatten()(bertLayer)
        outputLayer = keras.layers.Dense(
            self.classNum, activation="softmax")(flattenLayer)

        self.model = keras.models.Model(inputLayer,outputLayer)
        self.model.compile(loss="SparseCategoricalCrossentropy",
                            optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate))
Example #28
    def test_coverage_improve(self):
        bert_params = bert.params_from_pretrained_ckpt(self.ckpt_dir)
        model, l_bert = self.build_model(bert_params, 1)
        for weight in model.weights:
            l_bert_prefix = bert.loader.bert_prefix(l_bert)

            stock_name = bert.loader.map_to_stock_variable_name(weight.name, l_bert_prefix)

            if stock_name is None:
                print("No BERT stock weight for", weight.name)
                continue

            keras_name = bert.loader.map_from_stock_variale_name(stock_name, l_bert_prefix)
            self.assertEqual(weight.name.split(":")[0], keras_name)
Example #29
    def __init__(self,
                 embedding_size=100,
                 hidden_size=100,
                 bidirectional=True,
                 layer_size=1,
                 dropout=.5,
                 recurrent_dropout=.0,
                 embedding_weights=None,
                 embedding_trainable=True,
                 vocab_file=None,
                 bert=False,
                 bert_model_dir=None,
                 bert_max_length=4096,
                 bert_params=None,
                 bert_num_layers=None,
                 bert_trainable=False,
                 learning_rate=None,
                 optimizer='Adam',
                 loss='crf'):

        if bert:
            assert bert_params is not None or bert_model_dir is not None
            if bert_params is None:
                self.bert_params = params_from_pretrained_ckpt(bert_model_dir)
        else:
            self.bert_params = None

        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.bidirectional = bidirectional
        self.layer_size = layer_size
        self.dropout = dropout
        self.recurrent_dropout = recurrent_dropout
        self.model = None
        self.vocab_file = vocab_file
        self.bert = bert
        self.bert_model_dir = bert_model_dir
        self.bert_max_length = bert_max_length
        self.bert_num_layers = bert_num_layers
        self.bert_trainable = bert_trainable
        self.embedding_weights = embedding_weights
        self.embedding_trainable = embedding_trainable
        self.learning_rate = learning_rate
        self.optimizer = optimizer
        self.loss = loss

        self.tokenizer = None
        self.label = None
        self.batch_size = 32
Example #30
def Bert_feature_extraction(ids, texts, max_seq_len, feature_file_name):
    # https://github.com/kpe/bert-for-tf2
    model_dir = ".models/uncased_L-12_H-768_A-12/uncased_L-12_H-768_A-12"
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

    l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')
    l_token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')

    # using the default token_type/segment id 0
    output = l_bert(l_input_ids)  # output: [batch_size, max_seq_len, hidden_size]

    output = keras.layers.GlobalAveragePooling1D()(output)
    model = keras.Model(inputs=l_input_ids, outputs=output)
    model.build(input_shape=(None, max_seq_len))

    bert.load_stock_weights(l_bert, bert_ckpt_file)

    do_lower_case = not (model_dir.find("cased") == 0 or model_dir.find("multi_cased") == 0)
    bert.bert_tokenization.validate_case_matches_checkpoint(do_lower_case, bert_ckpt_file)
    vocab_file = os.path.join(model_dir, "vocab.txt")
    tokenizer = bert.bert_tokenization.FullTokenizer(vocab_file, do_lower_case)

    feature_dict = {}
    for i in range(len(ids)):
        id = ids[i]
        print(id)
        title = texts[i]
        tokens = tokenizer.tokenize(title)
        print(tokens)
        tokens = ["[CLS]"] + tokens + ["[SEP]"]
        token_ids = tokenizer.convert_tokens_to_ids(tokens)

        while len(token_ids) < max_seq_len:
            token_ids.append(0)
        if len(token_ids) > max_seq_len:
            token_ids = token_ids[:max_seq_len]
        print(token_ids)

        token_ids = np.array([token_ids], dtype=np.int32)

        feature = model.predict(token_ids)

        feature_dict[id] = feature.tolist()[0]

    np.save(feature_file_name, feature_dict)
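A small sketch for reading the saved features back; the file name is an assumption. Because np.save stores the dict as a 0-d object array, loading it needs allow_pickle=True and .item().

import numpy as np

features = np.load("title_features.npy", allow_pickle=True).item()  # assumed file name
# features[some_id] is the mean-pooled BERT vector for that text (length hidden_size)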