def __init__(self, model_dir: str, tvu: TargetVocabUtil, max_seq_len: int, freeze_bert_layer: bool):
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    self.l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
    self.vu = tvu
    self.max_seq_len = max_seq_len
    self.freeze_bert_layer = freeze_bert_layer
def bert_2(self, bert_config_file=None, bert_ckpt_file=None):
    # params_from_pretrained_ckpt reads bert_config.json from the checkpoint
    # directory itself, so there is no need to open the config file manually.
    bert_params = params_from_pretrained_ckpt(bert_ckpt_file)
    l_bert = BertModelLayer.from_params(bert_params, name="bert")
    # l_bert.apply_adapter_freeze()
    # l_bert.embeddings_layer.trainable = False

    # Sentence encoder: BERT over 150 tokens, average-pooled to one vector.
    in_sentence = Input(shape=(150,), dtype='int64', name="Input1")
    bert_output = l_bert(in_sentence)
    pooled_output = GlobalAveragePooling1D()(bert_output)
    sentence_model = Model(in_sentence, pooled_output)

    # Section encoder: 300 sentences per section, encoded sentence-by-sentence,
    # then aggregated with an LSTM and classified into 21 labels.
    section_input = Input(shape=(300, 150), dtype='int64', name="Input2")
    section_encoded = TimeDistributed(sentence_model)(section_input)
    section_encoded = LSTM(300)(section_encoded)
    section_encoded = Dense(21)(section_encoded)
    section_model = Model(section_input, section_encoded)
    section_model.compile(optimizer="adam", loss="binary_crossentropy")

    sentence_model.summary()
    section_model.summary()
    return section_model
def _create_bert_layer(self):
    # Loads a BERT Keras layer from a downloaded pretrained checkpoint.
    bert_params = bert.params_from_pretrained_ckpt(self.bert_dir)
    bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")
    bert_layer.apply_adapter_freeze()
    return bert_layer
def test_regularization(self):
    # create a BERT layer with config from the checkpoint
    bert_params = bert.params_from_pretrained_ckpt(self.ckpt_dir)
    max_seq_len = 12
    model, l_bert = self.build_model(bert_params, max_seq_len=max_seq_len)
    l_bert.apply_adapter_freeze()
    model.summary()

    kernel_regularizer = keras.regularizers.l2(0.01)
    bias_regularizer = keras.regularizers.l2(0.01)
    pf.utils.add_dense_layer_loss(model,
                                  kernel_regularizer=kernel_regularizer,
                                  bias_regularizer=bias_regularizer)

    # prepare the data
    inputs, targets = ["hello world", "goodbye"], [1, 2]
    tokens = [self.tokenizer.tokenize(text) for text in inputs]
    tokens = [self.tokenizer.convert_tokens_to_ids(toks) for toks in tokens]
    tokens = [toks + [0] * (max_seq_len - len(toks)) for toks in tokens]
    x = np.array(tokens)
    y = np.array(targets)

    # fine tune
    model.fit(x, y, epochs=3)
def make_entity_start_model(bert_path, ckpt_file, max_seq_len, bert_dim):
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params, name="bert", trainable=True)
    slice_fn = make_gather_entity_start_fn(bert_dim)

    # inputs: token ids plus index pairs locating each entity's start token
    input_ids = Input(shape=(max_seq_len,), dtype='int32')
    index_ent1 = Input(shape=(2,), dtype='int32')
    index_ent2 = Input(shape=(2,), dtype='int32')

    bert_emb = bert_layer(input_ids)
    ent1_start = Lambda(slice_fn)([bert_emb, index_ent1])
    ent2_start = Lambda(slice_fn)([bert_emb, index_ent2])
    concat = concatenate([ent1_start, ent2_start])
    output = Dense(2, activation='softmax')(concat)

    model = Model(inputs=[input_ids, index_ent1, index_ent2], outputs=output)
    model.build(input_shape=(None, max_seq_len))
    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
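# A minimal usage sketch for make_entity_start_model. The directory, checkpoint
# name, and dimensions below are assumptions for illustration; note that
# bert_path is concatenated with ckpt_file, so it needs a trailing slash.
entity_model = make_entity_start_model(
    bert_path="uncased_L-12_H-768_A-12/",  # hypothetical checkpoint directory
    ckpt_file="bert_model.ckpt",
    max_seq_len=128,
    bert_dim=768)
entity_model.summary()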
def construct_bert(model_dir, timesteps, classes,
                   dense_dropout=0.5, attention_dropout=0.3, hidden_dropout=0.3,
                   adapter_size=8):
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    bert_model = bert.BertModelLayer.from_params(bert_params, name="bert")

    input_ids = Input(shape=(timesteps,), dtype='int32', name="input_ids_1")
    token_type_ids = Input(shape=(timesteps,), dtype='int32', name="token_type_ids_1")
    dense = Dense(units=768, activation="tanh", name="dense")

    output = bert_model([input_ids, token_type_ids])  # output: [batch_size, max_seq_len, hidden_size]
    print("bert shape", output.shape)

    # take the [CLS] token's hidden state, dropping the sequence axis so the
    # final softmax produces [batch_size, classes]
    cls_out = Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = Dropout(dense_dropout)(cls_out)
    logits = dense(cls_out)
    logits = Dropout(dense_dropout)(logits)
    logits = Dense(units=classes, activation="softmax", name="output_1")(logits)

    model = Model(inputs=[input_ids, token_type_ids], outputs=logits)
    model.build(input_shape=(None, timesteps))

    # load the pre-trained model weights
    load_stock_weights(bert_model, bert_ckpt_file)
    return model
def get_bert_config(model_dir):
    """Function to get the bert config params

    Arguments:
        model_dir {String} -- Path to the directory containing bert_config.json

    Returns:
        The bert params loaded from the pretrained checkpoint directory.
    """
    return params_from_pretrained_ckpt(model_dir)
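# A minimal usage sketch for get_bert_config; the directory below is a
# placeholder that assumes an unpacked Google BERT checkpoint.
params = get_bert_config("uncased_L-12_H-768_A-12")  # hypothetical path
print(params.hidden_size, params.num_layers)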
def build_transformer(transformer, max_seq_length, num_labels, tagging=True, tokenizer_only=False):
    if transformer in albert_models_google:
        from bert.tokenization.albert_tokenization import FullTokenizer
        model_url = albert_models_google[transformer]
        albert = True
    elif transformer in bert_models_google:
        from bert.tokenization.bert_tokenization import FullTokenizer
        model_url = bert_models_google[transformer]
        albert = False
    else:
        raise ValueError(
            f'Unknown model {transformer}, available ones: '
            f'{list(bert_models_google.keys()) + list(albert_models_google.keys())}')
    bert_dir = get_resource(model_url)
    vocab = glob.glob(os.path.join(bert_dir, '*vocab*.txt'))
    assert len(vocab) == 1, 'No vocab found or ambiguous vocabs found'
    vocab = vocab[0]
    # noinspection PyTypeChecker
    tokenizer = FullTokenizer(vocab_file=vocab)
    if tokenizer_only:
        return tokenizer
    bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids], mask=l_mask_ids)
    if not tagging:
        # sequence classification: keep only the [CLS] token's hidden state
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    if bert_params.hidden_dropout:
        output = tf.keras.layers.Dropout(bert_params.hidden_dropout, name='hidden_dropout')(output)
    logits = tf.keras.layers.Dense(
        num_labels,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(bert_params.initializer_range))(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids], outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    ckpt = glob.glob(os.path.join(bert_dir, '*.index'))
    assert ckpt, f'No checkpoint found under {bert_dir}'
    ckpt, _ = os.path.splitext(ckpt[0])
    with stdout_redirected(to=os.devnull):
        if albert:
            skipped_weight_value_tuples = load_stock_weights(l_bert, ckpt)
        else:
            skipped_weight_value_tuples = bert.load_bert_weights(l_bert, ckpt)
    assert 0 == len(skipped_weight_value_tuples), f'failed to load pretrained {transformer}'
    return model, tokenizer
def test_extend_pretrained_tokens(self):
    model_dir = tempfile.TemporaryDirectory().name
    os.makedirs(model_dir)
    save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
    tokenizer = bert.FullTokenizer(vocab_file=os.path.join(model_dir, "vocab.txt"),
                                   do_lower_case=True)

    ckpt_dir = os.path.dirname(save_path)
    bert_params = bert.params_from_pretrained_ckpt(ckpt_dir)
    self.assertEqual(bert_params.token_type_vocab_size, 2)
    bert_params.extra_tokens_vocab_size = 3

    l_bert = bert.BertModelLayer.from_params(bert_params)
    # we dummy call the layer once in order to instantiate the weights
    l_bert([np.array([[1, 1, 0]]), np.array([[1, 0, 0]])],
           mask=[[True, True, False]])

    mismatched = bert.load_stock_weights(l_bert, save_path)
    self.assertEqual(0, len(mismatched),
                     "no weights should have mismatched shapes")

    # the extra tokens are addressed with negative ids
    l_bert([np.array([[1, -3, 0]]), np.array([[1, 0, 0]])],
           mask=[[True, True, False]])
def make_entity_border_encoder(bert_path, ckpt_file, max_seq_len, bert_dim):
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params, name="bert", trainable=False)
    gather_fn = make_gather_entity_border_fn(bert_dim)

    input_ids = Input(shape=(max_seq_len,), dtype='int32')
    index_border_ent1 = Input(shape=(2,), dtype='int32')
    index_border_ent2 = Input(shape=(2,), dtype='int32')

    bert_emb = bert_layer(input_ids)
    ent1_avg_emb = Lambda(gather_fn)([bert_emb, index_border_ent1])
    ent2_avg_emb = Lambda(gather_fn)([bert_emb, index_border_ent2])
    ent1_flatten = Flatten()(ent1_avg_emb)
    ent2_flatten = Flatten()(ent2_avg_emb)
    output = concatenate([ent1_flatten, ent2_flatten])

    model = Model(inputs=[input_ids, index_border_ent1, index_border_ent2],
                  outputs=output)
    model.build(input_shape=(None, max_seq_len))
    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def test_bert_freeze(self):
    model_dir = tempfile.TemporaryDirectory().name
    os.makedirs(model_dir)
    save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
    tokenizer = bert.bert_tokenization.FullTokenizer(
        vocab_file=os.path.join(model_dir, "vocab.txt"), do_lower_case=True)

    # prepare input
    max_seq_len = 24
    input_str_batch = ["hello, bert!", "how are you doing!"]
    input_ids, token_type_ids = self.prepare_input_batch(input_str_batch, tokenizer, max_seq_len)

    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    bert_params.adapter_size = 4
    l_bert = bert.BertModelLayer.from_params(bert_params)

    model = keras.models.Sequential([
        l_bert,
    ])
    model.build(input_shape=(None, max_seq_len))
    model.summary()

    l_bert.apply_adapter_freeze()
    model.summary()
    bert.load_stock_weights(l_bert, bert_ckpt_file)
    # l_bert.embeddings_layer.trainable = False
    model.summary()

    orig_weight_values = []
    for weight in l_bert.weights:
        orig_weight_values.append(weight.numpy())

    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.mean_squared_error,
                  run_eagerly=True)

    trainable_count = len(l_bert.trainable_weights)

    orig_pred = model.predict(input_ids)
    model.fit(x=input_ids, y=np.zeros_like(orig_pred), batch_size=2, epochs=4)

    trained_count = 0
    for ndx, weight in enumerate(l_bert.weights):
        weight_equal = np.array_equal(weight.numpy(), orig_weight_values[ndx])
        print("{}: {}".format(weight_equal, weight.name))
        if not weight_equal:
            trained_count += 1

    print("  trained weights:", trained_count)
    print("trainable weights:", trainable_count)
    self.assertEqual(trained_count, trainable_count)
    model.summary()
def build_transformer():
    bert_params = params_from_pretrained_ckpt(FLAGS.cs_model_loc)
    # the flags hold keep probabilities, so convert them to dropout rates
    bert_params.hidden_dropout = 1 - FLAGS.cs_kp_tfm_hidden
    bert_params.attention_dropout = 1 - FLAGS.cs_kp_tfm_atten
    return AdvBertModelLayer.from_params(bert_params, name=FLAGS.cs_tfm_type)
def run_entity_marker_cls(bert_model_dir, do_lower_case):
    vocab_file = os.path.join(bert_model_dir, "vocab.txt")
    processor = input_processors.EntityProcessor(vocab_file=vocab_file,
                                                 do_lower_case=do_lower_case,
                                                 max_seq_length=128)
    head = heads.CLSHead(n_classes=1, out_activation="sigmoid",
                         bias_initializer="zeros", dropout_rate=0.0)
    inputs = processor.get_input_placeholders()
    bert_params = bert.params_from_pretrained_ckpt(bert_model_dir)
    bert_params["vocab_size"] = processor.vocab_size
    # use the function argument, not an out-of-scope argparse namespace
    model_ckpt = os.path.join(bert_model_dir, "bert_model.ckpt")
    # Calls model.build()
    model = get_bert_classifier(inputs, bert_params, model_ckpt, head)
    opt = tf.keras.optimizers.Adam(learning_rate=3e-5)
    loss_fn = "binary_crossentropy"
    model.compile(optimizer=opt, loss=loss_fn)

    bert_inputs, tokenized_docs = processor.process(docs["entity"])
    loss = model.evaluate(bert_inputs, np.array([1]))
    print()
    print("=== Entity CLS ===")
    print(docs["entity"])
    print(tokenized_docs)
    print(bert_inputs)
    print()
    model.summary()
    print(f"Loss: {loss}")
def create_model(max_seq_len, model_dir, model_ckpt, freeze=True, adapter_size=4):
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    print(f'bert params: {bert_params}')
    bert_params.adapter_size = adapter_size
    bert_params.adapter_init_scale = 1e-5
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    bert_output = l_bert(input_ids)
    print("bert shape", bert_output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :], name='lambda')(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(name='dense_sin', units=768, activation=tf.math.sin)(cls_out)
    # logits = keras.layers.Dense(name='dense_tanh', units=768, activation="tanh")(cls_out)
    # logits = keras.layers.Dense(name='dense_relu', units=256, activation="relu")(cls_out)
    # logits = keras.layers.Dense(name='dense_gelu', units=256, activation="gelu")(cls_out)
    logits = keras.layers.BatchNormalization()(logits)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(name='initial_predictions', units=len(classes),
                                activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))
    model.summary()

    if freeze:
        l_bert.apply_adapter_freeze()
        l_bert.embeddings_layer.trainable = False
        model.summary()

    # More details: https://arxiv.org/abs/1902.00751
    # apply global regularization on all trainable dense layers
    pf.utils.add_dense_layer_loss(model,
                                  kernel_regularizer=keras.regularizers.l2(0.01),
                                  bias_regularizer=keras.regularizers.l2(0.01))

    model.compile(
        optimizer=pf.optimizers.RAdam(),
        # loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),  # for some reason it does not work with logits at all
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    bert.load_stock_weights(l_bert, model_ckpt)
    # bert.load_bert_weights(l_bert, model_ckpt)

    return model
def create_bert_layer():
    global bert_layer

    bert_params = bert.params_from_pretrained_ckpt(models_folder)
    bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")

    # with adapter
    bert_layer.apply_adapter_freeze()
def create_bert_layer() -> bert.BertModelLayer:
    bert_params = bert.params_from_pretrained_ckpt(BERT_DIR)
    bert_params.mask_zero = True
    bert_layer = bert.BertModelLayer.from_params(bert_params, name='bert')
    bert_layer.apply_adapter_freeze()
    return bert_layer
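# A minimal sketch showing how the layer returned by create_bert_layer can be
# wired into a classifier; MAX_SEQ_LEN and NUM_CLASSES are assumed constants,
# and tensorflow is assumed to be imported as tf.
l_bert = create_bert_layer()
input_ids = tf.keras.layers.Input(shape=(MAX_SEQ_LEN,), dtype='int32')
seq_out = l_bert(input_ids)                                      # [batch, seq, hidden]
cls_out = tf.keras.layers.Lambda(lambda s: s[:, 0, :])(seq_out)  # [CLS] vector
probs = tf.keras.layers.Dense(NUM_CLASSES, activation='softmax')(cls_out)
clf = tf.keras.Model(input_ids, probs)
clf.build(input_shape=(None, MAX_SEQ_LEN))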
def test_extend_pretrained_segments(self):
    model_dir = tempfile.TemporaryDirectory().name
    os.makedirs(model_dir)
    save_path = MiniBertFactory.create_mini_bert_weights(model_dir)
    tokenizer = bert.FullTokenizer(vocab_file=os.path.join(model_dir, "vocab.txt"),
                                   do_lower_case=True)

    ckpt_dir = os.path.dirname(save_path)
    bert_params = bert.params_from_pretrained_ckpt(ckpt_dir)
    self.assertEqual(bert_params.token_type_vocab_size, 2)
    bert_params.token_type_vocab_size = 4

    l_bert = bert.BertModelLayer.from_params(bert_params)
    # we dummy call the layer once in order to instantiate the weights
    l_bert([np.array([[1, 1, 0]]), np.array([[1, 0, 0]])])  # , mask=[[True, True, False]])

    #
    # - load the weights from a pre-trained model,
    # - expect a mismatch for the token_type embeddings
    # - use the segment/token type id=0 embedding for the missing token types
    #
    mismatched = bert.load_stock_weights(l_bert, save_path)

    self.assertEqual(1, len(mismatched), "token_type embeddings should have mismatched shape")

    for weight, value in mismatched:
        if re.match("(.*)embeddings/token_type_embeddings/embeddings:0", weight.name):
            seg0_emb = value[:1, :]
            new_segment_embeddings = np.repeat(seg0_emb, (weight.shape[0] - value.shape[0]), axis=0)
            new_value = np.concatenate([value, new_segment_embeddings], axis=0)
            keras.backend.batch_set_value([(weight, new_value)])

    tte = l_bert.embeddings_layer.token_type_embeddings_layer.weights[0]
    if not tf.executing_eagerly():
        with tf.keras.backend.get_session() as sess:
            tte, = sess.run((tte,))

    self.assertTrue(np.allclose(seg0_emb, tte[0], 1e-6))
    self.assertFalse(np.allclose(seg0_emb, tte[1], 1e-6))
    self.assertTrue(np.allclose(seg0_emb, tte[2], 1e-6))
    self.assertTrue(np.allclose(seg0_emb, tte[3], 1e-6))

    bert_params.token_type_vocab_size = 4
    print("token_type_vocab_size", bert_params.token_type_vocab_size)
    print(l_bert.embeddings_layer.trainable_weights[1])
def test_albert_chinese_weights(self):
    albert_model_name = "albert_base"
    albert_dir = bert.fetch_brightmart_albert_model(albert_model_name, ".models")
    albert_ckpt = os.path.join(albert_dir, "albert_model.ckpt")

    albert_params = bert.params_from_pretrained_ckpt(albert_dir)
    model, l_bert = self.build_model(albert_params)

    skipped_weight_value_tuples = bert.load_albert_weights(l_bert, albert_ckpt)
    self.assertEqual(0, len(skipped_weight_value_tuples))
    model.summary()
def buildModel(self):
    # note: bert_params is loaded but not used by this purely CNN-based model
    bert_params = bert.params_from_pretrained_ckpt(self.preModelPath)

    inputLayer1 = keras.layers.Input(shape=(self.maxLen,), dtype='int32')
    embeddingLayer1 = keras.layers.Embedding(
        input_dim=self.vocabSize + 1,
        output_dim=self.hiddenSize,
        input_length=self.maxLen,
    )(inputLayer1)
    reshapeLayer1 = keras.layers.Reshape((self.maxLen, self.hiddenSize, 1))(embeddingLayer1)

    inputLayer2 = keras.layers.Input(shape=(self.maxLen,), dtype='int32')
    embeddingLayer2 = keras.layers.Embedding(
        input_dim=self.vocabSize + 1,
        output_dim=self.hiddenSize,
        input_length=self.maxLen)(inputLayer2)
    reshapeLayer2 = keras.layers.Reshape((self.maxLen, self.hiddenSize, 1))(embeddingLayer2)

    cnnLayer1 = keras.layers.Conv2D(3, kernel_size=(3, self.hiddenSize))(reshapeLayer1)
    poolingLayer1 = keras.layers.MaxPool2D(pool_size=(3, 1))(cnnLayer1)
    flattenLayer1 = keras.layers.Flatten()(poolingLayer1)
    denseLayer1 = keras.layers.Dense(128, activation="tanh")(flattenLayer1)
    denseLayer1 = keras.layers.Dense(64, activation="tanh")(denseLayer1)
    denseLayer1 = keras.layers.Dense(32, activation="tanh")(denseLayer1)

    cnnLayer2 = keras.layers.Conv2D(3, kernel_size=(3, self.hiddenSize))(reshapeLayer2)
    poolingLayer2 = keras.layers.MaxPool2D(pool_size=(3, 1))(cnnLayer2)
    flattenLayer2 = keras.layers.Flatten()(poolingLayer2)
    denseLayer2 = keras.layers.Dense(128, activation="tanh")(flattenLayer2)
    denseLayer2 = keras.layers.Dense(64, activation="tanh")(denseLayer2)
    denseLayer2 = keras.layers.Dense(32, activation="tanh")(denseLayer2)

    BLC1 = keras.layers.LayerNormalization()(denseLayer1)
    BLC2 = keras.layers.LayerNormalization()(denseLayer2)

    # cosine similarity between the two encodings, plus its complement
    multLayer = keras.layers.Dot(axes=1, normalize=True)([BLC1, BLC2])
    nlLayer = 1 - multLayer
    concatLayer = keras.layers.concatenate([nlLayer, multLayer], axis=-1)

    # denseLayer = keras.layers.Dense(64, activation="tanh")(multLayer)
    # denseLayer = keras.layers.Dense(32, activation="tanh")(denseLayer)
    # denseLayer = keras.layers.Dense(16, activation="tanh")(denseLayer)
    # outputLayer = keras.layers.Dense(2, name="classifier", activation="softmax")(denseLayer)

    self.model = keras.models.Model([inputLayer1, inputLayer2], concatLayer)
    self.model.compile(loss=self.amsoftmax_loss,
                       optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate))
def test_bert_google_weights(self):
    bert_model_name = "uncased_L-12_H-768_A-12"
    bert_dir = bert.fetch_google_bert_model(bert_model_name, ".models")
    bert_ckpt = os.path.join(bert_dir, "bert_model.ckpt")

    bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    model, l_bert = self.build_model(bert_params)

    skipped_weight_value_tuples = bert.load_bert_weights(l_bert, bert_ckpt)
    self.assertEqual(0, len(skipped_weight_value_tuples))
    model.summary()
def bLayer():
    global bert_layer
    pTrain_dir = 'cased_L-12_H-768_A-12'
    bert_params = bert.params_from_pretrained_ckpt(pTrain_dir)
    bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")
    bert_layer.apply_adapter_freeze()
    bert_layer.trainable = True
def test_albert_zh_fetch_and_load(self):
    albert_model_name = "albert_tiny"
    albert_dir = bert.fetch_brightmart_albert_model(albert_model_name, ".models")

    model_params = bert.params_from_pretrained_ckpt(albert_dir)
    model_params.vocab_size = model_params.vocab_size + 2
    model_params.adapter_size = 1
    l_bert = bert.BertModelLayer.from_params(model_params, name="albert")
    l_bert(tf.zeros((1, 128)))

    res = bert.load_albert_weights(l_bert, albert_dir)
    self.assertTrue(len(res) > 0)
def test_coverage_improve(self):
    bert_params = bert.params_from_pretrained_ckpt(self.ckpt_dir)
    model, l_bert = self.build_model(bert_params, 1)
    for weight in model.weights:
        try:
            # round-trip: keras name -> stock name -> keras name
            stock_name = bert.loader.map_to_stock_variable_name(
                weight.name, weight.name.split("/")[0])
            # note: map_from_stock_variale_name is the (misspelled) name
            # actually exported by bert.loader
            keras_name = bert.loader.map_from_stock_variale_name(stock_name)
            self.assertEqual(weight.name.split(":")[0], keras_name)
        except Exception:
            print(weight.name)
def create_bert_model(self):
    bert_params = bert.params_from_pretrained_ckpt(self.model_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

    l_input_ids = tf.keras.layers.Input(shape=(self.max_seq_length,), dtype='int32')
    output = l_bert(l_input_ids)

    model = tf.keras.Model(inputs=l_input_ids, outputs=output)
    model.build(input_shape=(None, self.max_seq_length))
    bert.load_stock_weights(l_bert, self.model_ckpt)
    return model
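# A minimal usage sketch for create_bert_model; `wrapper` stands for a
# hypothetical instance of the enclosing class with model_dir, model_ckpt and
# max_seq_length already set (max_seq_length assumed to be 8 here).
model = wrapper.create_bert_model()
token_ids = np.zeros((1, 8), dtype=np.int32)  # one all-padding sequence
hidden = model.predict(token_ids)             # shape: (1, 8, hidden_size)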
def __init__(self, lang='en', adapter_size=None):
    model_name = lang_models[lang]
    model_dir = os.path.join(root_dir, model_name)
    self.bert_params = bert.params_from_pretrained_ckpt(model_dir)
    self.bert_params.adapter_size = adapter_size
    self.model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
    self.l_bert = bert.BertModelLayer.from_params(self.bert_params, name="bert")

    do_lower_case = not (model_name.find("cased") == 0 or model_name.find("multi_cased") == 0)
    bert.bert_tokenization.validate_case_matches_checkpoint(do_lower_case, self.model_ckpt)
    vocab_file = os.path.join(model_dir, "vocab.txt")
    self.tokenizer = bert.bert_tokenization.FullTokenizer(vocab_file, do_lower_case)
def create_model(
    model_dir,
    model_type,
    max_seq_len,
    n_classes,
    load_pretrained_weights=True,
    summary=False,
):
    """Creates keras model with pretrained BERT/ALBERT layer.

    Args:
        model_dir: String. Path to model.
        model_type: String. Expects either "albert" or "bert"
        max_seq_len: Int. Maximum length of a classification example.
        n_classes: Int. Number of training classes.
        load_pretrained_weights: Boolean. Load pretrained model weights.
        summary: Boolean. Print model summary.

    Returns:
        Keras model
    """
    if model_type == "albert":
        model_ckpt = os.path.join(model_dir, "model.ckpt-best")
        model_params = bert.albert_params(model_dir)
    elif model_type == "bert":
        model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
        model_params = bert.params_from_pretrained_ckpt(model_dir)
    layer_bert = bert.BertModelLayer.from_params(model_params, name=model_type)

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids")
    output = layer_bert(input_ids)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=model_params["hidden_size"], activation="relu")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=n_classes, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    if load_pretrained_weights:
        if model_type == "albert":
            bert.load_albert_weights(layer_bert, model_ckpt)
        elif model_type == "bert":
            bert.load_bert_weights(layer_bert, model_ckpt)

    model.compile(
        optimizer=keras.optimizers.Adam(),
        # the output layer already applies softmax, so from_logits must be False
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
    )

    if summary:
        model.summary()

    return model
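# A minimal usage sketch for create_model; the directory is a placeholder and
# assumes an unpacked Google BERT checkpoint.
model = create_model(
    model_dir="uncased_L-12_H-768_A-12",  # hypothetical checkpoint directory
    model_type="bert",
    max_seq_len=128,
    n_classes=2,
    summary=True)
# model.fit(x_train, y_train, ...) with x_train as padded token-id arrays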
def buildModel(self):
    inputLayer = keras.layers.Input(shape=(self.maxLen,), dtype='int32')
    bert_params = bert.params_from_pretrained_ckpt(self.preModelPath)
    bertLayer = bert.BertModelLayer.from_params(bert_params, name="bert")(inputLayer)
    flattenLayer = keras.layers.Flatten()(bertLayer)
    outputLayer = keras.layers.Dense(self.classNum, activation="softmax")(flattenLayer)

    self.model = keras.models.Model(inputLayer, outputLayer)
    self.model.compile(loss="SparseCategoricalCrossentropy",
                       optimizer=tf.keras.optimizers.RMSprop(learning_rate=self.learning_rate))
def test_coverage_improve(self):
    bert_params = bert.params_from_pretrained_ckpt(self.ckpt_dir)
    model, l_bert = self.build_model(bert_params, 1)
    l_bert_prefix = bert.loader.bert_prefix(l_bert)
    for weight in model.weights:
        stock_name = bert.loader.map_to_stock_variable_name(weight.name, l_bert_prefix)
        if stock_name is None:
            print("No BERT stock weight for", weight.name)
            continue
        keras_name = bert.loader.map_from_stock_variale_name(stock_name, l_bert_prefix)
        self.assertEqual(weight.name.split(":")[0], keras_name)
def __init__(self, embedding_size=100, hidden_size=100, bidirectional=True,
             layer_size=1, dropout=.5, recurrent_dropout=.0,
             embedding_weights=None, embedding_trainable=True,
             vocab_file=None, bert=False, bert_model_dir=None,
             bert_max_length=4096, bert_params=None, bert_num_layers=None,
             bert_trainable=False, learning_rate=None, optimizer='Adam',
             loss='crf'):
    if bert:
        assert bert_params is not None or bert_model_dir is not None
        if bert_params is None:
            bert_params = params_from_pretrained_ckpt(bert_model_dir)
        # keep the explicitly provided params instead of discarding them
        self.bert_params = bert_params
    else:
        self.bert_params = None

    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.bidirectional = bidirectional
    self.layer_size = layer_size
    self.dropout = dropout
    self.recurrent_dropout = recurrent_dropout
    self.model = None
    self.vocab_file = vocab_file
    self.bert = bert
    self.bert_model_dir = bert_model_dir
    self.bert_max_length = bert_max_length
    self.bert_num_layers = bert_num_layers
    self.bert_trainable = bert_trainable
    self.embedding_weights = embedding_weights
    self.embedding_trainable = embedding_trainable
    self.learning_rate = learning_rate
    self.optimizer = optimizer
    self.loss = loss
    self.tokenizer = None
    self.label = None
    self.batch_size = 32
def Bert_feature_extraction(ids, texts, max_seq_len, feature_file_name):
    # https://github.com/kpe/bert-for-tf2
    model_dir = ".models/uncased_L-12_H-768_A-12/uncased_L-12_H-768_A-12"
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")

    l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')
    # using the default token_type/segment id 0, so no token_type input is wired in
    output = l_bert(l_input_ids)  # output: [batch_size, max_seq_len, hidden_size]
    output = keras.layers.GlobalAveragePooling1D()(output)

    model = keras.Model(inputs=l_input_ids, outputs=output)
    model.build(input_shape=(None, max_seq_len))
    bert.load_stock_weights(l_bert, bert_ckpt_file)

    do_lower_case = not (model_dir.find("cased") == 0 or model_dir.find("multi_cased") == 0)
    bert.bert_tokenization.validate_case_matches_checkpoint(do_lower_case, bert_ckpt_file)
    vocab_file = os.path.join(model_dir, "vocab.txt")
    tokenizer = bert.bert_tokenization.FullTokenizer(vocab_file, do_lower_case)

    feature_dict = {}
    for i in range(len(ids)):
        id = ids[i]
        print(id)
        title = texts[i]
        tokens = tokenizer.tokenize(title)
        print(tokens)
        tokens = ["[CLS]"] + tokens + ["[SEP]"]
        token_ids = tokenizer.convert_tokens_to_ids(tokens)
        # pad or truncate to max_seq_len
        while len(token_ids) < max_seq_len:
            token_ids.append(0)
        if len(token_ids) > max_seq_len:
            token_ids = token_ids[:max_seq_len]
        print(token_ids)
        token_ids = np.array([token_ids], dtype=np.int32)
        feature = model.predict(token_ids)
        feature_dict[id] = feature.tolist()[0]

    np.save(feature_file_name, feature_dict)
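# A minimal usage sketch for Bert_feature_extraction; the ids and texts below
# are placeholders, and the checkpoint under .models/ is assumed to exist.
doc_ids = ["doc1", "doc2"]
doc_texts = ["a first example sentence", "a second example sentence"]
Bert_feature_extraction(doc_ids, doc_texts, max_seq_len=32,
                        feature_file_name="title_features.npy")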