def make_entity_border_encoder(bert_path, ckpt_file, max_seq_len, bert_dim):
    # Frozen BERT encoder that averages the token embeddings inside each
    # entity's border span and concatenates the two entity representations.
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params, name="bert", trainable=False)
    gather_fn = make_gather_entity_border_fn(bert_dim)

    input_ids = Input(shape=(max_seq_len,), dtype='int32')
    index_border_ent1 = Input(shape=(2,), dtype='int32')
    index_border_ent2 = Input(shape=(2,), dtype='int32')

    bert_emb = bert_layer(input_ids)
    ent1_avg_emb = Lambda(lambda x: gather_fn(x))([bert_emb, index_border_ent1])
    ent2_avg_emb = Lambda(lambda x: gather_fn(x))([bert_emb, index_border_ent2])
    ent1_flatten = Flatten()(ent1_avg_emb)
    ent2_flatten = Flatten()(ent2_avg_emb)
    output = concatenate([ent1_flatten, ent2_flatten])

    model = Model(inputs=[input_ids, index_border_ent1, index_border_ent2],
                  outputs=output)
    model.build(input_shape=(None, max_seq_len))
    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
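# The helper make_gather_entity_border_fn is not shown in this snippet. Below
# is a minimal sketch of what it might look like, assuming it averages the
# BERT embeddings of the tokens inside each entity's [start, end) border span:
def make_gather_entity_border_fn(bert_dim):
    def gather_fn(inputs):
        bert_emb, borders = inputs                       # [batch, seq, dim], [batch, 2]
        borders = tf.cast(borders, tf.int32)
        positions = tf.range(tf.shape(bert_emb)[1])[tf.newaxis, :]   # [1, seq]
        span_mask = tf.cast((positions >= borders[:, 0:1]) &
                            (positions < borders[:, 1:2]), bert_emb.dtype)
        summed = tf.einsum('bs,bsd->bd', span_mask, bert_emb)        # [batch, dim]
        counts = tf.maximum(tf.reduce_sum(span_mask, axis=1, keepdims=True), 1.0)
        avg = summed / counts
        avg.set_shape([None, bert_dim])
        return avg
    return gather_fn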
def make_entity_start_model(bert_path, ckpt_file, max_seq_len, bert_dim):
    # Trainable BERT classifier built on the two entity-start token embeddings.
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params, name="bert", trainable=True)
    slice_fn = make_gather_entity_start_fn(bert_dim)

    input_ids = Input(shape=(max_seq_len,), dtype='int32')
    index_ent1 = Input(shape=(2,), dtype='int32')
    index_ent2 = Input(shape=(2,), dtype='int32')

    bert_emb = bert_layer(input_ids)
    ent1_start = Lambda(lambda x: slice_fn(x))([bert_emb, index_ent1])
    ent2_start = Lambda(lambda x: slice_fn(x))([bert_emb, index_ent2])
    concat = concatenate([ent1_start, ent2_start])
    output = Dense(2, activation='softmax')(concat)

    model = Model(inputs=[input_ids, index_ent1, index_ent2], outputs=output)
    model.build(input_shape=(None, max_seq_len))
    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
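# make_gather_entity_start_fn is likewise not shown here. A minimal sketch,
# assuming it picks out the embedding at each entity's start token (the first
# column of the 2-element index input):
def make_gather_entity_start_fn(bert_dim):
    def gather_fn(inputs):
        bert_emb, index = inputs                         # [batch, seq, dim], [batch, 2]
        start = tf.cast(index[:, 0], tf.int32)           # [batch]
        gathered = tf.gather(bert_emb, start, axis=1, batch_dims=1)  # [batch, dim]
        gathered.set_shape([None, bert_dim])
        return gathered
    return gather_fn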
def load_pretrained_weights(bert_layer, model_name, fetch_dir=None):
    if getattr(bert_layer, "is_hf", False):
        # The RoBERTa layer will already have the pretrained weights loaded.
        return bert_layer
    ckpt = get_pretrained_checkpoint(model_name, fetch_dir=fetch_dir)
    bert.load_bert_weights(bert_layer, ckpt)
    return bert_layer
def create_model(
    model_dir,
    model_type,
    max_seq_len,
    n_classes,
    load_pretrained_weights=True,
    summary=False,
):
    """Creates a keras model with a pretrained BERT/ALBERT layer.

    Args:
      model_dir: String. Path to model.
      model_type: String. Expects either "albert" or "bert".
      max_seq_len: Int. Maximum length of a classification example.
      n_classes: Int. Number of training classes.
      load_pretrained_weights: Boolean. Load pretrained model weights.
      summary: Boolean. Print model summary.

    Returns:
      Keras model
    """
    if model_type == "albert":
        model_ckpt = os.path.join(model_dir, "model.ckpt-best")
        model_params = bert.albert_params(model_dir)
    elif model_type == "bert":
        model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
        model_params = bert.params_from_pretrained_ckpt(model_dir)

    layer_bert = bert.BertModelLayer.from_params(model_params, name=model_type)

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype="int32", name="input_ids")
    output = layer_bert(input_ids)
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=model_params["hidden_size"], activation="relu")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=n_classes, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    if load_pretrained_weights:
        if model_type == "albert":
            bert.load_albert_weights(layer_bert, model_ckpt)
        elif model_type == "bert":
            bert.load_bert_weights(layer_bert, model_ckpt)

    model.compile(
        optimizer=keras.optimizers.Adam(),
        # The final Dense layer already applies softmax, so the loss consumes
        # probabilities rather than logits.
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
    )

    if summary:
        model.summary()

    return model
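# Example call (the path and sizes below are placeholders, not values from the
# original code):
model = create_model(
    model_dir="models/uncased_L-12_H-768_A-12",
    model_type="bert",
    max_seq_len=128,
    n_classes=3,
    summary=True,
)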
def load_pretrained_weights(model, pretrained_model, fetch_dir=None):
    bert_ckpt = bert_common.get_pretrained_checkpoint(
        pretrained_model, fetch_dir=fetch_dir
    )
    # We have to do this ugly hack as the load_bert_weights method checks if the
    # model is an instance of BertModelLayer.
    old_isinstance = builtins.isinstance
    builtins.isinstance = (
        lambda x, y: True if y == bert.BertModelLayer else old_isinstance(x, y)
    )
    bert.load_bert_weights(model, bert_ckpt, _our_map_to_stock_variable_name)
    builtins.isinstance = old_isinstance
    return model
def build_transformer(transformer, max_seq_length, num_labels, tagging=True, tokenizer_only=False):
    if transformer in albert_models_google:
        from bert.tokenization.albert_tokenization import FullTokenizer
        model_url = albert_models_google[transformer]
        albert = True
    elif transformer in bert_models_google:
        from bert.tokenization.bert_tokenization import FullTokenizer
        model_url = bert_models_google[transformer]
        albert = False
    else:
        raise ValueError(
            f'Unknown model {transformer}, available ones: '
            f'{list(bert_models_google.keys()) + list(albert_models_google.keys())}')
    bert_dir = get_resource(model_url)
    vocab = glob.glob(os.path.join(bert_dir, '*vocab*.txt'))
    assert len(vocab) == 1, 'No vocab found or multiple vocabs found'
    vocab = vocab[0]
    # noinspection PyTypeChecker
    tokenizer = FullTokenizer(vocab_file=vocab)
    if tokenizer_only:
        return tokenizer
    bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name="bert")
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids], mask=l_mask_ids)
    if not tagging:
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    if bert_params.hidden_dropout:
        output = tf.keras.layers.Dropout(bert_params.hidden_dropout, name='hidden_dropout')(output)
    logits = tf.keras.layers.Dense(
        num_labels,
        kernel_initializer=tf.keras.initializers.TruncatedNormal(bert_params.initializer_range))(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids], outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    ckpt = glob.glob(os.path.join(bert_dir, '*.index'))
    assert ckpt, f'No checkpoint found under {bert_dir}'
    ckpt, _ = os.path.splitext(ckpt[0])
    with stdout_redirected(to=os.devnull):
        if albert:
            skipped_weight_value_tuples = load_stock_weights(l_bert, ckpt)
        else:
            skipped_weight_value_tuples = bert.load_bert_weights(l_bert, ckpt)
    assert 0 == len(skipped_weight_value_tuples), f'failed to load pretrained {transformer}'
    return model, tokenizer
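# Example call (the model name is assumed to be a key of bert_models_google;
# the label count is a placeholder):
model, tokenizer = build_transformer('uncased_L-12_H-768_A-12',
                                     max_seq_length=128, num_labels=10,
                                     tagging=True)
token_ids = tokenizer.convert_tokens_to_ids(tokenizer.tokenize('hello world'))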
def __init__(self, model_dir, d_model, args):
    super(BertEncoder, self).__init__(trainable=False)
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    self.bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert_layer")
    self.model_dir = model_dir
    tf.compat.v1.logging.info('bert model loaded from {}'.format(model_dir))
    tf.compat.v1.logging.info('bert model params: {}'.format(bert_params))
    # do a dummy call to build the model indirectly
    self.bert_layer([
        tf.zeros([args.batch_size, args.seq_length], dtype=tf.dtypes.int64),
        tf.zeros([args.batch_size, args.seq_length], dtype=tf.dtypes.int64)
    ])
    bert.load_bert_weights(self.bert_layer,
                           os.path.join(self.model_dir, "bert_model.ckpt"))
    tf.compat.v1.logging.info('bert weights loaded')
def test_bert_google_weights(self):
    bert_model_name = "uncased_L-12_H-768_A-12"
    bert_dir = bert.fetch_google_bert_model(bert_model_name, ".models")
    bert_ckpt = os.path.join(bert_dir, "bert_model.ckpt")

    bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    model, l_bert = self.build_model(bert_params)

    skipped_weight_value_tuples = bert.load_bert_weights(l_bert, bert_ckpt)
    self.assertEqual(0, len(skipped_weight_value_tuples))
    model.summary()
def make_cls_encoder(bert_path, ckpt_file, max_seq_len, bert_dim):
    # Frozen BERT encoder that returns the [CLS] (position 0) token embedding.
    model_ckpt = bert_path + ckpt_file
    bert_params = params_from_pretrained_ckpt(bert_path)
    bert_layer = BertModelLayer.from_params(bert_params, name="bert", trainable=False)

    input_ids = Input(shape=(max_seq_len,), dtype='int32')
    bert_emb = bert_layer(input_ids)
    output = Lambda(lambda x: tf.gather(x, indices=0, axis=1))(bert_emb)

    model = Model(inputs=input_ids, outputs=output)
    model.build(input_shape=(None, max_seq_len))
    load_bert_weights(bert_layer, model_ckpt)
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model
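# Example call (paths are placeholders; token_ids is assumed to be an int
# array of shape [batch, max_seq_len] produced by a BERT tokenizer):
encoder = make_cls_encoder('models/uncased_L-12_H-768_A-12/', 'bert_model.ckpt',
                           max_seq_len=128, bert_dim=768)
cls_vectors = encoder.predict(token_ids)   # [batch, 768] [CLS] embeddings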
def get_bert_classifier(inputs, bert_params, model_ckpt,
                        classification_head, logging_fn=print):
    if len(inputs) < 3:
        raise ValueError("BERT inputs must be of length 3")

    params_str = "Initializing BERT layer with params:"
    for (k, v) in bert_params.items():
        params_str += f"\n {k}: {v}"
    logging_fn(params_str)

    # inputs[:3] are always [word_ids, token_mask, token_type_ids]
    bert_inputs = [inputs[0], inputs[2]]
    bert_layer = bert.BertModelLayer.from_params(bert_params, name="bert")
    seq_output = bert_layer(bert_inputs)

    # inputs[3:] are any arguments to pass to the prediction layer,
    # e.g. entity masks. Can be empty.
    args = [seq_output] + inputs[3:]
    predictions = classification_head(*args)

    model = tf.keras.Model(inputs=inputs, outputs=predictions,
                           name=f"bert_{classification_head.name}")
    input_shapes = [inp.shape for inp in inputs]
    model.build(input_shape=input_shapes)

    skipped = bert.load_bert_weights(bert_layer, model_ckpt)
    # A single skipped parameter is probably the word embeddings,
    # because we extended the vocabulary.
    if len(skipped) == 1:
        skipped_param, ckpt_value = skipped[0]
        emb_name = "bert/embeddings/word_embeddings/embeddings:0"
        if skipped_param.name == emb_name:
            old_vocab_size = ckpt_value.shape[0]
            new_vocab_size = bert_params["vocab_size"]
            logging_fn(
                f"Extending pretrained BERT embeddings: {old_vocab_size} -> {new_vocab_size}"
            )  # noqa
            extended_embeddings = extend_ckpt_embeddings(ckpt_value, new_vocab_size)
            tf.keras.backend.set_value(skipped_param, extended_embeddings)
        else:
            raise ValueError(f"Skipped loading params: {skipped}")
    elif len(skipped) > 1:
        raise ValueError(f"Skipped loading params: {skipped}")

    return model
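# extend_ckpt_embeddings is not shown in this snippet. A minimal sketch,
# assuming it pads the pretrained word-embedding matrix with randomly
# initialized rows for the extra vocabulary entries (initializer_range is an
# assumed default):
import numpy as np

def extend_ckpt_embeddings(ckpt_value, new_vocab_size, initializer_range=0.02):
    old_vocab_size, emb_dim = ckpt_value.shape
    new_rows = np.random.normal(0.0, initializer_range,
                                size=(new_vocab_size - old_vocab_size, emb_dim))
    return np.concatenate([ckpt_value, new_rows.astype(ckpt_value.dtype)], axis=0)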
def build_transformer(transformer, max_seq_length=None, num_labels=None, tagging=True, tokenizer_only=False):
    spm_model_file = None
    if transformer in zh_albert_models_google:
        from bert.tokenization.albert_tokenization import FullTokenizer
        model_url = zh_albert_models_google[transformer]
        albert = True
    elif transformer in albert_models_tfhub:
        from edparser.layers.transformers.albert_tokenization import FullTokenizer
        with stdout_redirected(to=os.devnull):
            model_url = fetch_tfhub_albert_model(transformer,
                                                 os.path.join(hanlp_home(), 'thirdparty', 'tfhub.dev', 'google',
                                                              transformer))
        albert = True
        spm_model_file = glob.glob(os.path.join(model_url, 'assets', '*.model'))
        assert len(spm_model_file) == 1, 'No sentencepiece model found or multiple models found'
        spm_model_file = spm_model_file[0]
    elif transformer in bert_models_google:
        from bert.tokenization.bert_tokenization import FullTokenizer
        model_url = bert_models_google[transformer]
        albert = False
    else:
        raise ValueError(
            f'Unknown model {transformer}, available ones: '
            f'{list(bert_models_google.keys()) + list(zh_albert_models_google.keys()) + list(albert_models_tfhub.keys())}')
    bert_dir = get_resource(model_url)
    if spm_model_file:
        vocab = glob.glob(os.path.join(bert_dir, 'assets', '*.vocab'))
    else:
        vocab = glob.glob(os.path.join(bert_dir, '*vocab*.txt'))
    assert len(vocab) == 1, 'No vocab found or multiple vocabs found'
    vocab = vocab[0]
    lower_case = any(key in transformer for key in ['uncased', 'multilingual', 'chinese', 'albert'])
    if spm_model_file:
        # noinspection PyTypeChecker
        tokenizer = FullTokenizer(vocab_file=vocab, spm_model_file=spm_model_file, do_lower_case=lower_case)
    else:
        tokenizer = FullTokenizer(vocab_file=vocab, do_lower_case=lower_case)
    if tokenizer_only:
        return tokenizer
    if spm_model_file:
        bert_params = albert_params(bert_dir)
    else:
        bert_params = bert.params_from_pretrained_ckpt(bert_dir)
    l_bert = bert.BertModelLayer.from_params(bert_params, name='albert' if albert else "bert")
    if not max_seq_length:
        return l_bert, tokenizer, bert_dir
    l_input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="input_ids")
    l_mask_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="mask_ids")
    l_token_type_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids], mask=l_mask_ids)
    if not tagging:
        output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    if bert_params.hidden_dropout:
        output = tf.keras.layers.Dropout(bert_params.hidden_dropout, name='hidden_dropout')(output)
    logits = tf.keras.layers.Dense(num_labels,
                                   kernel_initializer=tf.keras.initializers.TruncatedNormal(
                                       bert_params.initializer_range))(output)
    model = tf.keras.Model(inputs=[l_input_ids, l_mask_ids, l_token_type_ids], outputs=logits)
    model.build(input_shape=(None, max_seq_length))
    if not spm_model_file:
        ckpt = glob.glob(os.path.join(bert_dir, '*.index'))
        assert ckpt, f'No checkpoint found under {bert_dir}'
        ckpt, _ = os.path.splitext(ckpt[0])
    with stdout_redirected(to=os.devnull):
        if albert:
            if spm_model_file:
                skipped_weight_value_tuples = bert.load_albert_weights(l_bert, bert_dir)
            else:
                # noinspection PyUnboundLocalVariable
                skipped_weight_value_tuples = load_stock_weights(l_bert, ckpt)
        else:
            # noinspection PyUnboundLocalVariable
            skipped_weight_value_tuples = bert.load_bert_weights(l_bert, ckpt)
    assert 0 == len(skipped_weight_value_tuples), f'failed to load pretrained {transformer}'
    return model, tokenizer
def post_build_model(self):
    bert.load_bert_weights(self.l_bert, self.model_ckpt)
def tokenize(text, label):
    def _tokenize(text, label):
        tokens = tokenizer.tokenize(text.numpy())[:MAX_LEN - 2]
        tokens = ['[CLS]'] + tokens + ['[SEP]']
        token_ids = tokenizer.convert_tokens_to_ids(tokens)
        return token_ids, label
    return tf.py_function(_tokenize, [text, label], [tf.int32, tf.int64])


train = train.map(tokenize).padded_batch(128, padded_shapes=([MAX_LEN], []))
valid = valid.map(tokenize).padded_batch(128, padded_shapes=([MAX_LEN], []))

# Construct a classifier with BERT
bert_layer = bert.BertModelLayer.from_params(bert_params)
bert_layer.trainable = False
model = Sequential([
    Input(shape=(MAX_LEN,)),
    bert_layer,
    Lambda(lambda seq: seq[:, 0, :]),
    Dense(1),
])
bert.load_bert_weights(bert_layer, bert_model_ckpt)

# Train it!
model.compile(Adam(), BinaryCrossentropy(True), ['accuracy'])
model.fit(train, validation_data=valid, epochs=5)

# Save the model
model.save('models/imdb_bert')
X_test = pad_sequences(X_test, padding='post', maxlen=maxlen)

# Creating the model: a simple baseline model to test results.
# Pending: include BERT embeddings (if they exist) to improve the models;
# so far only the BERT tokenizer is used.
l_input_ids = keras.layers.Input(shape=(maxlen,), dtype='int32')
l_token_type_ids = keras.layers.Input(shape=(maxlen,), dtype='int32')
output = l_bert(l_input_ids)
LSTM_Layer_1 = keras.layers.LSTM(128)(output)
logits = keras.layers.Dense(numclass, activation='softmax')(LSTM_Layer_1)

model = keras.Model(inputs=l_input_ids, outputs=logits)
model.build(input_shape=(None, maxlen))
bert.load_bert_weights(l_bert, model_ckpt)
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])
model.summary()

history = model.fit(X_train, y_train, batch_size=128, epochs=1, verbose=1,
                    validation_split=0.2, class_weight=class_weights)
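# Evaluating on the held-out split (assumes y_test is the matching one-hot
# label array, consistent with the categorical_crossentropy loss above):
loss, acc = model.evaluate(X_test, y_test, batch_size=128, verbose=1)
print(f'test accuracy: {acc:.3f}')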