def test_multi(self):
    """Build a two-input BERT model and load the stock checkpoint into it.

    Reads ``bert_config.json`` and ``bert_model.ckpt`` from
    ``self.model_dir``. The test makes no assertions: it passes when model
    construction and weight loading finish without raising.
    """
    seq_len = 128
    ckpt_dir = self.model_dir
    print(ckpt_dir)

    config_path = os.path.join(ckpt_dir, "bert_config.json")
    ckpt_path = os.path.join(ckpt_dir, "bert_model.ckpt")

    # Only the raw JSON is needed from the file handle.
    with tf.io.gfile.GFile(config_path, "r") as reader:
        config_json = reader.read()
    stock_config = StockBertConfig.from_json_string(config_json)
    layer_params = stock_config.to_bert_model_layer_params()
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    # Two inputs: token ids and token-type ids.
    model_inputs = [
        keras.layers.Input(shape=(seq_len, ), dtype='int32',
                           name="input_ids"),
        keras.layers.Input(shape=(seq_len, ), dtype='int32',
                           name="token_type_ids"),
    ]
    sequence_output = bert_layer(model_inputs)

    model = keras.Model(inputs=model_inputs, outputs=sequence_output)
    model.build(input_shape=[(None, seq_len), (None, seq_len)])

    load_stock_weights(bert_layer, ckpt_path)
def create_model(max_seq_len, bert_ckpt_file):
    """Return an uncompiled BERT classifier with stock weights loaded.

    Relies on the module-level names ``bert_config_file`` (path of the BERT
    JSON config) and ``classes`` (its length sets the output width).

    Args:
        max_seq_len: length of the token-id input sequence.
        bert_ckpt_file: checkpoint handed to ``load_stock_weights``.

    Returns:
        A ``keras.Model`` mapping ``input_ids`` to softmax class scores.
    """
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        config_json = reader.read()
    layer_params = map_stock_config_to_params(
        StockBertConfig.from_json_string(config_json))
    layer_params.adapter_size = None
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    token_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                   name="input_ids")
    sequence_output = bert_layer(token_ids)
    print("bert shape", sequence_output.shape)

    # Classification head on the first sequence position.
    head = keras.layers.Lambda(lambda seq: seq[:, 0, :])(sequence_output)
    head = keras.layers.Dropout(0.5)(head)
    head = keras.layers.Dense(units=768, activation="tanh")(head)
    head = keras.layers.Dropout(0.5)(head)
    class_scores = keras.layers.Dense(units=len(classes),
                                      activation="softmax")(head)

    classifier = keras.Model(inputs=token_ids, outputs=class_scores)
    classifier.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert_layer, bert_ckpt_file)
    return classifier
def load_keras_model(model_dir, max_seq_len):
    """Load BERT as a Keras model with a single sigmoid output unit.

    Args:
        model_dir: directory containing ``bert_config.json`` and
            ``bert_model.ckpt``.
        max_seq_len: length of both input sequences.

    Returns:
        A ``keras.Model`` taking ``[input_ids, token_type_ids]`` and
        producing one sigmoid-activated value per example.
    """
    # Load BERT through Keras.
    from tensorflow.python import keras
    from bert import BertModelLayer
    from bert.loader import StockBertConfig, load_stock_weights

    bert_config_file = os.path.join(model_dir, "bert_config.json")
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    l_bert = BertModelLayer.from_params(bc.to_bert_model_layer_params(),
                                        name="bert")

    l_input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                     name="input_ids")
    l_token_type_ids = keras.layers.Input(shape=(max_seq_len, ),
                                          dtype='int32',
                                          name="token_type_ids")

    sequence_output = l_bert([l_input_ids, l_token_type_ids])
    # Use the Lambda layer from the same ``keras`` module as every other
    # layer here, instead of relying on a bare ``Lambda`` name that this
    # function never imports.
    first_position = keras.layers.Lambda(lambda x: x[:, 0])(sequence_output)
    output = keras.layers.Dense(
        1, activation=keras.activations.sigmoid)(first_position)

    model = keras.Model(inputs=[l_input_ids, l_token_type_ids],
                        outputs=output)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    load_stock_weights(l_bert, bert_ckpt_file)
    return model
def bert_2(self, bert_config_file=None, bert_ckpt_file=None):
    """Build a hierarchical sentence -> section model on top of a BERT layer.

    A sentence encoder (BERT + global average pooling over 150 token ids)
    is applied with ``TimeDistributed`` to sections of 300 sentences, then
    an LSTM(300) and a Dense(21) produce the section output.

    Args:
        bert_config_file: path passed to ``tf.io.gfile.GFile``.
        bert_ckpt_file: passed to ``params_from_pretrained_ckpt``.

    Returns:
        The compiled section-level ``Model``.
    """
    # NOTE(review): the config handle is opened but nothing is read from it,
    # and no checkpoint weights are loaded in this method - confirm intended.
    with tf.io.gfile.GFile(bert_config_file, "r"):
        layer_params = params_from_pretrained_ckpt(bert_ckpt_file)
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    # Sentence encoder.
    sentence_input = Input(shape=(150, ), dtype='int64', name="Input1")
    token_states = bert_layer(sentence_input)
    sentence_vector = GlobalAveragePooling1D()(token_states)
    sentence_model = Model(sentence_input, sentence_vector)

    # Section encoder: the sentence encoder runs over every sentence.
    section_input = Input(shape=(300, 150), dtype='int64', name="Input2")
    section_state = TimeDistributed(sentence_model)(section_input)
    section_state = LSTM(300)(section_state)
    section_output = Dense(21)(section_state)
    section_model = Model(section_input, section_output)

    section_model.compile(optimizer="adam", loss="binary_crossentropy")
    sentence_model.summary()
    section_model.summary()
    return section_model
def create_model(max_seq_len, adapter_size=64):
    """Create and compile a two-class BERT classifier.

    Reads the BERT config from the module-level ``BERT_CONFIG_FILE`` and
    loads weights from the module-level ``BERT_CKPT_FILE``.

    Args:
        max_seq_len: length of the ``input_ids`` sequence.
        adapter_size: adapter size for adapter-BERT; when not ``None`` the
            BERT layer is passed to ``freeze_bert_layers`` after loading.

    Returns:
        The compiled ``keras.Model`` producing softmax class probabilities.
    """
    # Create the base layer from the stock BERT config.
    with tf.io.gfile.GFile(BERT_CONFIG_FILE, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    # Honour the caller's sequence length; the input was previously shaped
    # from a module constant while model.build() used this parameter.
    input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                   name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=2, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert, BERT_CKPT_FILE)

    if adapter_size is not None:
        freeze_bert_layers(bert)

    # The final layer already applies softmax, so the loss must treat its
    # input as probabilities rather than logits.
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    # summary() prints by itself and returns None; do not print the result.
    model.summary()
    return model
def test_load_pretrained(self):
    """Build an adapter-BERT classifier from a checkpoint dir and load weights.

    Uses ``self.bert_ckpt_dir`` for the layer parameters and
    ``self.bert_ckpt_file`` for the weights. No assertions are made; the
    test passes when nothing raises.
    """
    print("Eager Execution:", tf.executing_eagerly())

    layer_params = loader.params_from_pretrained_ckpt(self.bert_ckpt_dir)
    layer_params.adapter_size = 32
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    stack = [
        keras.layers.InputLayer(input_shape=(128, )),
        bert_layer,
        keras.layers.Lambda(lambda x: x[:, 0, :]),
        keras.layers.Dense(2),
    ]
    model = keras.models.Sequential(stack)

    # we need to freeze before build/compile - otherwise keras counts the
    # params twice
    if layer_params.adapter_size is not None:
        freeze_bert_layers(bert_layer)

    model.build(input_shape=(None, 128))
    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    accuracy = keras.metrics.SparseCategoricalAccuracy(name="acc")
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=loss_fn,
                  metrics=[accuracy])

    load_stock_weights(bert_layer, self.bert_ckpt_file)
    model.summary()
def make_entity_border_encoder(bert_path, ckpt_file, max_seq_len, bert_dim):
    """Build a frozen-BERT encoder that concatenates two entity-span features.

    Args:
        bert_path: checkpoint directory; also the prefix of the weight path.
        ckpt_file: checkpoint file name appended directly to ``bert_path``.
        max_seq_len: length of the token-id input.
        bert_dim: passed to ``make_gather_entity_border_fn``.

    Returns:
        A compiled ``Model`` with inputs ``[input_ids, borders_of_entity_1,
        borders_of_entity_2]`` and the concatenated flattened features as
        output.
    """
    # Plain string concatenation: bert_path must carry its own separator.
    weights_path = bert_path + ckpt_file

    layer_params = params_from_pretrained_ckpt(bert_path)
    encoder = BertModelLayer.from_params(layer_params, name="bert",
                                         trainable=False)
    gather_fn = make_gather_entity_border_fn(bert_dim)

    token_ids = Input(shape=(max_seq_len, ), dtype='int32')
    ent1_borders = Input(shape=(2, ), dtype='int32')
    ent2_borders = Input(shape=(2, ), dtype='int32')

    token_states = encoder(token_ids)
    ent1_feature = Lambda(lambda x: gather_fn(x))(
        [token_states, ent1_borders])
    ent2_feature = Lambda(lambda x: gather_fn(x))(
        [token_states, ent2_borders])
    ent1_flat = Flatten()(ent1_feature)
    ent2_flat = Flatten()(ent2_feature)
    pair_feature = concatenate([ent1_flat, ent2_flat])

    model = Model(inputs=[token_ids, ent1_borders, ent2_borders],
                  outputs=pair_feature)
    model.build(input_shape=(None, max_seq_len))

    load_bert_weights(encoder, weights_path)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def make_entity_start_model(bert_path, ckpt_file, max_seq_len, bert_dim):
    """Build a trainable-BERT two-class model from two entity-start features.

    Args:
        bert_path: checkpoint directory; also the prefix of the weight path.
        ckpt_file: checkpoint file name appended directly to ``bert_path``.
        max_seq_len: length of the token-id input.
        bert_dim: passed to ``make_gather_entity_start_fn``.

    Returns:
        A compiled ``Model`` with inputs ``[input_ids, index_of_entity_1,
        index_of_entity_2]`` and a 2-unit softmax output.
    """
    # Plain string concatenation: bert_path must carry its own separator.
    weights_path = bert_path + ckpt_file

    layer_params = params_from_pretrained_ckpt(bert_path)
    encoder = BertModelLayer.from_params(layer_params, name="bert",
                                         trainable=True)
    slice_fn = make_gather_entity_start_fn(bert_dim)

    token_ids = Input(shape=(max_seq_len, ), dtype='int32')
    ent1_index = Input(shape=(2, ), dtype='int32')
    ent2_index = Input(shape=(2, ), dtype='int32')

    token_states = encoder(token_ids)
    ent1_feature = Lambda(lambda x: slice_fn(x))([token_states, ent1_index])
    ent2_feature = Lambda(lambda x: slice_fn(x))([token_states, ent2_index])
    pair_feature = concatenate([ent1_feature, ent2_feature])
    class_probs = Dense(2, activation='softmax')(pair_feature)

    model = Model(inputs=[token_ids, ent1_index, ent2_index],
                  outputs=class_probs)
    model.build(input_shape=(None, max_seq_len))

    load_bert_weights(encoder, weights_path)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def load_keras_model(model_dir, max_seq_len):
    """Load a bare BERT encoder (no task head) as a Keras model.

    Layer parameters come from ``params_from_pretrained_ckpt(model_dir)``
    and weights from ``<model_dir>/bert_model.ckpt``.

    Args:
        model_dir: directory of the pre-trained BERT checkpoint.
        max_seq_len: length of both input sequences.

    Returns:
        A ``keras.Model`` taking ``[input_ids, token_type_ids]`` and
        returning the BERT layer output.
    """
    from tensorflow.python import keras
    from bert import BertModelLayer
    from bert.loader import load_stock_weights, params_from_pretrained_ckpt

    # The config path is not needed here: params_from_pretrained_ckpt is
    # given the directory itself.
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    l_bert = BertModelLayer.from_params(
        params_from_pretrained_ckpt(model_dir))

    l_input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                     name="input_ids")
    l_token_type_ids = keras.layers.Input(shape=(max_seq_len, ),
                                          dtype='int32',
                                          name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids])

    model = keras.Model(inputs=[l_input_ids, l_token_type_ids],
                        outputs=output)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    load_stock_weights(l_bert, bert_ckpt_file)
    return model
def test_eager_loading(self):
    """Build an adapter-BERT classifier with ``run_eagerly=True`` and load weights.

    Uses the model directory returned by ``self.create_mini_bert_weights()``.
    No assertions are made; the test passes when nothing raises.
    """
    print("Eager Execution:", tf.executing_eagerly())

    # a temporal mini bert model_dir
    mini_bert_dir = self.create_mini_bert_weights()

    layer_params = loader.params_from_pretrained_ckpt(mini_bert_dir)
    layer_params.adapter_size = 32
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    stack = [
        keras.layers.InputLayer(input_shape=(128, )),
        bert_layer,
        keras.layers.Lambda(lambda x: x[:, 0, :]),
        keras.layers.Dense(2),
    ]
    model = keras.models.Sequential(stack)
    model.build(input_shape=(None, 128))

    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    accuracy = keras.metrics.SparseCategoricalAccuracy(name="acc")
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=loss_fn,
                  metrics=[accuracy],
                  run_eagerly=True)

    loader.load_stock_weights(bert_layer, mini_bert_dir)
    model.summary()
def create_model(max_seq_len, classes, bert_ckpt_file):
    """Return an uncompiled BERT classifier with stock weights loaded.

    The BERT config is read from ``config.BERT_CONFIG_FILE``.

    Args:
        max_seq_len: length of the token-id input sequence.
        classes: sized collection of labels; its length sets the output width.
        bert_ckpt_file: checkpoint handed to ``load_stock_weights``.

    Returns:
        A ``keras.Model`` mapping ``input_ids`` to softmax class scores.
    """
    with tf.io.gfile.GFile(config.BERT_CONFIG_FILE, "r") as reader:
        config_json = reader.read()
    layer_params = map_stock_config_to_params(
        StockBertConfig.from_json_string(config_json))
    layer_params.adapter_size = None
    bert_layer = BertModelLayer.from_params(layer_params, name='bert')

    token_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                   name="input_ids")
    sequence_output = bert_layer(token_ids)
    print(f"Shape of BERT Embedding layer :{sequence_output.shape}")

    # The BERT output is (None, max_seq_len, hidden); the Lambda keeps only
    # the first position, giving (None, hidden).
    first_position = keras.layers.Lambda(
        lambda seq: seq[:, 0, :])(sequence_output)
    first_position = keras.layers.Dropout(0.5)(first_position)
    hidden = keras.layers.Dense(units=768,
                                activation="tanh")(first_position)
    hidden = keras.layers.Dropout(0.5)(hidden)
    class_scores = keras.layers.Dense(units=len(classes),
                                      activation="softmax")(hidden)

    classifier = keras.Model(inputs=token_ids, outputs=class_scores)
    classifier.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert_layer, bert_ckpt_file)
    return classifier
def Create_Modle(max_seq_len, bert_ckpt_file):
    """Return an uncompiled BERT classifier with stock weights loaded.

    Relies on the module-level names ``bert_config_file`` (path of the BERT
    JSON config) and ``classes`` (its length sets the output width).

    Args:
        max_seq_len: length of the token-id input sequence.
        bert_ckpt_file: checkpoint handed to ``load_stock_weights``.

    Returns:
        A ``keras.Model`` mapping ``input_ids`` to softmax class scores.
    """
    with tf.io.gfile.GFile(bert_config_file, 'r') as reader:
        config_json = reader.read()
    layer_params = map_stock_config_to_params(
        StockBertConfig.from_json_string(config_json))
    layer_params.adapter_size = None

    # BERT layer followed by the Keras input it consumes.
    bert_layer = BertModelLayer.from_params(layer_params, name='bert')
    token_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                   name='input_ids')
    sequence_output = bert_layer(token_ids)
    print('Bert Shape: ', sequence_output.shape)

    # Classification head on the first sequence position.
    head = keras.layers.Lambda(lambda seq: seq[:, 0, :])(sequence_output)
    head = keras.layers.Dropout(0.5)(head)
    head = keras.layers.Dense(units=768, activation='tanh')(head)
    head = keras.layers.Dropout(0.5)(head)
    class_scores = keras.layers.Dense(units=len(classes),
                                      activation='softmax')(head)

    classifier = keras.Model(inputs=token_ids, outputs=class_scores)
    classifier.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert_layer, bert_ckpt_file)
    return classifier
def create_model(max_seq_len, bert_ckpt_file, classes):
    """Create and compile a BERT classifier configured from ``config``.

    The BERT config path, dropout rate, optimizer and loss are all read
    from the module-level ``config`` object.

    Args:
        max_seq_len: length of the token-id input sequence.
        bert_ckpt_file: checkpoint to load into the BERT layer. A falsy
            value falls back to ``config.bert_ckpt_file``.
        classes: sized collection of labels; its length sets the output width.

    Returns:
        The compiled ``keras.Model``.
    """
    with tf.io.gfile.GFile(config.bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = None
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                   name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(config.DROPOUT)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(config.DROPOUT)(logits)
    logits = keras.layers.Dense(units=len(classes),
                                activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # Respect the caller-supplied checkpoint; it used to be silently ignored
    # in favour of the config value.
    ckpt_path = bert_ckpt_file or config.bert_ckpt_file
    load_stock_weights(bert, ckpt_path)

    # summary() prints by itself and returns None; do not print the result.
    model.summary()

    model.compile(
        optimizer=config.OPTIMIZER,
        loss=config.LOSS,
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    return model
def model_factory(
    name: str,
    external_datasets: ExternalDatasets,
    preprocessor: Preprocessor,
    architecture: Architecture,
    file_system: FileSystem,
) -> keras.Model:
    """Assemble a BERT-based intent classifier and load the stock BERT weights.

    The network is: BERT layer -> first-position slice -> dropout -> dense
    -> dropout -> dense with one unit per intent.

    :param name: name given to the BERT layer.
    :type name: str
    :param external_datasets: supplies ``all_intents()``; its length sets
        the width of the output layer.
    :type external_datasets: ExternalDatasets
    :param preprocessor: supplies ``max_sequence_length`` for the input shape.
    :type preprocessor: Preprocessor
    :param architecture: supplies the dropout rates and activations of the
        classification head.
    :type architecture: Architecture
    :param file_system: supplies the BERT config path and the BERT
        checkpoint path.
    :type file_system: FileSystem
    :return: the built, uncompiled model.
    :rtype: keras.Model
    """
    seq_len = preprocessor.max_sequence_length

    with tf.io.gfile.GFile(file_system.get_bert_config_path()) as reader:
        config_json = reader.read()
    layer_params = map_stock_config_to_params(
        StockBertConfig.from_json_string(config_json))
    layer_params.adapter_size = None
    bert_layer = BertModelLayer.from_params(layer_params, name=name)

    token_ids = keras.layers.Input(shape=(seq_len, ), dtype='int32',
                                   name="input_ids")
    sequence_output = bert_layer(token_ids)

    head = keras.layers.Lambda(lambda seq: seq[:, 0, :])(sequence_output)
    head = keras.layers.Dropout(architecture.clf_out_dropout_rate)(head)
    head = keras.layers.Dense(
        units=BertModelParameters().bert_h_param,
        activation=architecture.clf_out_activation)(head)
    head = keras.layers.Dropout(architecture.logits_dropout_rate)(head)
    intent_scores = keras.layers.Dense(
        units=len(external_datasets.all_intents()),
        activation=architecture.logits_activation)(head)

    classifier = keras.Model(inputs=token_ids, outputs=intent_scores)
    classifier.build(input_shape=(None, seq_len))

    load_stock_weights(bert_layer, file_system.get_bert_model_path())
    return classifier
def create_model(self, type: str, adapter_size=None):
    """Create and compile a classification model, stored on ``self.model``.

    Reads ``self._bert_config_file``, ``self._bert_ckpt_file`` and
    ``self.max_seq_len``; also records ``type`` on ``self.type``.

    Args:
        type: "binary" to build a model for binary classification (2
            classes), "multi" for multiclass classification (3 classes).
        adapter_size: adapter size for adapter-BERT (see arXiv:1902.00751).
            When not ``None``, ``self.freeze_bert_layers`` is applied to
            the BERT layer after the weights are loaded.

    Raises:
        TypeError: if ``type`` is neither "binary" nor "multi".
    """
    self.type = type

    if type == 'binary':
        class_count = 2
    elif type == 'multi':
        class_count = 3
    else:
        raise TypeError("Choose a proper type of classification")

    # create the bert layer
    with tf.io.gfile.GFile(self._bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(self.max_seq_len, ),
                                   dtype='int32',
                                   name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.3)(cls_out)
    logits = keras.layers.Dense(units=768, activation="relu")(cls_out)
    logits = keras.layers.Dropout(0.4)(logits)
    logits = keras.layers.Dense(units=class_count,
                                activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, self.max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, self._bert_ckpt_file)

    # freeze weights if adapter-BERT is used
    if adapter_size is not None:
        self.freeze_bert_layers(bert)

    # The final layer already applies softmax, so the loss must treat its
    # input as probabilities rather than logits.
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")])

    model.summary()
    self.model = model
def build(self, max_seq_length, bert_ckpt_file=bert_ckpt_file_location,
          **kwargs):
    """Build and compile the classifier, storing it on ``self.model``.

    The BERT config is read from ``abs_path/bert_config_file_location`` and
    the output width is ``len(self.classes)``.

    Args:
        max_seq_length: length of the token-id input sequence.
        bert_ckpt_file: checkpoint path relative to the module-level
            ``abs_path``.
        **kwargs: optional settings -
            ``optimizer`` (default ``"adam"``),
            ``metrics`` (default ``['accuracy']``),
            ``adapter_size`` (default ``64``, see arXiv:1902.00751; when not
            ``None`` the BERT layer is frozen via ``freeze_bert_layers``),
            ``dropout_rate`` (default ``0.5``, used after both dense layers).
    """
    optimizer = kwargs.get("optimizer", "adam")
    metrics = kwargs.get("metrics", ['accuracy'])
    adapter_size = kwargs.get("adapter_size", 64)
    dropout_rate = kwargs.get('dropout_rate', 0.5)

    # create the bert layer
    with tf.io.gfile.GFile(
            os.path.join(abs_path, bert_config_file_location),
            "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = tf.keras.layers.Input(shape=(max_seq_length, ),
                                      dtype='int32',
                                      name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    cls_out = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = tf.keras.layers.Dropout(0.5)(cls_out)
    dense_out_1 = tf.keras.layers.Dense(units=768,
                                        activation="tanh")(cls_out)
    dense_out_1 = tf.keras.layers.Dropout(dropout_rate)(dense_out_1)
    # NOTE(review): softmax on a hidden layer is unusual - confirm intended.
    dense_out_2 = tf.keras.layers.Dense(units=200,
                                        activation="softmax")(dense_out_1)
    dense_out_2 = tf.keras.layers.Dropout(dropout_rate)(dense_out_2)
    logits = tf.keras.layers.Dense(units=len(self.classes),
                                   activation='softmax')(dense_out_2)

    self.model = tf.keras.Model(inputs=input_ids, outputs=logits)
    self.model.build(input_shape=(None, max_seq_length))

    # load the pre-trained model weights
    load_stock_weights(bert, os.path.join(abs_path, bert_ckpt_file))

    # freeze weights if adapter-BERT is used
    if adapter_size is not None:
        freeze_bert_layers(bert)

    # The final layer already applies softmax, so the loss must treat its
    # input as probabilities rather than logits.
    self.model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False),
        metrics=metrics)
    self.model.summary()
def createMultiModelMaximum(max_seq_len, bert_ckpt_file, bert_config_file,
                            NUM_CLASS):
    """Build a text + two-image-backbone classifier merged with ``Maximum``.

    A BERT text branch, a ResNet50 branch and an InceptionV3 branch are
    each projected to 768 units and merged element-wise with ``Maximum``,
    followed by a dense stack and a softmax output. The architecture is
    also written to ``multiple_inputs_text.png`` via ``plot_model``.

    Args:
        max_seq_len: length of the BERT token-id input.
        bert_ckpt_file: checkpoint handed to ``load_stock_weights``.
        bert_config_file: path of the BERT JSON config.
        NUM_CLASS: number of units in the softmax output layer.

    Returns:
        Tuple ``(model, 17)``; the model is not compiled here.
    """

    def image_head(features):
        # Pool a backbone's feature map and project it to 768 units.
        features = GlobalAveragePooling2D()(features)
        features = Dropout(0.5)(features)
        features = Dense(2048)(features)
        features = Dropout(0.5)(features)
        return Dense(768)(features)

    # Text branch.
    with GFile(bert_config_file, "r") as reader:
        config_json = reader.read()
    layer_params = map_stock_config_to_params(
        StockBertConfig.from_json_string(config_json))
    layer_params.adapter_size = None
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    text_input = Input(shape=(max_seq_len, ), dtype='int32',
                       name="input_ids_bert")
    text_states = bert_layer(text_input)
    text_head = Lambda(lambda seq: seq[:, 0, :])(text_states)
    text_head = Dropout(0.5)(text_head)
    text_out = Dense(units=768, activation="tanh")(text_head)  # 768 before
    load_stock_weights(bert_layer, bert_ckpt_file)

    # image models:
    inceptionv3 = InceptionV3(weights='imagenet', include_top=False)
    resnet50 = ResNet50(weights='imagenet', include_top=False)
    resnet_out = image_head(resnet50.output)
    inception_out = image_head(inceptionv3.output)

    # Element-wise maximum across the three branches (Concatenate was the
    # alternative considered in the original).
    merged = Maximum()([resnet_out, inception_out, text_out])

    # Remaining layers.
    hidden = merged
    for width in (2048, 1024, 512):
        hidden = Dense(width)(hidden)
        hidden = Dropout(0.5)(hidden)
    class_probs = Dense(NUM_CLASS, activation='softmax',
                        name='output_layer')(hidden)

    model = Model(inputs=[resnet50.input, inceptionv3.input, text_input],
                  outputs=class_probs)
    plot_model(model,
               to_file='multiple_inputs_text.png',
               show_shapes=True,
               dpi=600,
               expand_nested=False)
    return model, 17
def get_bert_layer(params, name="BERT"):
    """Create a BERT layer from the given parameters.

    Arguments:
        params {BERT Params} -- Parameters for the BERT model. Grab them
            using get_bert_config

    Keyword Arguments:
        name {str} -- Name given to the layer (default: {"BERT"})

    Returns:
        BertModelLayer -- Layer to place in a model
    """
    bert_layer = BertModelLayer.from_params(params, name=name)
    return bert_layer
def create_model(max_seq_len, bert_config_file, bert_ckpt_file,
                 adapter_size=64):
    """Create and compile a six-class BERT classification model.

    Args:
        max_seq_len: length of the ``input_ids`` sequence.
        bert_config_file: path of the BERT JSON config.
        bert_ckpt_file: checkpoint handed to ``load_stock_weights``.
        adapter_size: adapter size for adapter-BERT (see arXiv:1902.00751).
            When not ``None`` the BERT layer is passed to
            ``freeze_bert_layers`` after the weights are loaded.

    Returns:
        The compiled ``keras.Model`` producing softmax class probabilities.
    """
    # create the bert layer
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                   name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=6, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, bert_ckpt_file)

    # freeze weights if adapter-BERT is used
    if adapter_size is not None:
        freeze_bert_layers(bert)

    # The final layer already applies softmax, so the loss must treat its
    # input as probabilities rather than logits.
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    model.summary()
    return model
def create_bert_model(self, max_seq_len=18):
    """Build a bare two-input BERT model from ``self.bert_config_file``.

    No weights are loaded here.

    Args:
        max_seq_len: length of both input sequences.

    Returns:
        Tuple ``(model, bert_layer, (input_ids, token_type_ids))``.
    """
    with tf.io.gfile.GFile(self.bert_config_file, "r") as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
    bert_layer = BertModelLayer.from_params(
        map_stock_config_to_params(stock_config), name="bert")

    ids_input = keras.layers.Input(shape=(max_seq_len, ), dtype='int32',
                                   name="input_ids")
    type_ids_input = keras.layers.Input(shape=(max_seq_len, ),
                                        dtype='int32',
                                        name="token_type_ids")
    sequence_output = bert_layer([ids_input, type_ids_input])

    bert_model = keras.Model(inputs=[ids_input, type_ids_input],
                             outputs=sequence_output)
    return bert_model, bert_layer, (ids_input, type_ids_input)
def get_bert_model(max_length: int,
                   freeze_bert_layers: bool = False,
                   load_bert_weights: bool = True) -> tf.keras.Model:
    """Build and compile a BERT classifier with average pooling.

    Requires a bert folder downloaded from
    https://github.com/google-research/bert, located at the module-level
    ``model_dir``. The output width is the module-level ``class_count``.

    :param max_length: maximum size of a sentence
    :param freeze_bert_layers: when true, apply ``apply_adapter_freeze()``
        and mark the BERT layer as non-trainable
    :param load_bert_weights: when true, load ``bert_model.ckpt`` from
        ``model_dir`` into the BERT layer
    :return: tensorflow model object
    """
    bert_params: BertModelLayer.Params = params_from_pretrained_ckpt(
        model_dir)
    l_bert: BertModelLayer = BertModelLayer.from_params(bert_params,
                                                        name='bert')

    if freeze_bert_layers:
        # With all bert weights frozen, the performance is not very good
        l_bert.apply_adapter_freeze()
        l_bert.trainable = False

    l_input_ids = tf.keras.layers.Input(shape=(max_length, ), dtype='int32')

    # If needed, usage of token_type_ids is described here:
    # https://github.com/kpe/bert-for-tf2/blob/master/examples/gpu_movie_reviews.ipynb
    output = l_bert(l_input_ids)  # [batch_size, max_seq_len, hidden_size]
    output = tf.keras.layers.GlobalAveragePooling1D()(
        output)  # [batch_size, hidden_size]

    # Fine-tune for task
    output = tf.keras.layers.Dense(class_count, activation='softmax')(
        output)  # [batch_size, class_count]

    model: tf.keras.Model = tf.keras.Model(inputs=[l_input_ids],
                                           outputs=output)

    # Comment from bert repo: The learning rate we used in the paper was
    # 1e-4. However, if you are doing additional steps of pre-training
    # starting from an existing BERT checkpoint, you should use a smaller
    # learning rate (e.g., 2e-5)
    #
    # compile() takes no input_shape argument (and the shape previously
    # passed described two inputs for this one-input model), so it is not
    # passed. ``learning_rate`` replaces the deprecated ``lr`` alias.
    model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(),
                  optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                  metrics=['accuracy'])
    model.summary()

    if load_bert_weights:
        bert_ckpt_file: str = os.path.join(model_dir, "bert_model.ckpt")
        load_stock_weights(l_bert, bert_ckpt_file)

    return model
def build_model(bert_config, init_checkpoint, max_seq_len):
    """Build a three-input BERT model and load the original weights.

    Args:
        bert_config: passed to ``from_json_file`` to obtain layer parameters.
        init_checkpoint: checkpoint handed to ``load_stock_weights``.
        max_seq_len: length of each of the three input sequences.

    Returns:
        A ``keras.Model`` taking ``[input_ids, token_type_ids, input_mask]``
        and returning the BERT layer output.
    """
    layer_params = from_json_file(bert_config)
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    # Input endpoints: token ids, token-type ids and the mask.
    ids_input = keras.layers.Input(shape=(max_seq_len, ), dtype='int32')
    type_ids_input = keras.layers.Input(shape=(max_seq_len, ),
                                        dtype='int32')
    mask_input = keras.layers.Input(shape=(max_seq_len, ), dtype='int32')

    # [batch_size, max_seq_len, hidden_size]
    sequence_output = bert_layer([ids_input, type_ids_input],
                                 mask=mask_input,
                                 training=False)
    print('Output shape: {}'.format(sequence_output.get_shape()))

    bert_model = keras.Model(
        inputs=[ids_input, type_ids_input, mask_input],
        outputs=sequence_output)

    # loading the original pre-trained weights into the BERT layer:
    load_stock_weights(bert_layer, init_checkpoint)
    return bert_model
def _load_bert(self, bert_config_file, bert_ckpt_file):
    """Build a BERT sentence encoder and load its stock weights.

    Args:
        bert_config_file: path of the BERT JSON config.
        bert_ckpt_file: checkpoint handed to ``load_stock_weights``.

    Returns:
        A ``keras.Model`` mapping ``input_ids`` (length
        ``self.max_seq_len``) to the first-position BERT embedding.

    Raises:
        Exception: anything raised while reading the config or creating the
            layer is printed and then re-raised.
    """
    try:
        with tf.io.gfile.GFile(bert_config_file, 'r') as gf:
            stock_config = StockBertConfig.from_json_string(gf.read())
            layer_params = map_stock_config_to_params(stock_config)
            layer_params.adapter_size = None
            bert_layer = BertModelLayer.from_params(layer_params,
                                                    name='bert')
    except Exception as e:
        print(e)
        raise e

    token_ids = keras.layers.Input(shape=(self.max_seq_len, ),
                                   dtype='int64',
                                   name="input_ids")
    token_states = bert_layer(token_ids)

    # take the first embedding of BERT as the output embedding
    first_embedding = keras.layers.Lambda(
        lambda seq: seq[:, 0, :])(token_states)

    encoder = keras.Model(inputs=token_ids, outputs=first_embedding)
    encoder.build(input_shape=(None, self.max_seq_len))

    load_stock_weights(bert_layer, bert_ckpt_file)
    return encoder
def BERTClassifier(max_seq_len=128,
                   bert_model_dir='models/chinese_L-12_H-768_A-12',
                   do_lower_case=False):
    """Build a compiled two-class BERT classifier and its tokenizer.

    Args:
        max_seq_len: length of both input sequences.
        bert_model_dir: directory containing ``bert_config.json``,
            ``bert_model.ckpt`` and ``vocab.txt``.
        do_lower_case: forwarded to ``FullTokenizer``.

    Returns:
        Tuple ``(model, tokenizer)``.
    """
    # load bert parameters
    with tf.io.gfile.GFile(os.path.join(bert_model_dir, "bert_config.json"),
                           "r") as reader:
        stock_params = StockBertConfig.from_json_string(reader.read())
    bert_params = stock_params.to_bert_model_layer_params()

    # create bert structure according to the parameters
    bert = BertModelLayer.from_params(bert_params, name="bert")

    # inputs
    input_token_ids = tf.keras.Input((max_seq_len, ), dtype=tf.int32,
                                     name='input_ids')
    input_segment_ids = tf.keras.Input((max_seq_len, ), dtype=tf.int32,
                                       name='token_type_ids')

    # classifier
    output = bert([input_token_ids, input_segment_ids])
    cls_out = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = tf.keras.layers.Dropout(rate=0.5)(cls_out)
    logits = tf.keras.layers.Dense(units=cls_out.shape[-1],
                                   activation=tf.math.tanh)(cls_out)
    logits = tf.keras.layers.Dropout(rate=0.5)(logits)
    logits = tf.keras.layers.Dense(units=2,
                                   activation=tf.nn.softmax)(logits)

    model = tf.keras.Model(inputs=[input_token_ids, input_segment_ids],
                           outputs=logits)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    # load bert layer weights
    load_stock_weights(bert, os.path.join(bert_model_dir, "bert_model.ckpt"))

    # freeze_bert_layers
    freeze_bert_layers(bert)

    # The final layer already applies softmax, so the loss must treat its
    # input as probabilities rather than logits.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(2e-5),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')])

    # create tokenizer, chinese character needs no lower case.
    tokenizer = FullTokenizer(vocab_file=os.path.join(bert_model_dir,
                                                      "vocab.txt"),
                              do_lower_case=do_lower_case)
    return model, tokenizer
def make_cls_encoder(bert_path, ckpt_file, max_seq_len, bert_dim):
    """Build a frozen-BERT encoder that outputs the first-position embedding.

    Args:
        bert_path: checkpoint directory; also the prefix of the weight path.
        ckpt_file: checkpoint file name appended directly to ``bert_path``.
        max_seq_len: length of the token-id input.
        bert_dim: accepted but not used by this function.

    Returns:
        A compiled ``Model`` mapping token ids to the embedding gathered at
        index 0 of axis 1.
    """
    # Plain string concatenation: bert_path must carry its own separator.
    weights_path = bert_path + ckpt_file

    layer_params = params_from_pretrained_ckpt(bert_path)
    encoder = BertModelLayer.from_params(layer_params, name="bert",
                                         trainable=False)

    token_ids = Input(shape=(max_seq_len, ), dtype='int32')
    token_states = encoder(token_ids)
    first_embedding = Lambda(
        lambda x: tf.gather(x, indices=0, axis=1))(token_states)

    model = Model(inputs=token_ids, outputs=first_embedding)
    model.build(input_shape=(None, max_seq_len))

    load_bert_weights(encoder, weights_path)
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])
    return model
def __init__(self):
    """Load the intent dataset, vocabulary, BERT params and the saved model.

    All paths are fixed and relative to the working directory (under
    ``./Intent_cl/``). Sets ``max_len``, ``config_path``, ``data``,
    ``vocab``, ``bert_params``, ``intent_model`` and ``classes``.
    """
    self.max_len = 29
    self.config_path = './Intent_cl/Bert_model/bert_config.json'
    self.data = pd.read_csv('./Intent_cl/Intent_dataset/category_data.csv')

    with open('./Intent_cl/Bert_model/vocab.json', 'r') as vocab_file:
        self.vocab = json.load(vocab_file)

    with tf.io.gfile.GFile(self.config_path, "r") as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
    self.bert_params = map_stock_config_to_params(stock_config)
    self.bert_params.adapter_size = None

    # The saved model references the custom "BertModelLayer" by name.
    bert_custom_objects = {
        "BertModelLayer": BertModelLayer.from_params(self.bert_params,
                                                     name="bert")
    }
    self.intent_model = keras.models.load_model(
        './Intent_cl/Bert_model/nomal_news_weather_etc_kobert_model_category.h5',
        custom_objects=bert_custom_objects)

    # Distinct intent labels, in order of first appearance in the data.
    self.classes = self.data.intent.unique().tolist()
def test_concat(self):
    """Build a Sequential model combining max- and mean-pooling via ``pf.Concat``.

    Uses the mini BERT weights from ``self.create_mini_bert_weights()``.
    No assertions are made; the test passes when building, compiling and
    weight loading finish without raising.
    """
    seq_len = 4
    mini_bert_dir = self.create_mini_bert_weights()

    layer_params = loader.params_from_pretrained_ckpt(mini_bert_dir)
    layer_params.adapter_size = 32
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    stack = [
        keras.layers.InputLayer(input_shape=(seq_len, )),
        bert_layer,
        keras.layers.TimeDistributed(
            keras.layers.Dense(layer_params.hidden_size)),
        keras.layers.TimeDistributed(keras.layers.LayerNormalization()),
        keras.layers.TimeDistributed(keras.layers.Activation("tanh")),
        pf.Concat([
            # GlobalMaxPooling1D
            keras.layers.Lambda(lambda x: tf.math.reduce_max(x, axis=1)),
            # GlobalAvgPooling1
            keras.layers.Lambda(lambda x: tf.math.reduce_mean(x, axis=1)),
        ]),
        keras.layers.Dense(units=layer_params.hidden_size),
        keras.layers.Activation("tanh"),
        keras.layers.Dense(units=2),
    ]
    model = keras.models.Sequential(stack)

    model.build(input_shape=(None, seq_len))
    model.summary()

    loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=[loss_fn],
                  metrics=[keras.metrics.SparseCategoricalAccuracy()],
                  run_eagerly=True)

    loader.load_stock_weights(bert_layer, mini_bert_dir)
    model.summary()
def __init__(self, model_dir, max_length, bert_params, num_layers,
             trainable):
    """Wrap a BERT layer in a Keras model stored on ``self.model``.

    Args:
        model_dir: checkpoint directory. Used to derive ``bert_params``
            when those are ``None`` and, when not ``None``, to load
            ``bert_model.ckpt``.
        max_length: input sequence length (must be an ``int``).
            ``bert_params.max_position_embeddings`` is raised to this value
            when it is smaller.
        bert_params: BERT layer parameters, or ``None`` to read them from
            ``model_dir``. The object is modified in place.
        num_layers: when an ``int``, overrides ``bert_params.num_layers``.
        trainable: when falsy, every layer nested in the BERT layer is
            marked non-trainable.

    Raises:
        AssertionError: if ``max_length`` is not an int, if both
            ``bert_params`` and ``model_dir`` are ``None``, or if
            ``model_dir`` is needed but does not exist.
    """
    # Do not pass ``self`` explicitly: super() already binds the instance,
    # and the extra argument would reach the base initializer as a stray
    # positional value.
    super(EncoderBert, self).__init__()

    assert isinstance(max_length, int)
    assert bert_params is not None or model_dir is not None

    if bert_params is None:
        assert os.path.exists(model_dir)
        bert_params = params_from_pretrained_ckpt(model_dir)

    if isinstance(num_layers, int):
        bert_params.num_layers = num_layers
    if bert_params.max_position_embeddings < max_length:
        bert_params.max_position_embeddings = max_length

    l_bert = BertModelLayer.from_params(bert_params, name="bert")
    l_input_ids = tf.keras.layers.Input(shape=(max_length, ), dtype='int32')
    output = l_bert(l_input_ids)
    model = tf.keras.Model(inputs=l_input_ids, outputs=output)
    model.build(input_shape=(None, max_length))

    def flatten_layers(root_layer):
        # Yield root_layer (when it is a Layer) and, recursively, every
        # layer reachable through the private ``_layers`` attribute.
        if isinstance(root_layer, tf.keras.layers.Layer):
            yield root_layer
        for layer in root_layer._layers:
            yield from flatten_layers(layer)

    if not trainable:
        for layer in flatten_layers(l_bert):
            layer.trainable = False

    self.model = model

    if model_dir is not None:
        bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")
        load_stock_weights(l_bert, bert_ckpt_file)
def __init__(
    self,
    bert_model_path,
    max_length=300,
    q_units=100,
    p_units=200,
):
    """Create the frozen BERT embedding layer and the BiDAF sub-layers.

    Args:
        bert_model_path: passed to ``params_from_pretrained_ckpt``.
        max_length: accepted but not used in this initializer.
        q_units: size passed to ``get_rnn_layer`` for the question RNNs.
        p_units: size passed to ``get_rnn_layer`` for the paragraph RNNs;
            the bilinear dense layer has ``2 * p_units`` units.
    """
    super(EnBertBidaf, self).__init__()

    # ---------------- BERT embedding part ----------------
    pretrained_params = params_from_pretrained_ckpt(bert_model_path)
    self.bert_layer = BertModelLayer.from_params(pretrained_params,
                                                 name="bert")
    self.bert_layer.trainable = False
    # Linear projections of the BERT embeddings for question / paragraph
    # (linear_q_bert, linear_p_bert) are disabled in the original.

    # ------------------- BiDAF part ----------------------
    # Per the original note, the RNN layers return (a1...aT, cT) if LSTM
    # (a1...aT, aT, cT); if Bidirectional (a1...aT, a1T, a2T, c1T, c2T).
    self.q_lstm1 = get_rnn_layer(q_units)
    self.q_lstm2 = get_rnn_layer(q_units)

    # For weighted average hidden units in question (self attention)
    self.dense_q = tf.keras.layers.Dense(1, activation='linear')

    self.p_lstm1 = get_rnn_layer(p_units)
    self.p_lstm2 = get_rnn_layer(p_units)

    # W bilinear for question -> paragraph attention
    bilinear_units = 2 * p_units
    self.dense_bilinear = tf.keras.layers.Dense(bilinear_units,
                                                activation="linear")

    # Dense layers for predicting
    self.dense1 = tf.keras.layers.Dense(64, activation='relu')
    self.dense2 = tf.keras.layers.Dense(1, activation='linear')
def create_text_model(max_seq_len,
                      bert_ckpt_file,
                      bert_config_file,
                      NUM_CLASS,
                      overwriteLayerAndEmbeddingSize=False,
                      isPreTrained=False,
                      pathToBertModelWeights=None,
                      isTrainable=True):
    """Build a BERT text classifier, from stock or previously saved weights.

    Args:
        max_seq_len: length of the token-id input sequence.
        bert_ckpt_file: stock checkpoint, used when ``isPreTrained`` is false.
        bert_config_file: path of the BERT JSON config.
        NUM_CLASS: number of units in the softmax output layer.
        overwriteLayerAndEmbeddingSize: when true, set the config's
            ``max_position_embeddings`` to ``max_seq_len``.
        isPreTrained: when true, load full-model weights from
            ``pathToBertModelWeights`` instead of the stock checkpoint.
        pathToBertModelWeights: weight file for the pre-trained case.
        isTrainable: in the pre-trained case, when false every layer of the
            model is marked non-trainable.

    Returns:
        The model alone when ``isPreTrained`` is false, otherwise the tuple
        ``(model, 2)``.
        NOTE(review): the two branches return different shapes - callers
        must handle both; confirm this asymmetry is intended.
    """
    with GFile(bert_config_file, "r") as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
    if overwriteLayerAndEmbeddingSize:
        stock_config.max_position_embeddings = max_seq_len
    layer_params = map_stock_config_to_params(stock_config)
    layer_params.adapter_size = None
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    token_ids = Input(shape=(max_seq_len, ), dtype='int32',
                      name="input_ids")
    sequence_output = bert_layer(token_ids)
    print("bert shape", sequence_output.shape)

    head = Lambda(lambda seq: seq[:, 0, :],
                  name='bert_output_layer_768')(sequence_output)
    head = Dropout(0.5)(head)
    class_probs = Dense(NUM_CLASS, activation="softmax")(head)

    model_bert = Model(inputs=token_ids, outputs=class_probs, name='BERT')
    model_bert.build(input_shape=(None, max_seq_len))

    # Stock checkpoint path: only the BERT layer receives weights.
    if not isPreTrained:
        load_stock_weights(bert_layer, bert_ckpt_file)
        return model_bert

    # Pre-trained path: restore the whole model, optionally frozen.
    model_bert.load_weights(pathToBertModelWeights)
    if not isTrainable:
        for layer in model_bert.layers:
            layer.trainable = False
    return model_bert, 2