def test_load_pretrained(self):
    """Smoke-test: load stock BERT weights into an adapter-BERT classifier."""
    print("Eager Execution:", tf.executing_eagerly())

    bert_params = loader.params_from_pretrained_ckpt(self.bert_ckpt_dir)
    bert_params.adapter_size = 32
    bert = BertModelLayer.from_params(bert_params, name="bert")

    model = keras.models.Sequential([
        keras.layers.InputLayer(input_shape=(128,)),
        bert,
        keras.layers.Lambda(lambda seq: seq[:, 0, :]),
        keras.layers.Dense(2),
    ])

    # we need to freeze before build/compile - otherwise keras counts the params twice
    if bert_params.adapter_size is not None:
        freeze_bert_layers(bert)

    model.build(input_shape=(None, 128))
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    load_stock_weights(bert, self.bert_ckpt_file)
    model.summary()
def create_model(max_seq_len, adapter_size=64):  # Adapter size for adapter-bert
    """Build and compile a 2-class BERT classifier.

    :param max_seq_len: input sequence length the model is built for.
    :param adapter_size: adapter-BERT bottleneck size; None disables adapters.
    :return: a compiled keras.Model taking token ids of shape (None, max_seq_len).
    """
    # Creating Base Layer from bert_config
    with tf.io.gfile.GFile(BERT_CONFIG_FILE, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    # BUG FIX: the input layer previously used the global MAX_SEQ_LEN while
    # model.build() used the max_seq_len parameter; the two could disagree.
    # Use the parameter consistently.
    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    # [CLS] embedding -> dropout -> tanh projection -> dropout -> softmax head
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=2, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert, BERT_CKPT_FILE)

    # freeze everything except the adapters when adapter-BERT is used
    if adapter_size is not None:
        freeze_bert_layers(bert)

    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    print(model.summary())
    return model
def load_keras_model(model_dir, max_seq_len):
    """Build a two-input (token ids + segment ids) BERT model and load the
    stock checkpoint weights found in model_dir.

    :param model_dir: directory containing bert_model.ckpt
    :param max_seq_len: input sequence length
    :return: a built keras.Model whose output is the BERT sequence output
    """
    from tensorflow.python import keras
    from bert import BertModelLayer
    from bert.loader import load_stock_weights, params_from_pretrained_ckpt

    # CLEANUP: removed the unused local `bert_config_file` and the unused
    # StockBertConfig import - params come from the pretrained checkpoint dir.
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    l_bert = BertModelLayer.from_params(params_from_pretrained_ckpt(model_dir))

    l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    l_token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="token_type_ids")
    output = l_bert([l_input_ids, l_token_type_ids])

    model = keras.Model(inputs=[l_input_ids, l_token_type_ids], outputs=output)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    load_stock_weights(l_bert, bert_ckpt_file)
    return model
def load_keras_model(model_dir, max_seq_len):
    """Load a BERT sentence classifier (keras) with stock pre-trained weights.

    Builds a two-input model (token ids + segment ids) with a single-unit
    sigmoid head on the [CLS] embedding.

    :param model_dir: directory containing bert_config.json / bert_model.ckpt
    :param max_seq_len: input sequence length
    :return: a built keras.Model with checkpoint weights loaded
    """
    from tensorflow.python import keras
    from bert import BertModelLayer
    from bert.loader import StockBertConfig, load_stock_weights

    bert_config_file = os.path.join(model_dir, "bert_config.json")
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        l_bert = BertModelLayer.from_params(bc.to_bert_model_layer_params(), name="bert")

    l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    l_token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="token_type_ids")

    l = l_bert([l_input_ids, l_token_type_ids])
    # BUG FIX: `Lambda` was referenced bare but only `keras` is imported in this
    # scope; unless Lambda is imported at module level that raises NameError.
    # Fully qualify it for safety.
    l = keras.layers.Lambda(lambda x: x[:, 0])(l)
    output = keras.layers.Dense(1, activation=keras.activations.sigmoid)(l)

    model = keras.Model(inputs=[l_input_ids, l_token_type_ids], outputs=output)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    load_stock_weights(l_bert, bert_ckpt_file)
    return model
def predict_on_keras_model(self, input_ids, input_mask, token_type_ids):
    """Build the keras BERT model, load stock weights, and return predictions.

    NOTE(review): input_mask is accepted but not fed to predict() - confirm.
    """
    max_seq_len = input_ids.shape[-1]
    model, bert, k_inputs = self.create_bert_model(max_seq_len)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])
    load_stock_weights(bert, self.bert_ckpt_file)
    return model.predict([input_ids, token_type_ids])
def test_eager_loading(self):
    """Load mini-BERT weights into an adapter-BERT model under eager execution."""
    print("Eager Execution:", tf.executing_eagerly())

    # a temporal mini bert model_dir
    model_dir = self.create_mini_bert_weights()

    params = loader.params_from_pretrained_ckpt(model_dir)
    params.adapter_size = 32
    bert = BertModelLayer.from_params(params, name="bert")

    model = keras.models.Sequential([
        keras.layers.InputLayer(input_shape=(128,)),
        bert,
        keras.layers.Lambda(lambda seq: seq[:, 0, :]),
        keras.layers.Dense(2),
    ])
    model.build(input_shape=(None, 128))
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")],
                  run_eagerly=True)

    loader.load_stock_weights(bert, model_dir)
    model.summary()
def Create_Modle(max_seq_len, bert_ckpt_file):
    """Build a softmax BERT classifier over the global `classes` and load
    the stock checkpoint weights from bert_ckpt_file."""
    # read the stock BERT config and map it to layer parameters
    with tf.io.gfile.GFile(bert_config_file, 'r') as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(stock_config)
        bert_params.adapter_size = None

    # Creating Model
    bert = BertModelLayer.from_params(bert_params, name='bert')

    # Keras Input Layer
    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name='input_ids')
    seq_output = bert(input_ids)
    # printing bert shape
    print('Bert Shape: ', seq_output.shape)

    # [CLS] embedding -> dropout -> tanh projection -> dropout -> softmax head
    pooled = keras.layers.Lambda(lambda seq: seq[:, 0, :])(seq_output)
    pooled = keras.layers.Dropout(0.5)(pooled)
    head = keras.layers.Dense(units=768, activation='tanh')(pooled)
    head = keras.layers.Dropout(0.5)(head)
    head = keras.layers.Dense(units=len(classes), activation='softmax')(head)

    model = keras.Model(inputs=input_ids, outputs=head)
    model.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert, bert_ckpt_file)  # loading weights
    return model
def test_multi(self):
    """Build a two-input BERT model and load stock weights from self.model_dir."""
    model_dir = self.model_dir
    print(model_dir)

    config_path = os.path.join(model_dir, "bert_config.json")
    ckpt_path = os.path.join(model_dir, "bert_model.ckpt")

    with tf.io.gfile.GFile(config_path, "r") as reader:
        stock_params = StockBertConfig.from_json_string(reader.read())
        bert_params = stock_params.to_bert_model_layer_params()

    l_bert = BertModelLayer.from_params(bert_params, name="bert")

    max_seq_len = 128
    l_input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    l_token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="token_type_ids")

    output = l_bert([l_input_ids, l_token_type_ids])
    model = keras.Model(inputs=[l_input_ids, l_token_type_ids], outputs=output)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    load_stock_weights(l_bert, ckpt_path)
def create_model(max_seq_len, classes, bert_ckpt_file):
    """Build a softmax BERT classifier over `classes` and load stock weights."""
    with tf.io.gfile.GFile(config.BERT_CONFIG_FILE, "r") as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(stock_config)
        bert_params.adapter_size = None

    bert = BertModelLayer.from_params(bert_params, name='bert')

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)
    print(f"Shape of BERT Embedding layer :{bert_output.shape}")

    # BERT emits (None, max_seq_len, hidden); keep only the [CLS] position so
    # the classification head sees a (None, hidden) vector.
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    dense = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    dropout = keras.layers.Dropout(0.5)(dense)
    output = keras.layers.Dense(units=len(classes), activation="softmax")(dropout)

    model = keras.Model(inputs=input_ids, outputs=output)
    model.build(input_shape=(None, max_seq_len))
    load_stock_weights(bert, bert_ckpt_file)
    return model
def construct_bert(model_dir, timesteps, classes, dense_dropout=0.5, attention_dropout=0.3, hidden_dropout=0.3, adapter_size=8):
    """Build a two-input BERT softmax classifier and load stock weights.

    NOTE(review): attention_dropout, hidden_dropout and adapter_size are
    accepted but never applied to bert_params - confirm whether that is
    intentional. bert_config_file below is likewise computed but unused.
    """
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")
    bert_config_file = os.path.join(model_dir, "bert_config.json")  # unused
    bert_params = bert.params_from_pretrained_ckpt(model_dir)
    bert_model = bert.BertModelLayer.from_params(bert_params, name="bert")
    input_ids = Input(shape=(timesteps, ), dtype='int32', name="input_ids_1")
    token_type_ids = Input(shape=(timesteps, ), dtype='int32', name="token_type_ids_1")
    dense = Dense(units=768, activation="tanh", name="dense")
    output = bert_model([input_ids, token_type_ids ])  # output: [batch_size, max_seq_len, hidden_size]
    print("bert shape", output.shape)
    # NOTE(review): seq[:, 0:1, :] keeps the time axis, so the final output has
    # shape (None, 1, classes) rather than (None, classes); the other builders
    # in this file use seq[:, 0, :] - confirm the extra axis is intended.
    cls_out = Lambda(lambda seq: seq[:, 0:1, :])(output)
    cls_out = Dropout(dense_dropout)(cls_out)
    logits = dense(cls_out)
    logits = Dropout(dense_dropout)(logits)
    logits = Dense(units=classes, activation="softmax", name="output_1")(logits)
    model = Model(inputs=[input_ids, token_type_ids], outputs=logits)
    model.build(input_shape=(None, timesteps))
    # load the pre-trained model weights
    load_stock_weights(bert_model, bert_ckpt_file)
    return model
def create_model(max_seq_len, bert_ckpt_file, classes):
    """Build and compile a softmax BERT classifier.

    NOTE(review): the bert_ckpt_file parameter is ignored - weights are loaded
    from config.bert_ckpt_file below. Confirm callers always pass the same path.
    """
    with tf.io.gfile.GFile(config.bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = None  # no adapter-BERT; full fine-tuning
    bert = BertModelLayer.from_params(bert_params, name="bert")
    input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)
    # [CLS] embedding -> dropout -> tanh projection -> dropout -> softmax head
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(config.DROPOUT)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(config.DROPOUT)(logits)
    logits = keras.layers.Dense(units=len(classes), activation="softmax")(logits)
    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))
    load_stock_weights(bert, config.bert_ckpt_file)
    print(model.summary())
    model.compile(
        optimizer=config.OPTIMIZER,
        loss=config.LOSS,
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    return model
def create_model(max_seq_len, bert_ckpt_file):
    """Build a softmax BERT classifier over the global `classes` and load
    the stock checkpoint weights."""
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(stock_config)
        bert_params.adapter_size = None

    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)

    # [CLS] pooling followed by a tanh projection and the softmax head
    x = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    x = keras.layers.Dropout(0.5)(x)
    x = keras.layers.Dense(units=768, activation="tanh")(x)
    x = keras.layers.Dropout(0.5)(x)
    x = keras.layers.Dense(units=len(classes), activation="softmax")(x)

    model = keras.Model(inputs=input_ids, outputs=x)
    model.build(input_shape=(None, max_seq_len))
    load_stock_weights(bert, bert_ckpt_file)
    return model
def model_factory(
    name: str,
    external_datasets: ExternalDatasets,
    preprocessor: Preprocessor,
    architecture: Architecture,
    file_system: FileSystem,
) -> keras.Model:
    """Assemble a BERT intent classifier for the given architecture.

    Reads the stock BERT config from the file system, stacks a dropout +
    dense head sized to the number of intents, and loads the pre-trained
    BERT weights before returning the (uncompiled) model.

    :param name: name given to the BERT layer
    :param external_datasets: supplies the set of intents (output classes)
    :param preprocessor: supplies the maximum input sequence length
    :param architecture: dropout rates and activations for the head
    :param file_system: paths to the BERT config and checkpoint
    :return: a built keras.Model
    :rtype: keras.Model
    """
    with tf.io.gfile.GFile(file_system.get_bert_config_path()) as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
        params = map_stock_config_to_params(stock_config)
        params.adapter_size = None

    bert = BertModelLayer.from_params(params, name=name)

    seq_len = preprocessor.max_sequence_length
    input_ids = keras.layers.Input(shape=(seq_len,), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)

    # [CLS] vector -> dropout -> dense -> dropout -> output head
    head = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    head = keras.layers.Dropout(architecture.clf_out_dropout_rate)(head)
    head = keras.layers.Dense(
        units=BertModelParameters().bert_h_param,
        activation=architecture.clf_out_activation)(head)
    head = keras.layers.Dropout(architecture.logits_dropout_rate)(head)
    head = keras.layers.Dense(
        units=len(external_datasets.all_intents()),
        activation=architecture.logits_activation)(head)

    model = keras.Model(inputs=input_ids, outputs=head)
    model.build(input_shape=(None, seq_len))
    load_stock_weights(bert, file_system.get_bert_model_path())
    return model
def create_model(self, type: str, adapter_size=None):
    """Creates a classification model.

    Input parameters:
    type: "binary" to build a model for binary classification,
          "multi" for multiclass classification.
    """
    self.type = type
    # adapter_size = 64 # see - arXiv:1902.00751
    # map the classification mode to the size of the softmax head
    if type == 'binary':
        class_count = 2
    elif type == 'multi':
        class_count = 3
    else:
        raise TypeError("Choose a proper type of classification")
    # create the bert layer
    with tf.io.gfile.GFile(self._bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")
    input_ids = keras.layers.Input(shape=(self.max_seq_len,), dtype='int32', name="input_ids")
    # token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="token_type_ids")
    # output = bert([input_ids, token_type_ids])
    output = bert(input_ids)
    print("bert shape", output.shape)
    # [CLS] embedding -> dropout -> relu projection -> dropout -> softmax head
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.3)(cls_out)
    logits = keras.layers.Dense(units=768, activation="relu")(cls_out)
    # logits = keras.layers.Dropout(0.3)(logits)
    # logits = keras.layers.Dense(units=256, activation="relu")(logits)
    logits = keras.layers.Dropout(0.4)(logits)
    logits = keras.layers.Dense(units=class_count, activation="softmax")(logits)
    # model = keras.Model(inputs=[input_ids , token_type_ids], outputs=logits)
    # model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])
    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, self.max_seq_len))
    # load the pre-trained model weights
    load_stock_weights(bert, self._bert_ckpt_file)
    # freeze weights if adapter-BERT is used
    if adapter_size is not None:
        self.freeze_bert_layers(bert)
    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                  # loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
                  metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")]
                  # metrics=[tf.keras.metrics.BinaryAccuracy(name="acc")]
                  )
    model.summary()
    self.model = model
def build(self, max_seq_length, bert_ckpt_file=bert_ckpt_file_location, **kwargs):
    """Construct, weight-load and compile the BERT classifier for self.classes.

    Recognized keyword args: optimizer ("adam"), metrics (['accuracy']),
    adapter_size (64), dropout_rate (0.5).
    """
    optimizer = kwargs.get("optimizer", "adam")
    metrics = kwargs.get("metrics", ['accuracy'])
    adapter_size = kwargs.get("adapter_size", 64)
    dropout_rate = kwargs.get('dropout_rate', 0.5)
    # adapter_size = 64 # see - arXiv:1902.00751

    # create the bert layer
    config_path = os.path.join(abs_path, bert_config_file_location)
    with tf.io.gfile.GFile(config_path, "r") as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(stock_config)
        bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = tf.keras.layers.Input(shape=(max_seq_length,), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    cls_out = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = tf.keras.layers.Dropout(0.5)(cls_out)
    hidden = tf.keras.layers.Dense(units=768, activation="tanh")(cls_out)
    hidden = tf.keras.layers.Dropout(dropout_rate)(hidden)
    # NOTE(review): softmax on this intermediate layer looks unusual
    # (tanh/relu would be typical) - preserved as-is.
    hidden = tf.keras.layers.Dense(units=200, activation="softmax")(hidden)
    hidden = tf.keras.layers.Dropout(dropout_rate)(hidden)
    logits = tf.keras.layers.Dense(units=len(self.classes), activation='softmax')(hidden)

    self.model = tf.keras.Model(inputs=input_ids, outputs=logits)
    self.model.build(input_shape=(None, max_seq_length))

    # load the pre-trained model weights
    load_stock_weights(bert, os.path.join(abs_path, bert_ckpt_file))

    # freeze weights if adapter-BERT is used
    if adapter_size is not None:
        freeze_bert_layers(bert)

    self.model.compile(optimizer=optimizer,
                       loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                       metrics=metrics)
    self.model.summary()
def createMultiModelMaximum(max_seq_len, bert_ckpt_file, bert_config_file, NUM_CLASS):
    """Build a three-branch (ResNet50 + InceptionV3 + BERT) classifier whose
    branches are merged with an element-wise Maximum over 768-dim features.

    NOTE(review): returns the tuple (model, 17) - the meaning of the literal
    17 is not visible here; confirm with the callers.
    """
    # text branch: stock BERT config -> BertModelLayer
    with GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = None
        bert_layer = BertModelLayer.from_params(bert_params, name="bert")
    bert_in = Input(shape=(max_seq_len, ), dtype='int32', name="input_ids_bert")
    bert_inter = bert_layer(bert_in)
    cls_out = Lambda(lambda seq: seq[:, 0, :])(bert_inter)
    cls_out = Dropout(0.5)(cls_out)
    bert_out = Dense(units=768, activation="tanh")(cls_out)  # 768 before
    load_stock_weights(bert_layer, bert_ckpt_file)
    # image models:
    inceptionv3 = InceptionV3(weights='imagenet', include_top=False)
    resnet50 = ResNet50(weights='imagenet', include_top=False)
    # ResNet50 branch, projected down to the same 768-dim space as BERT
    res_out = resnet50.output
    res_out = GlobalAveragePooling2D()(res_out)
    res_out = Dropout(0.5)(res_out)
    res_out = Dense(2048)(res_out)
    res_out = Dropout(0.5)(res_out)
    res_out = Dense(768)(res_out)
    # InceptionV3 branch, same projection
    inc_out = inceptionv3.output
    inc_out = GlobalAveragePooling2D()(inc_out)
    inc_out = Dropout(0.5)(inc_out)
    inc_out = Dense(2048)(inc_out)
    inc_out = Dropout(0.5)(inc_out)
    inc_out = Dense(768)(inc_out)
    # merge = Concatenate()([res_out, inc_out, bert_out])
    merge = Maximum()([res_out, inc_out, bert_out])
    # remaining layers (classification head on the merged features)
    x = Dense(2048)(merge)
    x = Dropout(0.5)(x)
    x = Dense(1024)(x)
    x = Dropout(0.5)(x)
    x = Dense(512)(x)
    x = Dropout(0.5)(x)
    output = Dense(NUM_CLASS, activation='softmax', name='output_layer')(x)
    model = Model(inputs=[resnet50.input, inceptionv3.input, bert_in], outputs=output)
    plot_model(model, to_file='multiple_inputs_text.png', show_shapes=True, dpi=600, expand_nested=False)
    return model, 17
def create_model(max_seq_len, bert_config_file, bert_ckpt_file, adapter_size=64):
    """Creates a classification model."""
    # adapter_size = 64 # see - arXiv:1902.00751

    # create the bert layer
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(stock_config)
        bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    # token_type_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="token_type_ids")
    # output = bert([input_ids, token_type_ids])
    output = bert(input_ids)
    print("bert shape", output.shape)

    # [CLS] embedding -> dropout -> tanh projection -> dropout -> 6-way softmax
    x = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    x = keras.layers.Dropout(0.5)(x)
    x = keras.layers.Dense(units=768, activation="tanh")(x)
    x = keras.layers.Dropout(0.5)(x)
    logits = keras.layers.Dense(units=6, activation="softmax")(x)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, bert_ckpt_file)

    # freeze weights if adapter-BERT is used
    if adapter_size is not None:
        freeze_bert_layers(bert)

    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    model.summary()
    return model
def load_albert_weights(bert: BertModelLayer, tfhub_model_path, tags=[]):
    """
    Use this method to load the weights from a pre-trained BERT checkpoint into a bert layer.

    :param bert: a BertModelLayer instance within a built keras model.
    :param tfhub_model_path: a TFHub saved-model directory or a plain checkpoint
        path, i.e. `uncased_L-12_H-768_A-12/bert_model.ckpt` or
        `albert_base_zh/albert_model.ckpt`
    :param tags: tags forwarded to tf.saved_model.load for the TFHub model.
    :return: list of weights with mismatched shapes. This can be used to extend
    the segment/token_type embeddings.
    """
    # NOTE(review): `tags=[]` is a mutable default argument; harmless here since
    # it is only read, but a None default would be the safer idiom.

    # brightmart/albert_zh models are plain checkpoints - delegate to the stock loader
    if not _is_tfhub_model(tfhub_model_path):
        print("Loading brightmart/albert_zh weights...")
        return loader.load_stock_weights(bert, tfhub_model_path)
    assert isinstance(bert, BertModelLayer), "Expecting a BertModelLayer instance as first argument"
    prefix = loader.bert_prefix(bert)
    # read every variable value out of the TFHub saved model in a throwaway graph/session
    with tf.Graph().as_default():
        sm = tf.compat.v2.saved_model.load(tfhub_model_path, tags=tags)
        with tf.compat.v1.Session() as sess:
            sess.run(tf.compat.v1.global_variables_initializer())
            stock_values = {v.name.split(":")[0]: v.read_value() for v in sm.variables}
            stock_values = sess.run(stock_values)
    # print("\n".join([str((n, v.shape)) for n,v in stock_values.items()]))
    loaded_weights = set()            # checkpoint names actually consumed
    skip_count = 0                    # keras weights with no checkpoint counterpart
    weight_value_tuples = []          # (keras weight, checkpoint value) pairs to assign
    skipped_weight_value_tuples = []  # shape-mismatched pairs, returned to the caller
    bert_params = bert.weights
    param_values = keras.backend.batch_get_value(bert.weights)
    # match each keras weight to its checkpoint variable by mapped name
    for ndx, (param_value, param) in enumerate(zip(param_values, bert_params)):
        stock_name = map_to_tfhub_albert_variable_name(param.name, prefix)
        if stock_name in stock_values:
            ckpt_value = stock_values[stock_name]
            # shape mismatches (e.g. token_type embeddings) are reported, not assigned
            if param_value.shape != ckpt_value.shape:
                print("loader: Skipping weight:[{}] as the weight shape:[{}] is not compatible "
                      "with the checkpoint:[{}] shape:{}".format(param.name, param.shape,
                                                                 stock_name, ckpt_value.shape))
                skipped_weight_value_tuples.append((param, ckpt_value))
                continue
            weight_value_tuples.append((param, ckpt_value))
            loaded_weights.add(stock_name)
        else:
            print("loader: No value for:[{}], i.e.:[{}] in:[{}]".format(param.name, stock_name,
                                                                        tfhub_model_path))
            skip_count += 1
    # assign all matched values in one batched call
    keras.backend.batch_set_value(weight_value_tuples)
    print("Done loading {} BERT weights from: {} into {} (prefix:{}). "
          "Count of weights not found in the checkpoint was: [{}]. "
          "Count of weights with mismatched shape: [{}]".format(
              len(weight_value_tuples), tfhub_model_path, bert, prefix, skip_count,
              len(skipped_weight_value_tuples)))
    print("Unused weights from saved model:",
          "\n\t" + "\n\t".join(sorted(set(stock_values.keys()).difference(loaded_weights))))
    return skipped_weight_value_tuples  # (bert_weight, value_from_ckpt)
def build_model(bert_config, init_checkpoint, max_seq_len):
    """Build a feature-extraction BERT model (raw sequence output) and load
    the pre-trained checkpoint weights.

    :param bert_config: path to the bert json config file
    :param init_checkpoint: path to the pre-trained checkpoint
    :param max_seq_len: input sequence length
    :return: keras.Model mapping (ids, token types, mask) -> sequence output
    """
    bert_params = from_json_file(bert_config)
    l_bert = BertModelLayer.from_params(bert_params, name="bert")

    # Input and output endpoints
    token_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')
    segment_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32')
    input_mask = keras.layers.Input(shape=(max_seq_len,), dtype='int32')

    # shape: [batch_size, max_seq_len, hidden_size]
    output = l_bert([token_ids, segment_ids], mask=input_mask, training=False)
    print('Output shape: {}'.format(output.get_shape()))

    # Build model
    model = keras.Model(inputs=[token_ids, segment_ids, input_mask], outputs=output)

    # loading the original pre-trained weights into the BERT layer:
    load_stock_weights(l_bert, init_checkpoint)
    return model
def BERTClassifier(max_seq_len=128, bert_model_dir='models/chinese_L-12_H-768_A-12', do_lower_case=False):
    """Build and compile a frozen-BERT binary classifier plus its tokenizer.

    :return: (compiled keras model, FullTokenizer)
    """
    # load bert parameters
    config_path = os.path.join(bert_model_dir, "bert_config.json")
    with tf.io.gfile.GFile(config_path, "r") as reader:
        stock_params = StockBertConfig.from_json_string(reader.read())
    bert_params = stock_params.to_bert_model_layer_params()

    # create bert structure according to the parameters
    bert = BertModelLayer.from_params(bert_params, name="bert")

    # inputs
    input_token_ids = tf.keras.Input((max_seq_len,), dtype=tf.int32, name='input_ids')
    input_segment_ids = tf.keras.Input((max_seq_len,), dtype=tf.int32, name='token_type_ids')

    # classifier head on the [CLS] position
    seq_out = bert([input_token_ids, input_segment_ids])
    cls_out = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(seq_out)
    cls_out = tf.keras.layers.Dropout(rate=0.5)(cls_out)
    logits = tf.keras.layers.Dense(units=cls_out.shape[-1], activation=tf.math.tanh)(cls_out)
    logits = tf.keras.layers.Dropout(rate=0.5)(logits)
    logits = tf.keras.layers.Dense(units=2, activation=tf.nn.softmax)(logits)

    # create model containing only bert layer
    model = tf.keras.Model(inputs=[input_token_ids, input_segment_ids], outputs=logits)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    # load bert layer weights
    load_stock_weights(bert, os.path.join(bert_model_dir, "bert_model.ckpt"))

    # freeze_bert_layers
    freeze_bert_layers(bert)

    model.compile(
        optimizer=tf.keras.optimizers.Adam(2e-5),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name='accuracy')])

    # create tokenizer, chinese character needs no lower case.
    tokenizer = FullTokenizer(vocab_file=os.path.join(bert_model_dir, "vocab.txt"),
                              do_lower_case=do_lower_case)
    return model, tokenizer
def _load_bert(self, bert_config_file, bert_ckpt_file):
    """Build a BERT embedding model ([CLS] vector output) and load stock weights."""
    try:
        with tf.io.gfile.GFile(bert_config_file, 'r') as gf:
            bert_config = StockBertConfig.from_json_string(gf.read())
            bert_params = map_stock_config_to_params(bert_config)
            bert_params.adapter_size = None
            bert = BertModelLayer.from_params(bert_params, name='bert')
    except Exception as e:
        print(e)
        raise e

    seq_input = keras.layers.Input(shape=(self.max_seq_len,), dtype='int64', name="input_ids")
    hidden = bert(seq_input)
    # take the first embedding of BERT as the output embedding
    cls_vec = keras.layers.Lambda(lambda seq: seq[:, 0, :])(hidden)

    model = keras.Model(inputs=seq_input, outputs=cls_vec)
    model.build(input_shape=(None, self.max_seq_len))
    load_stock_weights(bert, bert_ckpt_file)
    return model
def create_model(max_seq_len, lr=1e-5):
    """Create a BERT binary classification model.

    Architecture: token ids -> BERT -> [CLS] -> dropout (to prevent
    overfitting) -> dense softmax output.

    :param max_seq_len: the maximum sequence length
    :param lr: learning rate of the Adam optimizer
    """
    # create the bert layer
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    seq_output = bert(input_ids)
    print("bert shape", seq_output.shape)

    cls_vec = keras.layers.Lambda(lambda seq: seq[:, 0, :])(seq_output)
    # Dropout layer
    cls_vec = keras.layers.Dropout(0.8)(cls_vec)
    # Dense layer with probability output
    probs = keras.layers.Dense(units=2, activation="softmax")(cls_vec)

    model = keras.Model(inputs=input_ids, outputs=probs)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, bert_ckpt_file)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    model.summary()
    return model
def test_concat(self):
    """Mini-BERT with TimeDistributed post-processing and max/avg pooling concat."""
    model_dir = self.create_mini_bert_weights()

    bert_params = loader.params_from_pretrained_ckpt(model_dir)
    bert_params.adapter_size = 32
    bert = BertModelLayer.from_params(bert_params, name="bert")

    max_seq_len = 4
    hidden = bert_params.hidden_size

    pooling = pf.Concat([
        # GlobalMaxPooling1D
        keras.layers.Lambda(lambda x: tf.math.reduce_max(x, axis=1)),
        # GlobalAvgPooling1
        keras.layers.Lambda(lambda x: tf.math.reduce_mean(x, axis=1)),
    ])

    model = keras.models.Sequential([
        keras.layers.InputLayer(input_shape=(max_seq_len,)),
        bert,
        keras.layers.TimeDistributed(keras.layers.Dense(hidden)),
        keras.layers.TimeDistributed(keras.layers.LayerNormalization()),
        keras.layers.TimeDistributed(keras.layers.Activation("tanh")),
        pooling,
        keras.layers.Dense(units=hidden),
        keras.layers.Activation("tanh"),
        keras.layers.Dense(units=2),
    ])
    model.build(input_shape=(None, max_seq_len))
    model.summary()

    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=[keras.losses.SparseCategoricalCrossentropy(from_logits=True)],
        metrics=[keras.metrics.SparseCategoricalAccuracy()],
        run_eagerly=True)

    loader.load_stock_weights(bert, model_dir)
    model.summary()
def create_text_model(max_seq_len, bert_ckpt_file, bert_config_file, NUM_CLASS, overwriteLayerAndEmbeddingSize=False, isPreTrained=False, pathToBertModelWeights=None, isTrainable=True):
    """Build a BERT softmax text classifier, loading either stock checkpoint
    weights or previously saved keras model weights.

    NOTE(review): the two branches return different things - a bare model when
    isPreTrained is False, but the tuple (model, 2) when True. Confirm callers
    expect this asymmetry.
    """
    with GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        # optionally override the position-embedding size to the requested length
        if overwriteLayerAndEmbeddingSize:
            bc.max_position_embeddings = max_seq_len
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = None
    bert = BertModelLayer.from_params(bert_params, name="bert")
    input_ids = Input(shape=(max_seq_len, ), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)
    # [CLS] embedding -> dropout -> softmax head
    cls_out = Lambda(lambda seq: seq[:, 0, :], name='bert_output_layer_768')(bert_output)
    cls_out = Dropout(0.5)(cls_out)
    output = Dense(NUM_CLASS, activation="softmax")(cls_out)
    #
    model_bert = Model(inputs=input_ids, outputs=output, name='BERT')
    model_bert.build(input_shape=(None, max_seq_len))
    if not isPreTrained:
        # fresh model: initialize the BERT layer from the stock checkpoint
        load_stock_weights(bert, bert_ckpt_file)
        return model_bert
    else:
        # resume from previously saved keras weights
        model_bert.load_weights(pathToBertModelWeights)
        if not isTrainable:
            # freeze the whole network for inference-only use
            for layer in model_bert.layers:
                layer.trainable = False
        return model_bert, 2
def create_model(max_seq_len, bert_ckpt_dir, bert_config_file):
    """Build a 2-class softmax BERT classifier and load stock weights.

    NOTE(review): the bert_ckpt_dir parameter is never used - the weights are
    loaded from a `bert_ckpt_file` name that must come from an enclosing or
    global scope. Confirm this is intended.
    """
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = None  # no adapter-BERT; full fine-tuning
    bert = BertModelLayer.from_params(bert_params, name="bert")
    input_ids = tf.keras.layers.Input(shape = (max_seq_len, ), dtype= tf.int32, name= 'input_ids')
    bert_output = bert(input_ids)
    # keep only the [CLS] position embedding
    bert_output = bert_output[:,0,:]
    drop_out = tf.keras.layers.Dropout(0.5)(bert_output)
    d_out = tf.keras.layers.Dense(768, activation='tanh')(drop_out)
    logits = tf.keras.layers.Dropout(0.5)(d_out)
    out = tf.keras.layers.Dense(2, activation='softmax')(logits)
    model = tf.keras.models.Model(inputs = input_ids, outputs = out)
    model.summary()
    load_stock_weights(bert, bert_ckpt_file)
    return model
def __init__(self, bert_config_file, bert_ckpt_file, max_seq_len, lr=1e-5):
    """Build and compile a BERT binary classifier; stores it on self.model.

    :param bert_config_file: path to bert configuration parameters
    :param bert_ckpt_file: path to pretrained bert checkpoint
    :param max_seq_len: maximum sequence length
    :param lr: learning rate
    """
    # create the bert layer
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(stock_config)
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    seq_out = bert(input_ids)
    cls_vec = keras.layers.Lambda(lambda seq: seq[:, 0, :])(seq_out)
    # Dropout layer
    cls_vec = keras.layers.Dropout(0.8)(cls_vec)
    # Dense layer with probability output
    probs = keras.layers.Dense(units=2, activation="softmax")(cls_vec)

    model = keras.Model(inputs=input_ids, outputs=probs)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, bert_ckpt_file)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    self.model = model
def build_simple_bert(model_dir: str, max_seq_len: int, learning_rate: float,
                      adapter_size: int = 64) -> tf.keras.Model:
    """Adapter-BERT binary classifier (sigmoid head on the [CLS] vector)."""
    assert (max_seq_len > 0) and (max_seq_len <= MAX_SEQ_LENGTH)

    input_word_ids = tf.keras.layers.Input(shape=(max_seq_len,), dtype=tf.int32,
                                           name="input_word_ids_for_BERT")
    segment_ids = tf.keras.layers.Input(shape=(max_seq_len,), dtype=tf.int32,
                                        name="segment_ids_for_BERT")

    bert_params = params_from_pretrained_ckpt(model_dir)
    bert_params.adapter_size = adapter_size
    bert_params.adapter_init_scale = 1e-5
    bert_model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
    bert_layer = BertModelLayer.from_params(bert_params, name="BERT_Layer")

    seq_out = bert_layer([input_word_ids, segment_ids])
    cls_out = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :], name='BERT_cls')(seq_out)
    head = tf.keras.layers.Dense(units=1,
                                 activation='sigmoid',
                                 kernel_initializer='glorot_uniform',
                                 name='HyponymHypernymOutput')(cls_out)

    model = tf.keras.Model(inputs=[input_word_ids, segment_ids], outputs=head,
                           name='SimpleBERT')
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])
    load_stock_weights(bert_layer, bert_model_ckpt)

    # train only the adapters: freeze the rest of BERT plus the embeddings
    bert_layer.apply_adapter_freeze()
    bert_layer.embeddings_layer.trainable = False

    model.compile(optimizer=pf.optimizers.RAdam(learning_rate=learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=[tf.keras.metrics.AUC(name='auc')],
                  experimental_run_tf_function=True)
    return model
def create_model(config, adapter_size=64):
    """Create a multi-label classification model over BERT.

    The head blends a self-attention pooling of the whole sequence with the
    weighted [CLS] vector, then emits raw logits (losses use
    ``from_logits=True``).

    Args:
        config: object with bert_config_file, bert_ckpt_file, max_seq_len,
            attn_weight, cls_weight, classes, label_smoothing, focal_alpha,
            focal_gamma, loss_func and batch_size attributes.
        adapter_size: adapter bottleneck width passed to the BERT params.

    Returns:
        The compiled ``keras.Model``.
    """
    # Build the BERT layer from the stock config file.
    with tf.io.gfile.GFile(config.bert_config_file, "r") as reader:
        stock_cfg = StockBertConfig.from_json_string(reader.read())
        params = map_stock_config_to_params(stock_cfg)
        params.adapter_size = adapter_size
        bert = BertModelLayer.from_params(params, name="bert")

    input_ids = keras.layers.Input(shape=(config.max_seq_len, ),
                                   dtype='int32',
                                   name="input_ids")
    seq_out = bert(input_ids)

    # Self-attention pooling: attend the sequence over itself, then sum.
    scores = tf.nn.softmax(tf.matmul(seq_out, seq_out, transpose_b=True),
                           axis=-1)
    attended = tf.matmul(scores, seq_out)
    attn_pooled = tf.reduce_sum(attended, axis=1) * config.attn_weight

    # Weighted [CLS] vector, blended with the attention pooling above.
    cls_vec = keras.layers.Lambda(lambda seq: seq[:, 0, :])(
        seq_out) * config.cls_weight
    features = cls_vec + attn_pooled

    features = keras.layers.Dropout(0.5)(features)
    features = keras.layers.LayerNormalization()(features)
    logits = keras.layers.Dense(units=len(config.classes))(features)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, config.max_seq_len))

    # Load the pre-trained BERT weights.
    load_stock_weights(bert, config.bert_ckpt_file)

    # NOTE: freezing for adapter-BERT is intentionally left disabled here.
    # if adapter_size is not None:
    #     freeze_bert_layers(bert)

    # Both candidate losses are constructed; config.loss_func selects one.
    loss_func_list = {
        "sigmoid_cross_entropy_loss":
            tf.keras.losses.BinaryCrossentropy(
                from_logits=True,
                label_smoothing=config.label_smoothing),
        "focal_loss":
            tfa.losses.SigmoidFocalCrossEntropy(
                alpha=config.focal_alpha,
                gamma=config.focal_gamma,
                from_logits=True),
    }

    model.compile(optimizer=keras.optimizers.Adam(),
                  loss=loss_func_list[config.loss_func],
                  metrics=[
                      MultiLabelAccuracy(batch_size=config.batch_size),
                      MultiLabelPrecision(batch_size=config.batch_size),
                      MultiLabelRecall(batch_size=config.batch_size),
                      MultiLabelF1(batch_size=config.batch_size),
                      HammingLoss(batch_size=config.batch_size)
                  ])
    model.summary()
    return model
def build_bert_and_cnn(model_dir: str, n_filters: int, hidden_layer_size: int, bayesian: bool, max_seq_len: int, learning_rate: float, **kwargs) -> tf.keras.Model:
    """Build a BERT + multi-width CNN binary classifier.

    Two variants:
      * ``bayesian=True`` — BERT is fully frozen and the CNN/dense head uses
        tensorflow-probability Flipout layers; requires ``kwargs['kl_weight']``.
      * ``bayesian=False`` — deterministic Conv1D/Dense head with
        adapter-BERT fine-tuning; requires ``kwargs['adapter_size']``.

    Args:
        model_dir: directory with the pre-trained BERT checkpoint.
        n_filters: number of filters per n-gram convolution (widths 1..5).
        hidden_layer_size: units in the dense hidden layer of the head.
        bayesian: selects the variational (True) or deterministic (False) head.
        max_seq_len: input sequence length, in (0, MAX_SEQ_LENGTH].
        learning_rate: RAdam learning rate.
        **kwargs: 'kl_weight' (bayesian) or 'adapter_size' (deterministic).

    Returns:
        A compiled ``tf.keras.Model`` taking [word ids, segment ids, mask].
    """
    assert (max_seq_len > 0) and (max_seq_len <= MAX_SEQ_LENGTH)
    # BUG FIX: the original asserts were swapped. The bayesian branch
    # consumes kwargs['kl_weight'] (in kl_divergence_function below) and the
    # deterministic branch consumes kwargs['adapter_size'], so every valid
    # call previously failed its precondition (or raised KeyError printing
    # the missing kwarg).
    if bayesian:
        assert 'kl_weight' in kwargs
        print('KL weight is {0:.9f}.'.format(kwargs['kl_weight']))
    else:
        assert 'adapter_size' in kwargs
        print('Adapter size is {0}.'.format(kwargs['adapter_size']))

    # Three integer inputs, each shaped (batch, max_seq_len).
    input_word_ids = tf.keras.layers.Input(shape=(max_seq_len, ),
                                           dtype=tf.int32,
                                           name="input_word_ids_for_BERT")
    segment_ids = tf.keras.layers.Input(shape=(max_seq_len, ),
                                        dtype=tf.int32,
                                        name="segment_ids_for_BERT")
    output_mask = tf.keras.layers.Input(shape=(max_seq_len, ),
                                        dtype=tf.int32,
                                        name="output_mask_for_BERT")

    bert_params = params_from_pretrained_ckpt(model_dir)
    if not bayesian:
        # Adapter-BERT: only small bottleneck adapters are trained.
        bert_params.adapter_size = kwargs['adapter_size']
        bert_params.adapter_init_scale = 1e-5
    bert_model_ckpt = os.path.join(model_dir, "bert_model.ckpt")
    bert_layer = BertModelLayer.from_params(bert_params, name="BERT_Layer")
    # Bayesian variant keeps the whole backbone frozen; the deterministic
    # variant leaves it trainable (then adapter-freezes it after loading).
    bert_layer.trainable = not bayesian
    bert_output = bert_layer([input_word_ids, segment_ids])
    cls_output = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :],
                                        name='BERT_cls')(bert_output)

    activation_type = 'tanh'
    initializer_type = 'glorot_uniform'
    kernel_sizes = (1, 2, 3, 4, 5)  # n-gram widths of the parallel convs
    if bayesian:
        # Per-layer KL divergence, scaled so it is comparable to the
        # data-fit term of the loss.
        kl_divergence_function = (
            lambda q, p, _: (tfp.distributions.kl_divergence(q, p) * tf.constant(
                kwargs['kl_weight'], dtype=tf.float32, name='KL_weight')))
        conv_layers = [
            tfp.layers.Convolution1DFlipout(
                kernel_size=k,
                filters=n_filters,
                activation=activation_type,
                kernel_divergence_fn=kl_divergence_function,
                padding='same',
                name='Conv_{0}grams'.format(k))(bert_output)
            for k in kernel_sizes
        ]
    else:
        kl_divergence_function = None
        conv_layers = [
            tf.keras.layers.Conv1D(
                kernel_size=k,
                filters=n_filters,
                activation=activation_type,
                padding='same',
                kernel_initializer=initializer_type,
                name='Conv_{0}grams'.format(k))(bert_output)
            for k in kernel_sizes
        ]

    conv_concat_layer = tf.keras.layers.Concatenate(
        name='ConvConcat')(conv_layers)
    # Zero out conv activations at padded positions before pooling.
    masking_calc = MaskCalculator(output_dim=len(conv_layers) * n_filters,
                                  trainable=False,
                                  name='MaskCalculator')(output_mask)
    conv_concat_layer = tf.keras.layers.Multiply(name='MaskMultiplicator')(
        [conv_concat_layer, masking_calc])
    conv_concat_layer = tf.keras.layers.Masking(
        name='Masking')(conv_concat_layer)
    # Features = [CLS] vector ++ masked average pooling of the conv stack.
    feature_layer = tf.keras.layers.Concatenate(name='FeatureLayer')([
        cls_output,
        tf.keras.layers.GlobalAveragePooling1D(
            name='AvePooling')(conv_concat_layer)
    ])

    if bayesian:
        hidden_layer = tfp.layers.DenseFlipout(
            units=hidden_layer_size,
            activation=activation_type,
            kernel_divergence_fn=kl_divergence_function,
            name='HiddenLayer')(feature_layer)
        output_layer = tfp.layers.DenseFlipout(
            units=1,
            activation='sigmoid',
            kernel_divergence_fn=kl_divergence_function,
            name='HyponymHypernymOutput')(hidden_layer)
    else:
        feature_layer = tf.keras.layers.Dropout(rate=0.5,
                                                name='Dropout1')(feature_layer)
        hidden_layer = tf.keras.layers.Dense(
            units=hidden_layer_size,
            activation=activation_type,
            kernel_initializer=initializer_type,
            name='HiddenLayer')(feature_layer)
        hidden_layer = tf.keras.layers.Dropout(rate=0.5,
                                               name='Dropout2')(hidden_layer)
        output_layer = tf.keras.layers.Dense(
            units=1,
            activation='sigmoid',
            kernel_initializer='glorot_uniform',
            name='HyponymHypernymOutput')(hidden_layer)

    model = tf.keras.Model(inputs=[input_word_ids, segment_ids, output_mask],
                           outputs=output_layer,
                           name='BERT_CNN')
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len),
                             (None, max_seq_len)])
    load_stock_weights(bert_layer, bert_model_ckpt)
    if not bayesian:
        # Freeze everything except the adapters (embeddings included).
        bert_layer.apply_adapter_freeze()
        bert_layer.embeddings_layer.trainable = False
    model.compile(optimizer=pf.optimizers.RAdam(learning_rate=learning_rate),
                  loss=tf.keras.losses.BinaryCrossentropy(),
                  metrics=[tf.keras.metrics.AUC(name='auc')],
                  experimental_run_tf_function=not bayesian)
    return model