def load_keras_model(model_dir, max_seq_len):
    """Build a two-input Keras model with a sigmoid head on a stock BERT layer.

    Args:
        model_dir: Directory containing ``bert_config.json`` and the
            ``bert_model.ckpt`` checkpoint.
        max_seq_len: Length of the ``input_ids`` / ``token_type_ids`` inputs.

    Returns:
        A ``keras.Model`` that takes ``[input_ids, token_type_ids]`` and
        outputs a single sigmoid unit computed from index 0 of the second
        axis of the BERT output.
    """
    # Load BERT with Keras; the imports stay function-local.
    from tensorflow.python import keras
    from bert import BertModelLayer
    from bert.loader import StockBertConfig, load_stock_weights

    config_path = os.path.join(model_dir, "bert_config.json")
    checkpoint_path = os.path.join(model_dir, "bert_model.ckpt")

    with tf.io.gfile.GFile(config_path, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    bert_layer = BertModelLayer.from_params(
        stock_config.to_bert_model_layer_params(), name="bert")

    token_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="input_ids")
    segment_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="token_type_ids")

    sequence_output = bert_layer([token_ids, segment_ids])
    # Keep only index 0 along the second axis.
    first_position = Lambda(lambda x: x[:, 0])(sequence_output)
    output = keras.layers.Dense(
        1, activation=keras.activations.sigmoid)(first_position)

    model = keras.Model(inputs=[token_ids, segment_ids], outputs=output)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    # Weights are loaded only after the model has been built.
    load_stock_weights(bert_layer, checkpoint_path)
    return model
def test_multi(self):
    """Build a two-input BERT model from ``self.model_dir`` and load weights.

    Reads ``bert_config.json`` and ``bert_model.ckpt`` from the model
    directory, wires ``input_ids`` and ``token_type_ids`` inputs of length
    128 into the BERT layer, and loads the stock checkpoint into that layer.
    Nothing is asserted or returned.
    """
    model_dir = self.model_dir
    print(model_dir)

    config_path = os.path.join(model_dir, "bert_config.json")
    checkpoint_path = os.path.join(model_dir, "bert_model.ckpt")

    with tf.io.gfile.GFile(config_path, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    layer_params = stock_config.to_bert_model_layer_params()
    bert_layer = BertModelLayer.from_params(layer_params, name="bert")

    seq_len = 128
    token_ids = keras.layers.Input(
        shape=(seq_len, ), dtype='int32', name="input_ids")
    segment_ids = keras.layers.Input(
        shape=(seq_len, ), dtype='int32', name="token_type_ids")
    sequence_output = bert_layer([token_ids, segment_ids])

    model = keras.Model(inputs=[token_ids, segment_ids],
                        outputs=sequence_output)
    model.build(input_shape=[(None, seq_len), (None, seq_len)])

    load_stock_weights(bert_layer, checkpoint_path)
def create_model(max_seq_len, bert_ckpt_file):
    """Build a softmax classifier on top of a stock BERT layer.

    The BERT configuration path comes from the module-level
    ``bert_config_file`` and the number of output units from the
    module-level ``classes``.

    Args:
        max_seq_len: Length of the ``input_ids`` input.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``.

    Returns:
        An uncompiled ``keras.Model`` mapping ``input_ids`` to a softmax
        output with ``len(classes)`` units.
    """
    with tf.io.gfile.GFile(bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = None
    bert_layer = BertModelLayer.from_params(params, name="bert")

    token_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="input_ids")
    sequence_output = bert_layer(token_ids)
    print("bert shape", sequence_output.shape)

    # Keep only position 0 of the sequence axis, then dropout -> tanh
    # projection -> dropout -> softmax.
    hidden = keras.layers.Lambda(lambda seq: seq[:, 0, :])(sequence_output)
    hidden = keras.layers.Dropout(0.5)(hidden)
    hidden = keras.layers.Dense(units=768, activation="tanh")(hidden)
    hidden = keras.layers.Dropout(0.5)(hidden)
    probabilities = keras.layers.Dense(
        units=len(classes), activation="softmax")(hidden)

    model = keras.Model(inputs=token_ids, outputs=probabilities)
    model.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert_layer, bert_ckpt_file)
    return model
def create_model(max_seq_len, bert_ckpt_file, classes):
    """Build and compile a softmax classifier on top of a stock BERT layer.

    Settings are read from the module-level ``config`` object:
    ``bert_config_file``, ``DROPOUT``, ``OPTIMIZER``, ``LOSS`` and, as a
    fallback, ``bert_ckpt_file``.

    Args:
        max_seq_len: Length of the ``input_ids`` input.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``. When
            ``None``, ``config.bert_ckpt_file`` is used instead.
        classes: Sized collection of labels; its length sets the number of
            output units.

    Returns:
        A compiled ``keras.Model`` mapping ``input_ids`` to a softmax output.
    """
    # The argument used to be ignored in favour of config.bert_ckpt_file;
    # honour it, and keep the config value as the fallback.
    ckpt_path = (bert_ckpt_file if bert_ckpt_file is not None
                 else config.bert_ckpt_file)

    with tf.io.gfile.GFile(config.bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = None
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)
    print("bert shape", bert_output.shape)

    # Keep only position 0 of the sequence axis as the pooled feature.
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(config.DROPOUT)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(config.DROPOUT)(logits)
    logits = keras.layers.Dense(
        units=len(classes), activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert, ckpt_path)

    # summary() prints by itself and returns None, so it is not wrapped in
    # print() (which would emit a stray "None").
    model.summary()

    model.compile(
        optimizer=config.OPTIMIZER,
        loss=config.LOSS,
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])
    return model
def Create_Modle(max_seq_len, bert_ckpt_file):
    """Build a softmax classifier on top of a stock BERT layer.

    The configuration path is the module-level ``bert_config_file`` and the
    output width is ``len(classes)`` from the module-level ``classes``.

    Args:
        max_seq_len: Length of the ``input_ids`` input.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``.

    Returns:
        An uncompiled ``keras.Model``.
    """
    with tf.io.gfile.GFile(bert_config_file, 'r') as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = None

    # BERT encoder layer.
    bert_layer = BertModelLayer.from_params(params, name='bert')

    # Token-id input fed to the encoder.
    token_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name='input_ids')
    sequence_output = bert_layer(token_ids)

    # Report the encoder output shape.
    print('Bert Shape: ', sequence_output.shape)

    # Classification head over position 0 of the sequence axis.
    hidden = keras.layers.Lambda(lambda seq: seq[:, 0, :])(sequence_output)
    hidden = keras.layers.Dropout(0.5)(hidden)
    hidden = keras.layers.Dense(units=768, activation='tanh')(hidden)
    hidden = keras.layers.Dropout(0.5)(hidden)
    probabilities = keras.layers.Dense(
        units=len(classes), activation='softmax')(hidden)

    model = keras.Model(inputs=token_ids, outputs=probabilities)
    model.build(input_shape=(None, max_seq_len))

    # Load the pre-trained weights into the encoder.
    load_stock_weights(bert_layer, bert_ckpt_file)
    return model
def create_model(max_seq_len, adapter_size=64):
    """Build and compile a 2-class (adapter-)BERT classifier.

    The configuration and checkpoint paths come from the module-level
    ``BERT_CONFIG_FILE`` and ``BERT_CKPT_FILE``.

    Args:
        max_seq_len: Length of the ``input_ids`` input.
        adapter_size: Value assigned to ``bert_params.adapter_size``. When it
            is not ``None`` the BERT layers are frozen via
            ``freeze_bert_layers``.

    Returns:
        A compiled ``keras.Model`` with a 2-unit softmax output.
    """
    # Create the base layer from the BERT config.
    with tf.io.gfile.GFile(BERT_CONFIG_FILE, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    # Use the argument here: the input previously used the MAX_SEQ_LEN global
    # while model.build() below used max_seq_len, so the two could disagree.
    input_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    # Keep only position 0 of the sequence axis as the pooled feature.
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=2, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert, BERT_CKPT_FILE)

    if adapter_size is not None:
        freeze_bert_layers(bert)

    # The output layer already applies softmax, so the loss must treat its
    # input as probabilities (from_logits=False), not raw logits.
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    # summary() prints by itself and returns None; do not wrap it in print().
    model.summary()
    return model
def create_model(max_seq_len, classes, bert_ckpt_file):
    """Build a softmax classifier on top of a stock BERT layer.

    The configuration path is read from ``config.BERT_CONFIG_FILE``.

    Args:
        max_seq_len: Length of the ``input_ids`` input.
        classes: Sized collection of labels; its length sets the number of
            output units.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``.

    Returns:
        An uncompiled ``keras.Model``.
    """
    with tf.io.gfile.GFile(config.BERT_CONFIG_FILE, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = None
    bert_layer = BertModelLayer.from_params(params, name='bert')

    token_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="input_ids")
    sequence_output = bert_layer(token_ids)
    print(f"Shape of BERT Embedding layer :{sequence_output.shape}")

    # The encoder output is rank 3 (presumably (None, max_seq_len, 768) --
    # TODO confirm against the config); the Lambda keeps position 0 of the
    # sequence axis, giving a rank-2 tensor.
    first_position = keras.layers.Lambda(
        lambda seq: seq[:, 0, :])(sequence_output)
    first_position = keras.layers.Dropout(0.5)(first_position)
    projected = keras.layers.Dense(
        units=768, activation="tanh")(first_position)
    regularised = keras.layers.Dropout(0.5)(projected)
    probabilities = keras.layers.Dense(
        units=len(classes), activation="softmax")(regularised)

    model = keras.Model(inputs=token_ids, outputs=probabilities)
    model.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert_layer, bert_ckpt_file)
    return model
def model_factory(
    name: str,
    external_datasets: ExternalDatasets,
    preprocessor: Preprocessor,
    architecture: Architecture,
    file_system: FileSystem,
) -> keras.Model:
    """Build a BERT-based intent classifier described by ``architecture``.

    :param name: Name given to the BERT layer.
    :type name: str
    :param external_datasets: Supplies ``all_intents()``; its length sets the
        number of output units.
    :type external_datasets: ExternalDatasets
    :param preprocessor: Supplies ``max_sequence_length`` for the input.
    :type preprocessor: Preprocessor
    :param architecture: Supplies the dropout rates and activations of the
        classification head.
    :type architecture: Architecture
    :param file_system: Supplies the BERT config and checkpoint paths.
    :type file_system: FileSystem
    :return: The uncompiled model with the stock BERT weights loaded.
    :rtype: keras.Model
    """
    seq_len = preprocessor.max_sequence_length

    with tf.io.gfile.GFile(file_system.get_bert_config_path()) as reader:
        stock_config = StockBertConfig.from_json_string(reader.read())
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = None
    bert_layer = BertModelLayer.from_params(params, name=name)

    token_ids = keras.layers.Input(
        shape=(seq_len, ), dtype='int32', name="input_ids")
    sequence_output = bert_layer(token_ids)

    # Head: position 0 of the sequence axis -> dropout -> dense -> dropout
    # -> dense over all intents.
    hidden = keras.layers.Lambda(lambda seq: seq[:, 0, :])(sequence_output)
    hidden = keras.layers.Dropout(architecture.clf_out_dropout_rate)(hidden)
    hidden = keras.layers.Dense(
        units=BertModelParameters().bert_h_param,
        activation=architecture.clf_out_activation)(hidden)
    hidden = keras.layers.Dropout(architecture.logits_dropout_rate)(hidden)
    intent_scores = keras.layers.Dense(
        units=len(external_datasets.all_intents()),
        activation=architecture.logits_activation)(hidden)

    model = keras.Model(inputs=token_ids, outputs=intent_scores)
    model.build(input_shape=(None, seq_len))

    load_stock_weights(bert_layer, file_system.get_bert_model_path())
    return model
def build(self, max_seq_length, bert_ckpt_file=bert_ckpt_file_location,
          **kwargs):
    """Build, compile and store an (adapter-)BERT classifier on ``self.model``.

    Args:
        max_seq_length: Length of the ``input_ids`` input.
        bert_ckpt_file: Checkpoint path, joined onto the module-level
            ``abs_path``.
        **kwargs: Optional settings:
            ``optimizer`` (default ``"adam"``),
            ``metrics`` (default ``['accuracy']``),
            ``adapter_size`` (default ``64``; see arXiv:1902.00751),
            ``dropout_rate`` (default ``0.5``).
    """
    optimizer = kwargs.get("optimizer", "adam")
    metrics = kwargs.get("metrics", ['accuracy'])
    adapter_size = kwargs.get("adapter_size", 64)
    dropout_rate = kwargs.get('dropout_rate', 0.5)

    # Create the BERT layer.
    with tf.io.gfile.GFile(
            os.path.join(abs_path, bert_config_file_location),
            "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = tf.keras.layers.Input(
        shape=(max_seq_length, ), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    cls_out = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    # NOTE(review): this first dropout is fixed at 0.5 and does not follow
    # ``dropout_rate`` -- confirm whether that is intended.
    cls_out = tf.keras.layers.Dropout(0.5)(cls_out)
    dense_out_1 = tf.keras.layers.Dense(
        units=768, activation="tanh")(cls_out)
    dense_out_1 = tf.keras.layers.Dropout(dropout_rate)(dense_out_1)
    # NOTE(review): a softmax on a hidden layer is unusual -- confirm.
    dense_out_2 = tf.keras.layers.Dense(
        units=200, activation="softmax")(dense_out_1)
    dense_out_2 = tf.keras.layers.Dropout(dropout_rate)(dense_out_2)
    logits = tf.keras.layers.Dense(
        units=len(self.classes), activation='softmax')(dense_out_2)

    self.model = tf.keras.Model(inputs=input_ids, outputs=logits)
    self.model.build(input_shape=(None, max_seq_length))

    # Load the pre-trained model weights.
    load_stock_weights(bert, os.path.join(abs_path, bert_ckpt_file))

    # Freeze weights if adapter-BERT is used.
    if adapter_size is not None:
        freeze_bert_layers(bert)

    # The output layer already applies softmax, so the loss must treat its
    # input as probabilities (from_logits=False), not raw logits.
    self.model.compile(
        optimizer=optimizer,
        loss=tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False),
        metrics=metrics)

    self.model.summary()
def create_model(self, type: str, adapter_size=None):
    """Create and compile a classification model, stored on ``self.model``.

    Args:
        type: ``"binary"`` for a 2-unit output, ``"multi"`` for a 3-unit
            output. Also stored on ``self.type``.
        adapter_size: Value assigned to ``bert_params.adapter_size`` (see
            arXiv:1902.00751). When it is not ``None`` the BERT layers are
            frozen via ``self.freeze_bert_layers``.

    Raises:
        TypeError: If ``type`` is neither ``"binary"`` nor ``"multi"``.
    """
    self.type = type

    if type == 'binary':
        class_count = 2
    elif type == 'multi':
        class_count = 3
    else:
        raise TypeError("Choose a proper type of classification")

    # Create the BERT layer.
    with tf.io.gfile.GFile(self._bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(
        shape=(self.max_seq_len,), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    # Keep only position 0 of the sequence axis as the pooled feature.
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.3)(cls_out)
    logits = keras.layers.Dense(units=768, activation="relu")(cls_out)
    logits = keras.layers.Dropout(0.4)(logits)
    logits = keras.layers.Dense(
        units=class_count, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, self.max_seq_len))

    # Load the pre-trained model weights.
    load_stock_weights(bert, self._bert_ckpt_file)

    # Freeze weights if adapter-BERT is used.
    if adapter_size is not None:
        self.freeze_bert_layers(bert)

    # The output layer already applies softmax, so the loss must treat its
    # input as probabilities (from_logits=False), not raw logits.
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy(name="acc")])

    model.summary()

    self.model = model
def createMultiModelMaximum(max_seq_len, bert_ckpt_file, bert_config_file,
                            NUM_CLASS):
    """Build a three-branch (ResNet50, InceptionV3, BERT) classifier.

    Each branch is projected to 768 units and the branches are merged with an
    element-wise ``Maximum`` before a dense classification stack. The model
    graph is also written to ``multiple_inputs_text.png``.

    Args:
        max_seq_len: Length of the ``input_ids_bert`` input.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``.
        bert_config_file: Path of the stock BERT JSON configuration.
        NUM_CLASS: Number of units in the softmax output layer.

    Returns:
        A tuple ``(model, 17)``; the meaning of the constant is not visible
        from this function.
    """
    def _pooled_head(backbone):
        # Pool the backbone feature map and project it down to 768 units.
        features = GlobalAveragePooling2D()(backbone.output)
        features = Dropout(0.5)(features)
        features = Dense(2048)(features)
        features = Dropout(0.5)(features)
        return Dense(768)(features)

    # Text branch.
    with GFile(bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = None
    bert_layer = BertModelLayer.from_params(params, name="bert")

    bert_in = Input(shape=(max_seq_len, ), dtype='int32',
                    name="input_ids_bert")
    sequence_output = bert_layer(bert_in)
    text_features = Lambda(lambda seq: seq[:, 0, :])(sequence_output)
    text_features = Dropout(0.5)(text_features)
    bert_out = Dense(units=768, activation="tanh")(text_features)

    load_stock_weights(bert_layer, bert_ckpt_file)

    # Image branches (backbones are instantiated in the original order).
    inceptionv3 = InceptionV3(weights='imagenet', include_top=False)
    resnet50 = ResNet50(weights='imagenet', include_top=False)
    res_out = _pooled_head(resnet50)
    inc_out = _pooled_head(inceptionv3)

    merged = Maximum()([res_out, inc_out, bert_out])

    # Remaining layers: three dense + dropout stages, then the softmax output.
    hidden = merged
    for width in (2048, 1024, 512):
        hidden = Dense(width)(hidden)
        hidden = Dropout(0.5)(hidden)
    output = Dense(NUM_CLASS, activation='softmax',
                   name='output_layer')(hidden)

    model = Model(inputs=[resnet50.input, inceptionv3.input, bert_in],
                  outputs=output)
    plot_model(model, to_file='multiple_inputs_text.png', show_shapes=True,
               dpi=600, expand_nested=False)
    return model, 17
def create_model(max_seq_len, bert_config_file, bert_ckpt_file,
                 adapter_size=64):
    """Create and compile a 6-class (adapter-)BERT classification model.

    Args:
        max_seq_len: Length of the ``input_ids`` input.
        bert_config_file: Path of the stock BERT JSON configuration.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``.
        adapter_size: Value assigned to ``bert_params.adapter_size`` (see
            arXiv:1902.00751). When it is not ``None`` the BERT layers are
            frozen via ``freeze_bert_layers``.

    Returns:
        A compiled ``keras.Model`` with a 6-unit softmax output.
    """
    # Create the BERT layer.
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    # Keep only position 0 of the sequence axis as the pooled feature.
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=6, activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # Load the pre-trained model weights.
    load_stock_weights(bert, bert_ckpt_file)

    # Freeze weights if adapter-BERT is used.
    if adapter_size is not None:
        freeze_bert_layers(bert)

    # The output layer already applies softmax, so the loss must treat its
    # input as probabilities (from_logits=False), not raw logits.
    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    model.summary()
    return model
def get_adapter_BERT_layer(model_dir, adapter_size):
    """Create an adapter-BERT layer.

    Arguments:
        model_dir {str} -- Path to the pretrained model files; must contain
            ``bert_config.json``
        adapter_size {int} -- Size of adapter

    Returns:
        BERT -- BERT layer, as produced by ``get_bert_layer``
    """
    import os

    # Join the path portably (also accepts path-like objects) and read the
    # JSON as UTF-8 rather than with the platform's locale encoding.
    config_path = os.path.join(model_dir, 'bert_config.json')
    with open(config_path, 'r', encoding='utf-8') as fd:
        bc = StockBertConfig.from_json_string(fd.read())

    params = map_stock_config_to_params(bc)
    params.adapter_size = adapter_size
    return get_bert_layer(params)
def create_bert_model(self, max_seq_len=18):
    """Build a bare two-input BERT model from ``self.bert_config_file``.

    No checkpoint weights are loaded here.

    Args:
        max_seq_len: Length of the ``input_ids`` / ``token_type_ids`` inputs.

    Returns:
        A tuple ``(model, bert, (input_ids, token_type_ids))``: the Keras
        model, the BERT layer it wraps, and the two input tensors.
    """
    with tf.io.gfile.GFile(self.bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    bert = BertModelLayer.from_params(
        map_stock_config_to_params(stock_config), name="bert")

    inputs = (
        keras.layers.Input(
            shape=(max_seq_len,), dtype='int32', name="input_ids"),
        keras.layers.Input(
            shape=(max_seq_len,), dtype='int32', name="token_type_ids"),
    )
    sequence_output = bert(list(inputs))
    model = keras.Model(inputs=list(inputs), outputs=sequence_output)
    return model, bert, inputs
def BERTClassifier(max_seq_len=128,
                   bert_model_dir='models/chinese_L-12_H-768_A-12',
                   do_lower_case=False):
    """Build a compiled 2-class BERT classifier and its tokenizer.

    The BERT layers are frozen via ``freeze_bert_layers`` after the stock
    weights are loaded.

    Args:
        max_seq_len: Length of the ``input_ids`` / ``token_type_ids`` inputs.
        bert_model_dir: Directory containing ``bert_config.json``,
            ``bert_model.ckpt`` and ``vocab.txt``.
        do_lower_case: Forwarded to ``FullTokenizer``.

    Returns:
        A tuple ``(model, tokenizer)``.
    """
    # Load the BERT parameters.
    with tf.io.gfile.GFile(
            os.path.join(bert_model_dir, "bert_config.json"), "r") as reader:
        stock_params = StockBertConfig.from_json_string(reader.read())
    bert_params = stock_params.to_bert_model_layer_params()

    # Create the BERT structure according to the parameters.
    bert = BertModelLayer.from_params(bert_params, name="bert")

    # Inputs.
    input_token_ids = tf.keras.Input(
        (max_seq_len, ), dtype=tf.int32, name='input_ids')
    input_segment_ids = tf.keras.Input(
        (max_seq_len, ), dtype=tf.int32, name='token_type_ids')

    # Classifier head over position 0 of the sequence axis.
    output = bert([input_token_ids, input_segment_ids])
    cls_out = tf.keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    cls_out = tf.keras.layers.Dropout(rate=0.5)(cls_out)
    logits = tf.keras.layers.Dense(
        units=cls_out.shape[-1], activation=tf.math.tanh)(cls_out)
    logits = tf.keras.layers.Dropout(rate=0.5)(logits)
    logits = tf.keras.layers.Dense(
        units=2, activation=tf.nn.softmax)(logits)

    # Model made of the BERT layer plus the classifier head.
    model = tf.keras.Model(
        inputs=[input_token_ids, input_segment_ids], outputs=logits)
    model.build(input_shape=[(None, max_seq_len), (None, max_seq_len)])

    # Load the BERT layer weights, then freeze the BERT layers.
    load_stock_weights(bert, os.path.join(bert_model_dir, "bert_model.ckpt"))
    freeze_bert_layers(bert)

    # The output layer already applies softmax, so the loss must treat its
    # input as probabilities (from_logits=False), not raw logits.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(2e-5),
        loss=tf.keras.losses.SparseCategoricalCrossentropy(
            from_logits=False),
        metrics=[tf.keras.metrics.SparseCategoricalAccuracy(
            name='accuracy')])

    # Create the tokenizer; Chinese characters need no lower-casing.
    tokenizer = FullTokenizer(
        vocab_file=os.path.join(bert_model_dir, "vocab.txt"),
        do_lower_case=do_lower_case)
    return model, tokenizer
def _load_bert(self, bert_config_file, bert_ckpt_file):
    """Build a BERT feature extractor and load its stock weights.

    Args:
        bert_config_file: Path of the stock BERT JSON configuration.
        bert_ckpt_file: Checkpoint passed to ``load_stock_weights``.

    Returns:
        A ``keras.Model`` mapping ``input_ids`` (length ``self.max_seq_len``)
        to position 0 of the BERT output sequence.

    Raises:
        Exception: Any error raised while reading the configuration or
            creating the BERT layer is printed and then re-raised.
    """
    try:
        with tf.io.gfile.GFile(bert_config_file, 'r') as config_reader:
            stock_config = StockBertConfig.from_json_string(
                config_reader.read())
        params = map_stock_config_to_params(stock_config)
        params.adapter_size = None
        bert_layer = BertModelLayer.from_params(params, name='bert')
    except Exception as err:
        print(err)
        raise err

    token_ids = keras.layers.Input(
        shape=(self.max_seq_len, ), dtype='int64', name="input_ids")
    sequence_output = bert_layer(token_ids)

    # Use the first embedding of the BERT output as the output embedding.
    embedding = keras.layers.Lambda(lambda seq: seq[:, 0, :])(sequence_output)

    model = keras.Model(inputs=token_ids, outputs=embedding)
    model.build(input_shape=(None, self.max_seq_len))

    load_stock_weights(bert_layer, bert_ckpt_file)
    return model
def __init__(self):
    """Load the intent dataset, vocabulary, BERT params and saved model.

    All paths are hard-coded relative to the working directory under
    ``./Intent_cl/``. Sets ``max_len``, ``config_path``, ``data``, ``vocab``,
    ``bert_params``, ``intent_model`` and ``classes`` on the instance.
    """
    self.max_len = 29
    self.config_path = './Intent_cl/Bert_model/bert_config.json'
    self.data = pd.read_csv('./Intent_cl/Intent_dataset/category_data.csv')

    with open('./Intent_cl/Bert_model/vocab.json', 'r') as vocab_file:
        self.vocab = json.load(vocab_file)

    with tf.io.gfile.GFile(self.config_path, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    self.bert_params = map_stock_config_to_params(stock_config)
    self.bert_params.adapter_size = None

    # The saved model references a custom layer under the key
    # "BertModelLayer"; it is supplied as a layer built from bert_params.
    custom_objects = {
        "BertModelLayer": BertModelLayer.from_params(
            self.bert_params, name="bert")
    }
    self.intent_model = keras.models.load_model(
        './Intent_cl/Bert_model/nomal_news_weather_etc_kobert_model_category.h5',
        custom_objects=custom_objects)

    # Distinct intent labels in order of first appearance in the dataset.
    self.classes = self.data.intent.unique().tolist()
def create_model(max_seq_len, lr=1e-5):
    """Create and compile a 2-class BERT classification model.

    Architecture: ``input_ids`` -> BERT -> position 0 of the sequence axis ->
    dropout (to reduce overfitting) -> dense softmax layer that outputs the
    predicted probabilities. The configuration and checkpoint paths come from
    the module-level ``bert_config_file`` and ``bert_ckpt_file``.

    Args:
        max_seq_len: The maximum sequence length.
        lr: Learning rate of the Adam optimizer.

    Returns:
        A compiled ``keras.Model``.
    """
    # Create the BERT layer.
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="input_ids")
    output = bert(input_ids)
    print("bert shape", output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    # Dropout layer.
    cls_out = keras.layers.Dropout(0.8)(cls_out)
    # Dense layer with probability output.
    logits = keras.layers.Dense(units=2, activation="softmax")(cls_out)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # Load the pre-trained model weights.
    load_stock_weights(bert, bert_ckpt_file)

    # The output layer already applies softmax, so the loss must treat its
    # input as probabilities (from_logits=False), not raw logits.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    model.summary()
    return model
def create_text_model(max_seq_len, bert_ckpt_file, bert_config_file,
                      NUM_CLASS, overwriteLayerAndEmbeddingSize=False,
                      isPreTrained=False, pathToBertModelWeights=None,
                      isTrainable=True):
    """Build the BERT text-classification branch.

    Args:
        max_seq_len: Length of the ``input_ids`` input.
        bert_ckpt_file: Stock checkpoint, used when ``isPreTrained`` is false.
        bert_config_file: Path of the stock BERT JSON configuration.
        NUM_CLASS: Number of units in the softmax output layer.
        overwriteLayerAndEmbeddingSize: When true, the configuration's
            ``max_position_embeddings`` is set to ``max_seq_len``.
        isPreTrained: When true, weights for the whole model are loaded from
            ``pathToBertModelWeights`` instead of the stock checkpoint.
        pathToBertModelWeights: Weights file used when ``isPreTrained``.
        isTrainable: When false (pre-trained path only), every layer of the
            model is marked non-trainable.

    Returns:
        ``model_bert`` alone when ``isPreTrained`` is false; the tuple
        ``(model_bert, 2)`` when it is true. The two shapes differ, so
        callers must handle both.
    """
    with GFile(bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    if overwriteLayerAndEmbeddingSize:
        stock_config.max_position_embeddings = max_seq_len
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = None
    bert_layer = BertModelLayer.from_params(params, name="bert")

    token_ids = Input(shape=(max_seq_len, ), dtype='int32', name="input_ids")
    sequence_output = bert_layer(token_ids)
    print("bert shape", sequence_output.shape)

    pooled = Lambda(lambda seq: seq[:, 0, :],
                    name='bert_output_layer_768')(sequence_output)
    pooled = Dropout(0.5)(pooled)
    probabilities = Dense(NUM_CLASS, activation="softmax")(pooled)

    model_bert = Model(inputs=token_ids, outputs=probabilities, name='BERT')
    model_bert.build(input_shape=(None, max_seq_len))

    if isPreTrained:
        model_bert.load_weights(pathToBertModelWeights)
        if not isTrainable:
            for layer in model_bert.layers:
                layer.trainable = False
        return model_bert, 2

    load_stock_weights(bert_layer, bert_ckpt_file)
    return model_bert
def create_model(max_seq_len, bert_ckpt_dir, bert_config_file):
    """Build a 2-class softmax classifier on top of a stock BERT layer.

    Args:
        max_seq_len: Length of the ``input_ids`` input.
        bert_ckpt_dir: Not used by this function.
        bert_config_file: Path of the stock BERT JSON configuration.

    Returns:
        An uncompiled ``tf.keras`` model.
    """
    with tf.io.gfile.GFile(bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = None
    bert_layer = BertModelLayer.from_params(params, name="bert")

    token_ids = tf.keras.layers.Input(
        shape=(max_seq_len, ), dtype=tf.int32, name='input_ids')
    # Keep only position 0 of the sequence axis.
    first_position = bert_layer(token_ids)[:, 0, :]

    hidden = tf.keras.layers.Dropout(0.5)(first_position)
    hidden = tf.keras.layers.Dense(768, activation='tanh')(hidden)
    hidden = tf.keras.layers.Dropout(0.5)(hidden)
    probabilities = tf.keras.layers.Dense(2, activation='softmax')(hidden)

    model = tf.keras.models.Model(inputs=token_ids, outputs=probabilities)
    model.summary()

    # NOTE(review): ``bert_ckpt_file`` is not a parameter here, so it must
    # resolve to a module-level name, while ``bert_ckpt_dir`` goes unused --
    # confirm which checkpoint path was intended.
    load_stock_weights(bert_layer, bert_ckpt_file)
    return model
def test2():
    """Build a non-trainable single-input BERT model and print its details.

    Reads the configuration from a hard-coded local model directory, creates
    the BERT layer with ``trainable=False``, and prints the output shape and
    the model's trainable weights. No checkpoint is loaded.
    """
    model_dir = "/Users/livingmagic/Documents/deeplearning/models/bert/chinese_L-12_H-768_A-12"
    config_path = os.path.join(model_dir, "bert_config.json")
    # Computed as in the original, although nothing below loads it.
    bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

    with tf.io.gfile.GFile(config_path, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    layer_params = stock_config.to_bert_model_layer_params()
    bert_layer = BertModelLayer.from_params(
        layer_params, name="bert", trainable=False)

    # Input and output endpoints.
    seq_len = 128
    token_ids = keras.layers.Input(shape=(seq_len, ), dtype='int32')
    # Original comment gives the shape as [batch_size, max_seq_len,
    # hidden_size].
    sequence_output = bert_layer(token_ids, training=False)
    print('Output shape: {}'.format(sequence_output.get_shape()))

    bert_model = keras.Model(inputs=token_ids, outputs=sequence_output)
    print(bert_model.trainable_weights)
def __init__(self, bert_config_file, bert_ckpt_file, max_seq_len, lr=1e-5):
    """Build and compile a 2-class BERT classifier, stored on ``self.model``.

    Args:
        bert_config_file: Path to the BERT configuration parameters.
        bert_ckpt_file: Path to the pre-trained BERT checkpoint.
        max_seq_len: Maximum sequence length.
        lr: Learning rate of the Adam optimizer.
    """
    # Create the BERT layer.
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(
        shape=(max_seq_len, ), dtype='int32', name="input_ids")
    output = bert(input_ids)

    # Keep only position 0 of the sequence axis as the pooled feature.
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    # Dropout layer.
    cls_out = keras.layers.Dropout(0.8)(cls_out)
    # Dense layer with probability output.
    logits = keras.layers.Dense(units=2, activation="softmax")(cls_out)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # Load the pre-trained model weights.
    load_stock_weights(bert, bert_ckpt_file)

    # The output layer already applies softmax, so the loss must treat its
    # input as probabilities (from_logits=False), not raw logits.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    self.model = model
def create_model(config, adapter_size=64):
    """Create and compile a multi-label classification model.

    The head adds a weighted self-attention pooling of the BERT output to a
    weighted copy of its position-0 vector, then applies dropout, layer
    normalisation and a dense layer without activation (raw logits).

    Args:
        config: Settings object. Fields read: ``bert_config_file``,
            ``bert_ckpt_file``, ``max_seq_len``, ``attn_weight``,
            ``cls_weight``, ``classes``, ``label_smoothing``,
            ``focal_alpha``, ``focal_gamma``, ``loss_func``, ``batch_size``.
        adapter_size: Value assigned to ``bert_params.adapter_size``. The
            BERT layers are not frozen here regardless of this value.

    Returns:
        A compiled ``keras.Model`` emitting ``len(config.classes)`` logits.

    Raises:
        KeyError: If ``config.loss_func`` is neither
            ``"sigmoid_cross_entropy_loss"`` nor ``"focal_loss"``.
    """
    # Create the BERT layer.
    with tf.io.gfile.GFile(config.bert_config_file, "r") as config_reader:
        stock_config = StockBertConfig.from_json_string(config_reader.read())
    params = map_stock_config_to_params(stock_config)
    params.adapter_size = adapter_size
    bert_layer = BertModelLayer.from_params(params, name="bert")

    token_ids = keras.layers.Input(
        shape=(config.max_seq_len, ), dtype='int32', name="input_ids")
    sequence_output = bert_layer(token_ids)

    # Self-attention pooling: softmax(X X^T) X, summed over axis 1.
    similarity = tf.matmul(sequence_output, sequence_output, transpose_b=True)
    attention_weights = tf.nn.softmax(similarity, axis=-1)
    attended = tf.matmul(attention_weights, sequence_output)
    attention_pooled = tf.reduce_sum(attended, axis=1) * config.attn_weight

    first_position = keras.layers.Lambda(
        lambda seq: seq[:, 0, :])(sequence_output) * config.cls_weight

    features = first_position + attention_pooled
    features = keras.layers.Dropout(0.5)(features)
    features = keras.layers.LayerNormalization()(features)
    logits = keras.layers.Dense(units=len(config.classes))(features)

    model = keras.Model(inputs=token_ids, outputs=logits)
    model.build(input_shape=(None, config.max_seq_len))

    # Load the pre-trained model weights.
    load_stock_weights(bert_layer, config.bert_ckpt_file)

    # Both candidate losses are constructed up front; the model emits raw
    # logits, so each is configured with from_logits=True.
    loss_func_list = {
        "sigmoid_cross_entropy_loss": tf.keras.losses.BinaryCrossentropy(
            from_logits=True, label_smoothing=config.label_smoothing),
        "focal_loss": tfa.losses.SigmoidFocalCrossEntropy(
            alpha=config.focal_alpha,
            gamma=config.focal_gamma,
            from_logits=True),
    }
    selected_loss = loss_func_list[config.loss_func]

    model.compile(
        optimizer=keras.optimizers.Adam(),
        loss=selected_loss,
        metrics=[
            MultiLabelAccuracy(batch_size=config.batch_size),
            MultiLabelPrecision(batch_size=config.batch_size),
            MultiLabelRecall(batch_size=config.batch_size),
            MultiLabelF1(batch_size=config.batch_size),
            HammingLoss(batch_size=config.batch_size),
        ])

    model.summary()
    return model
def from_json_file(bert_config_file):
    """Read a stock BERT JSON config and return BERT model-layer params.

    Args:
        bert_config_file: Path of the stock BERT JSON configuration.

    Returns:
        The result of ``StockBertConfig.to_bert_model_layer_params()``.
    """
    with tf.io.gfile.GFile(bert_config_file, "r") as config_reader:
        config_json = config_reader.read()
    stock_config = StockBertConfig.from_json_string(config_json)
    return stock_config.to_bert_model_layer_params()
def build_encoder(config_file):
    """Create a ``BertModelLayer`` named ``"bert"`` from a stock JSON config.

    Args:
        config_file: Path of the stock BERT JSON configuration.

    Returns:
        The BERT layer built from the configuration; no weights are loaded.
    """
    with tf.io.gfile.GFile(config_file, "r") as config_reader:
        config_json = config_reader.read()
    layer_params = StockBertConfig.from_json_string(
        config_json).to_bert_model_layer_params()
    return BertModelLayer.from_params(layer_params, name="bert")