def load_stock_weights(bert: BertModelLayer, ckpt_file):
    assert isinstance(bert, BertModelLayer), "Expecting a BertModelLayer instance as first argument"
    assert tf.compat.v1.train.checkpoint_exists(ckpt_file), "Checkpoint does not exist: {}".format(ckpt_file)
    ckpt_reader = tf.train.load_checkpoint(ckpt_file)

    bert_prefix = bert.weights[0].name.split("/")[0]

    weights = []
    for weight in bert.weights:
        stock_name = map_to_stock_variable_name(weight.name, bert_prefix)

        if ckpt_reader.has_tensor(stock_name):
            value = ckpt_reader.get_tensor(stock_name)
            weights.append(value)
        else:
            print("loader: No value for:[{}], i.e.:[{}] in:[{}]".format(
                weight.name, stock_name, ckpt_file))
            # raise ValueError("No value for:[{}], i.e.:[{}] in:[{}]".format(weight.name, stock_name, ckpt_file))
            weights.append(weight.value())

    bert.set_weights(weights)
    print("Done loading {} BERT weights from: {} into {} (prefix:{})".format(
        len(weights), ckpt_file, bert, bert_prefix))
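# A minimal sketch (hypothetical checkpoint path) of the CheckpointReader API
# the loader above relies on: tf.train.load_checkpoint returns a reader whose
# get_variable_to_shape_map() lists the stock variable names that
# has_tensor()/get_tensor() look up.
import tensorflow as tf

reader = tf.train.load_checkpoint("uncased_L-4_H-256_A-4/bert_model.ckpt")  # hypothetical path
for name, shape in sorted(reader.get_variable_to_shape_map().items()):
    print(name, shape)  # e.g. bert/encoder/layer_0/attention/self/query/kernel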
def create_model(max_seq_len, bert_ckpt_file, adapter_size):
    # bert_config_file is expected to be defined at module scope
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert_params.adapter_size = adapter_size
        bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    bert_output = bert(input_ids)

    print("bert shape", bert_output.shape)

    # use the [CLS] token embedding as the sequence representation
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
    cls_out = keras.layers.Dropout(0.5)(cls_out)
    logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
    logits = keras.layers.Dropout(0.5)(logits)
    logits = keras.layers.Dense(units=len(CLASSES), activation="softmax")(logits)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    load_stock_weights(bert, bert_ckpt_file)

    # when using adapter-BERT, train only the adapters and layer norms
    if adapter_size is not None:
        freeze_bert_layers(bert)

    return model
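# A usage sketch for the adapter variant above, assuming a hypothetical
# checkpoint path and that CLASSES, bert_config_file and freeze_bert_layers are
# defined elsewhere in the script:
model = create_model(max_seq_len=128,
                     bert_ckpt_file="uncased_L-4_H-256_A-4/bert_model.ckpt",  # hypothetical
                     adapter_size=64)
model.summary()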
def map_stock_config_to_params(bc):
    """
    Converts the original BERT or ALBERT config dictionary
    to a `BertModelLayer.Params` instance.
    :return: a `BertModelLayer.Params` instance.
    """
    bert_params = BertModelLayer.Params(
        num_layers=bc.num_hidden_layers,
        num_heads=bc.num_attention_heads,
        hidden_size=bc.hidden_size,
        hidden_dropout=bc.hidden_dropout_prob,
        attention_dropout=bc.attention_probs_dropout_prob,

        intermediate_size=bc.intermediate_size,
        intermediate_activation=bc.hidden_act,

        vocab_size=bc.vocab_size,
        use_token_type=True,
        use_position_embeddings=True,
        token_type_vocab_size=bc.type_vocab_size,

        max_position_embeddings=bc.max_position_embeddings,

        # embedding_size is only set in ALBERT configs; when present,
        # the encoder layers share weights (ALBERT-style)
        embedding_size=bc.embedding_size,
        shared_layer=bc.embedding_size is not None,
    )
    return bert_params
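# A minimal sketch of calling the mapping (hypothetical config path): a plain
# BERT bert_config.json carries no embedding_size, so shared_layer comes out
# False, while ALBERT configs set it and get layer weight sharing.
with tf.io.gfile.GFile("uncased_L-4_H-256_A-4/bert_config.json", "r") as reader:  # hypothetical
    bc = StockBertConfig.from_json_string(reader.read())
params = map_stock_config_to_params(bc)
print(params.num_layers, params.hidden_size, params.shared_layer)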
def create_model(max_seq_len, lr=1e-5):
    """
    Creates a BERT classification model.
    The model architecture is: raw input -> BERT -> dropout layer (to prevent
    overfitting) -> dense layer that outputs predicted probabilities.

    max_seq_len: the maximum sequence length
    lr: learning rate of the optimizer
    """
    # create the bert layer
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len,), dtype='int32', name="input_ids")
    output = bert(input_ids)

    print("bert shape", output.shape)

    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    # Dropout layer
    cls_out = keras.layers.Dropout(0.8)(cls_out)
    # Dense layer with probability output
    logits = keras.layers.Dense(units=2, activation="softmax")(cls_out)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, bert_ckpt_file)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        # the final Dense layer applies softmax, so the model outputs
        # probabilities rather than raw logits
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    model.summary()
    return model
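# A usage sketch (hypothetical data): train_x is an int32 array of padded token
# ids with shape (num_examples, 128) and train_y holds integer labels 0/1.
model = create_model(max_seq_len=128, lr=1e-5)
model.fit(train_x, train_y, validation_split=0.1, batch_size=16, epochs=3)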
else:
    templabel.append(0)
y = np.array(templabel)

####################################################
# BERT TOKENIZATION

BertTokenizer = bert_tokenization.FullTokenizer

# these imports are necessary because of a weird
# "ImportError: cannot import name 'BertModelLayer' from 'bert' (unknown location)"
from bert.loader import params_from_pretrained_ckpt
from bert.model import BertModelLayer

bert_params = params_from_pretrained_ckpt('D:\\uncased_L-4_H-256_A-4')  # from Google, not TensorFlow Hub
bert_layer1 = BertModelLayer.from_params(bert_params, name="bert")
# hidden_dropout = 0.1
model_name = 'uncased_L-4_H-256_A-4'

vocabulary_file = os.path.join('D:\\uncased_L-4_H-256_A-4\\vocab.txt')
to_lower_case = not (model_name.find("cased") == 0 or model_name.find("multi_cased") == 0)
tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

max_seq_length = 256
train_tokens = map(tokenizer.tokenize, list(subsetdf.loc[:, 'content']))  # go back to a list of the raw strings
train_tokens = map(lambda tok: ["[CLS]"] + tok + ["[SEP]"], train_tokens)
train_token_ids = list(map(tokenizer.convert_tokens_to_ids, train_tokens))
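# The id lists in train_token_ids are variable-length; a sketch of right-padding
# with 0 (the [PAD] id) and truncating to max_seq_length so they can be stacked
# into a single model input array. pad_ids and train_x are hypothetical names:
import numpy as np

def pad_ids(ids, max_len):
    if len(ids) > max_len:
        return ids[:max_len - 1] + ids[-1:]  # truncate but keep the final [SEP]
    return ids + [0] * (max_len - len(ids))  # right-pad with the [PAD] id 0

train_x = np.array([pad_ids(ids, max_seq_length) for ids in train_token_ids])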