Example #1
import tensorflow as tf

from bert.model import BertModelLayer
from bert.loader import map_to_stock_variable_name


def load_stock_weights(bert: BertModelLayer, ckpt_file):
    """Loads weights from a pre-trained (stock) BERT checkpoint into a BertModelLayer."""
    assert isinstance(
        bert, BertModelLayer
    ), "Expecting a BertModelLayer instance as first argument"
    assert tf.compat.v1.train.checkpoint_exists(
        ckpt_file), "Checkpoint does not exist: {}".format(ckpt_file)
    ckpt_reader = tf.train.load_checkpoint(ckpt_file)

    bert_prefix = bert.weights[0].name.split("/")[0]

    weights = []
    for weight in bert.weights:
        stock_name = map_to_stock_variable_name(weight.name, bert_prefix)

        if ckpt_reader.has_tensor(stock_name):
            value = ckpt_reader.get_tensor(stock_name)
            weights.append(value)
        else:
            print("loader: No value for:[{}], i.e.:[{}] in:[{}]".format(
                weight.name, stock_name, ckpt_file))
            # raise ValueError("No value for:[{}], i.e.:[{}] in:[{}]".format(weight.name, stock_name, ckpt_file))
            weights.append(weight.value())

    bert.set_weights(weights)
    print("Done loading {} BERT weights from: {} into {} (prefix:{})".format(
        len(weights), ckpt_file, bert, bert_prefix))
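
A minimal usage sketch, assuming a built Keras model that wraps the `bert` layer; `model_dir` is a hypothetical local path to a Google-released checkpoint and is not part of the original snippet:

import os

model_dir = ".models/uncased_L-12_H-768_A-12"  # hypothetical checkpoint directory
bert_ckpt_file = os.path.join(model_dir, "bert_model.ckpt")

# the layer's weights only exist once the model has been built
model.build(input_shape=(None, 128))
load_stock_weights(bert, bert_ckpt_file)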
Example #2
def create_model(max_seq_len, bert_ckpt_file, adapter_size):
  # bert_config_file (and CLASSES below) are module-level globals in the original script
  with tf.io.gfile.GFile(bert_config_file, "r") as reader:
    bc = StockBertConfig.from_json_string(reader.read())
    bert_params = map_stock_config_to_params(bc)
    bert_params.adapter_size = adapter_size
    bert = BertModelLayer.from_params(bert_params, name="bert")
  input_ids = keras.layers.Input(shape=(max_seq_len, ), dtype='int32', name="input_ids")
  bert_output = bert(input_ids)

  print("bert shape", bert_output.shape)

  # take the final hidden state of the [CLS] token as the sequence representation
  cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(bert_output)
  cls_out = keras.layers.Dropout(0.5)(cls_out)
  logits = keras.layers.Dense(units=768, activation="tanh")(cls_out)
  logits = keras.layers.Dropout(0.5)(logits)
  logits = keras.layers.Dense(units=len(CLASSES), activation="softmax")(logits)

  model = keras.Model(inputs=input_ids, outputs=logits)
  model.build(input_shape=(None, max_seq_len))

  load_stock_weights(bert, bert_ckpt_file)

  if adapter_size is not None:
    # adapter-BERT: freeze the pre-trained weights and train only the adapters
    freeze_bert_layers(bert)

  return model
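
A hypothetical call, assuming the module-level `bert_config_file` and `CLASSES` globals that the original tutorial defines; the path and sizes here are illustrative:

bert_ckpt_file = os.path.join(bert_model_dir, "bert_model.ckpt")  # hypothetical path
model = create_model(max_seq_len=128, bert_ckpt_file=bert_ckpt_file, adapter_size=64)
model.summary()

Passing adapter_size=None skips the adapter parameters entirely and leaves all BERT layers trainable for full fine-tuning.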
Example #3
def map_stock_config_to_params(bc):
    """
    Converts the original BERT or ALBERT config dictionary
    to a `BertModelLayer.Params` instance.
    :return: a `BertModelLayer.Params` instance.
    """
    bert_params = BertModelLayer.Params(
        num_layers=bc.num_hidden_layers,
        num_heads=bc.num_attention_heads,
        hidden_size=bc.hidden_size,
        hidden_dropout=bc.hidden_dropout_prob,
        attention_dropout=bc.attention_probs_dropout_prob,

        intermediate_size=bc.intermediate_size,
        intermediate_activation=bc.hidden_act,

        vocab_size=bc.vocab_size,
        use_token_type=True,
        use_position_embeddings=True,
        token_type_vocab_size=bc.type_vocab_size,
        max_position_embeddings=bc.max_position_embeddings,

        # ALBERT: factorized embedding parameterization and shared transformer layers
        embedding_size=bc.embedding_size,
        shared_layer=bc.embedding_size is not None,
    )
    return bert_params
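
When the checkpoint directory ships a `bert_config.json` next to the weights, bert-for-tf2's `params_from_pretrained_ckpt` (used in Example #5 below) performs the same mapping in one call; `model_dir` is a hypothetical path:

from bert.loader import params_from_pretrained_ckpt

bert_params = params_from_pretrained_ckpt(model_dir)  # reads bert_config.json from model_dir
bert = BertModelLayer.from_params(bert_params, name="bert")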
Example #4
def create_model(max_seq_len, lr=1e-5):
    """
    Creates a BERT classification model.
    The architecture is: raw input -> BERT -> dropout layer (to prevent
    overfitting) -> dense layer that outputs the predicted probabilities.

    max_seq_len: the maximum sequence length
    lr: learning rate of the optimizer
    """

    # create the BERT layer (bert_config_file and bert_ckpt_file are module-level globals)
    with tf.io.gfile.GFile(bert_config_file, "r") as reader:
        bc = StockBertConfig.from_json_string(reader.read())
        bert_params = map_stock_config_to_params(bc)
        bert = BertModelLayer.from_params(bert_params, name="bert")

    input_ids = keras.layers.Input(shape=(max_seq_len, ),
                                   dtype='int32',
                                   name="input_ids")
    output = bert(input_ids)

    print("bert shape", output.shape)
    # take the final hidden state of the [CLS] token as the sequence representation
    cls_out = keras.layers.Lambda(lambda seq: seq[:, 0, :])(output)
    # dropout layer (rate=0.8, i.e. 80% of activations are dropped)
    cls_out = keras.layers.Dropout(0.8)(cls_out)
    # dense layer with probability output
    logits = keras.layers.Dense(units=2, activation="softmax")(cls_out)

    model = keras.Model(inputs=input_ids, outputs=logits)
    model.build(input_shape=(None, max_seq_len))

    # load the pre-trained model weights
    load_stock_weights(bert, bert_ckpt_file)

    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=lr),
        # the final layer already applies softmax, so the loss must expect
        # probabilities rather than logits
        loss=keras.losses.SparseCategoricalCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name="acc")])

    model.summary()

    return model
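
A minimal training sketch; `train_x` (token ids, shape `[N, max_seq_len]`) and `train_y` (integer class labels) are hypothetical names, not from the original snippet:

model = create_model(max_seq_len=128, lr=1e-5)
# train_x / train_y are assumed to be numpy arrays prepared by the tokenizer
model.fit(train_x, train_y, batch_size=16, epochs=3, validation_split=0.1)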
Example #5
    # (fragment: the loop and `if` branch that build templabel are not shown)
    else:
        templabel.append(0)

y = np.array(templabel)

####################################################

# BERT TOKENIZATION

BertTokenizer = bert_tokenization.FullTokenizer
# importing from the submodules works around an occasional
# "ImportError: cannot import name 'BertModelLayer' from 'bert' (unknown location)"
from bert.loader import params_from_pretrained_ckpt
from bert.model import BertModelLayer

bert_params = params_from_pretrained_ckpt(
    'D:\\uncased_L-4_H-256_A-4')  # Google-released checkpoint, not a TensorFlow Hub module
bert_layer1 = BertModelLayer.from_params(
    bert_params, name="bert")  # hidden_dropout defaults to 0.1

model_name = 'uncased_L-4_H-256_A-4'

vocabulary_file = os.path.join('D:\\uncased_L-4_H-256_A-4', 'vocab.txt')
# lowercase the input only for uncased models
to_lower_case = not (model_name.find("cased") == 0
                     or model_name.find("multi_cased") == 0)
tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

max_seq_length = 256
# subsetdf['content'] holds the raw text strings to classify
train_tokens = map(tokenizer.tokenize, list(subsetdf.loc[:, 'content']))
train_tokens = map(lambda tok: ["[CLS]"] + tok + ["[SEP]"], train_tokens)
train_token_ids = list(map(tokenizer.convert_tokens_to_ids, train_tokens))
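
The snippet stops before padding; a minimal sketch of the usual next step, bringing every id sequence to max_seq_length (note that this naive truncation can drop the trailing [SEP]):

import numpy as np

train_token_ids = np.array([
    ids[:max_seq_length] + [0] * max(0, max_seq_length - len(ids))  # 0 is the [PAD] id
    for ids in train_token_ids
])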