Example #1
 def __init__(self, configs, vocab_size, num_classes):
     super(NerModel, self).__init__()
     self.use_bert = configs.use_bert
     self.finetune = configs.finetune
     if self.use_bert and self.finetune:
         self.bert_model = TFBertModel.from_pretrained('bert-base-chinese')
     self.use_bigru = configs.use_bigru
     self.embedding = tf.keras.layers.Embedding(vocab_size,
                                                configs.embedding_dim,
                                                mask_zero=True)
     self.hidden_dim = configs.hidden_dim
     self.dropout_rate = configs.dropout
     self.dropout = tf.keras.layers.Dropout(self.dropout_rate)
     self.bigru = tf.keras.layers.Bidirectional(
         tf.keras.layers.GRU(self.hidden_dim, return_sequences=True))
     self.dense = tf.keras.layers.Dense(num_classes)
     self.transition_params = tf.Variable(
         tf.random.uniform(shape=(num_classes, num_classes)))
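The CRF transition matrix defined above is normally consumed by a decode step at inference time, which this example does not show. A minimal sketch (an assumption, not part of the original code) using tensorflow_addons, where logits come from self.dense and sequence_lengths from the input mask:

import tensorflow_addons as tfa

# Viterbi-decode the best tag sequence per sentence.
# logits: (batch, seq_len, num_classes), sequence_lengths: (batch,)
decoded_tags, best_score = tfa.text.crf_decode(
    logits, self.transition_params, sequence_lengths)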
Example #2
 def __init__(self, configs, data_manager, logger):
     self.dataManager = data_manager
     vocab_size = data_manager.max_token_number
     num_classes = data_manager.max_label_number
     self.logger = logger
     self.configs = configs
     logger.info('loading model parameter')
     if self.configs.use_bert and not self.configs.finetune:
         self.bert_model = TFBertModel.from_pretrained('bert-base-chinese')
     self.ner_model = NerModel(configs, vocab_size, num_classes)
     # instantiate a Checkpoint and set its restore target to the newly built model
     checkpoint = tf.train.Checkpoint(ner_model=self.ner_model)
     checkpoint.restore(tf.train.latest_checkpoint(
         configs.checkpoints_dir))  # restore model parameters from the checkpoint file
     logger.info('loading model successfully')
Example #3
    def bert_large_uncased_for_squad2(self, max_seq_length):
        input_ids = Input((max_seq_length, ), dtype=tf.int32, name='input_ids')
        input_masks = Input((max_seq_length, ),
                            dtype=tf.int32,
                            name='input_masks')

        #Load model from huggingface
        config = BertConfig.from_pretrained(self.config.named_model,
                                            output_hidden_states=True)
        bert_layer = TFBertModel.from_pretrained(self.named_model,
                                                 config=config)

        outputs, _, embeddings = bert_layer(
            [input_ids, input_masks])  #1 for pooled outputs, 0 for sequence

        model = Model(inputs=[input_ids, input_masks],
                      outputs=[embeddings, outputs])
        return model
Example #4
def create_model1():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    a_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    a_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    a_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    config = BertConfig()
    config.output_hidden_states = False

    bert_model = TFBertModel.from_pretrained('bert-base-uncased',
                                             config=config)

    q_embedding = bert_model(q_id, attention_mask=q_mask,
                             token_type_ids=q_atn)[0]
    a_embedding = bert_model(a_id, attention_mask=a_mask,
                             token_type_ids=a_atn)[0]

    q = tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    a = tf.keras.layers.GlobalAveragePooling1D()(a_embedding)

    x = tf.keras.layers.Concatenate()([q, a])
    x = tf.keras.layers.Reshape((1, x.shape[-1]))(x)

    cnn = tf.keras.layers.Conv1D(64, 3, padding='same', activation='relu')(x)
    cnn = tf.keras.layers.MaxPooling1D(pool_size=1, strides=2)(cnn)
    cnn = tf.keras.layers.BatchNormalization()(cnn)

    lstm = tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(units=64))(cnn)
    lstm = tf.keras.layers.Dropout(0.2)(lstm)

    dense = tf.keras.layers.Dense(64, activation='relu')(lstm)
    dense = tf.keras.layers.BatchNormalization()(dense)

    x = tf.keras.layers.Dense(TARGET_COUNT1, activation='softmax')(dense)
    x = tf.keras.layers.BatchNormalization()(x)

    model = tf.keras.models.Model(
        inputs=[q_id, q_mask, q_atn, a_id, a_mask, a_atn], outputs=x)

    return model
Example #5
def get_model():
    K.clear_session()
    
    bert_model = TFBertModel.from_pretrained(bert_path, from_pt=True, trainable=True)
    for l in bert_model.layers:
        l.trainable = True
 
    input_ids_texta = Input(shape=(None,), dtype='int32', name='input_ids_texta')
    input_token_type_ids_texta = Input(shape=(None,), dtype='int32', name='input_token_type_ids_texta')
    input_attention_mask_texta = Input(shape=(None,), dtype='int32', name='input_attention_mask_texta')
    input_ids_textb = Input(shape=(None,), dtype='int32', name='input_ids_textb')
    input_token_type_ids_textb = Input(shape=(None,), dtype='int32', name='input_token_type_ids_textb')
    input_attention_mask_textb = Input(shape=(None,), dtype='int32', name='input_attention_mask_textb')
    input_cat_texta = Input(shape=(1), dtype='float32', name='input_cat_texta')
    input_cat_textb = Input(shape=(1), dtype='float32', name='input_cat_textb')
 
    bert_output_texta = bert_model({'input_ids':input_ids_texta, 'token_type_ids':input_token_type_ids_texta, 'attention_mask':input_attention_mask_texta}, return_dict=False, training=True)
    projection_logits_texta = bert_output_texta[0]
    bert_cls_texta = Lambda(lambda x: x[:, 0])(projection_logits_texta) # take the vector at the [CLS] position for classification

    bert_output_textb = bert_model({'input_ids':input_ids_textb, 'token_type_ids':input_token_type_ids_textb, 'attention_mask':input_attention_mask_textb}, return_dict=False, training=True)
    projection_logits_textb = bert_output_textb[0]
    bert_cls_textb = Lambda(lambda x: x[:, 0])(projection_logits_textb) # take the vector at the [CLS] position for classification

    bert_cls = concatenate([bert_cls_texta, bert_cls_textb, input_cat_texta, input_cat_textb], axis=-1)
    
    dropout_A = Dropout(0.5)(bert_cls)
    output_A = Dense(1, activation='sigmoid', name='output_A')(dropout_A)
    
    dropout_B = Dropout(0.5)(bert_cls)
    output_B = Dense(1, activation='sigmoid', name='output_B')(dropout_B)
 
    model = Model([input_ids_texta, input_token_type_ids_texta, input_attention_mask_texta, input_ids_textb, input_token_type_ids_textb, input_attention_mask_textb, input_cat_texta, input_cat_textb], [output_A, output_B])
    model.compile(
                  loss=my_binary_crossentropy,
#                   loss='binary_crossentropy',
#                   loss=binary_crossentropy,
                  optimizer=Adam(1e-5),    # use a sufficiently small learning rate
                  metrics=[my_binary_accuracy, my_f1_score]
#                   metrics='accuracy'
                 )
    print(model.summary())
    return model
Example #6
def get_bert():
    ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(None,), dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(None,), dtype=tf.int32, name='tti')

    config = BertConfig.from_pretrained(Config.Bert.config)
    bert_model = TFBertModel.from_pretrained(Config.Bert.model, config=config)

    x = bert_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    x1 = keras.layers.Dropout(0.15)(x[0])
    x1 = keras.layers.Conv1D(768, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(64, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(32, 2, padding='same')(x1)
    x1 = keras.layers.Conv1D(1, 1)(x1)
    x1 = keras.layers.Flatten()(x1)
    x1 = keras.layers.Activation('softmax', dtype='float32', name='sts')(x1)

    x2 = keras.layers.Dropout(0.15)(x[0])
    x2 = keras.layers.Conv1D(768, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(64, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(32, 2, padding='same')(x2)
    x2 = keras.layers.Conv1D(1, 1)(x2)
    x2 = keras.layers.Flatten()(x2)
    x2 = keras.layers.Activation('softmax', dtype='float32', name='ets')(x2)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids], outputs=[x1, x2])

    optimizer = keras.optimizers.Adam(learning_rate=6e-5)
    if Config.Train.use_amp:
        optimizer = keras.mixed_precision.experimental.LossScaleOptimizer(optimizer, 'dynamic')
    loss = keras.losses.CategoricalCrossentropy(label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer)

    return model
Example #7
def create_model():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    config = BertConfig()
    config.output_hidden_states = False

    bert_model = TFBertModel.from_pretrained('bert-base-uncased',
                                             config=config)
    q_embedding = bert_model(q_id, attention_mask=q_mask,
                             token_type_ids=q_atn)[0]
    q = tf.keras.layers.GlobalAveragePooling1D()(q_embedding)

    x = tf.keras.layers.Dropout(0.2)(q)
    x = tf.keras.layers.Dense(TARGET_COUNT, activation='sigmoid')(x)

    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn], outputs=x)
    return model
Example #8
def check_compability(
    torch_model: BertModel,
    tf_model: TFBertModel
):
    torch_weights = []
    for k, v in torch_model.state_dict().items():
        if k == "embeddings.position_ids":
            print("im here")
            continue
        if not k.startswith("embeddings.") and k.endswith(".weight"):
            torch_weights.append(v.t().numpy())
        else:
            torch_weights.append(v.numpy())
    torch_weights[1], torch_weights[2] = torch_weights[2], torch_weights[1]

    tf_weights = tf_model.get_weights()

    check = [(torch_weight == tf_weight).all() for torch_weight, tf_weight in zip(torch_weights, tf_weights)]
    return all(check)
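A hedged usage sketch for check_compability, assuming both models are loaded from the same bert-base-uncased checkpoint (the checkpoint name is illustrative, not from the original snippet):

from transformers import BertModel, TFBertModel

torch_model = BertModel.from_pretrained('bert-base-uncased')
tf_model = TFBertModel.from_pretrained('bert-base-uncased', from_pt=True)
# Prints True only when every PyTorch tensor matches its TensorFlow counterpart.
print(check_compability(torch_model, tf_model))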
Example #9
def tokenize(text_list, tags_list, tag_dict, max_token_len):
    """ Tokenize texts with BERT and extract per-token embeddings:
        Args:
            text_list - list of raw texts
            tags_list - list of tag sequences, one per text
            tag_dict - {tag: tag id}
            max_token_len - maximum token length after padding
        Returns:
            embeddings list - (data size, max token length, 768)
            token length list - (data size, 1)
            tags list - (data size, max token length)
    """
    tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')
    bert_model = TFBertModel.from_pretrained('bert-base-chinese')

    token_len_list = []
    embeddings_list = []

    for text, tags in zip(text_list, tags_list):
        inputs = tokenizer(text, max_length=max_token_len, padding="max_length", truncation=True, return_tensors="np")
        token_len = np.sum(inputs.data['attention_mask'])

        if token_len - 2 != len(tags):
            continue

        outputs = bert_model(inputs)

        embeddings = outputs.last_hidden_state.numpy()[0]
        embeddings_list.append(embeddings)

        token_len_list.append(token_len)

        tags.insert(0, tag_dict['O'])

        cur_len = len(tags)
        for i in range(cur_len, max_token_len):
            tags.append(tag_dict['O'])

    embeddings_list = np.array(embeddings_list)
    token_len_list = np.array(token_len_list)
    tags_list = np.array(tags_list, dtype=np.int32)

    return embeddings_list, token_len_list, tags_list
Example #10
def _embedding_from_bert():

    log.info("Extracting pretrained word embeddings weights from BERT")
    #BERT_MODEL_URL = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/1"

    # dinput_word_ids = tf.keras.layers.Input(shape=(config.summ_length,), dtype=tf.int32,
    #                                       name="input_word_ids")
    # dinput_mask = tf.keras.layers.Input(shape=(config.summ_length,), dtype=tf.int32,
    #                                   name="input_mask")
    # dsegment_ids = tf.keras.layers.Input(shape=(config.summ_length,), dtype=tf.int32,
    #                                     name="segment_ids")
    # einput_word_ids = tf.keras.layers.Input(shape=(config.doc_length,), dtype=tf.int32,
    #                                       name="input_word_ids")
    # einput_mask = tf.keras.layers.Input(shape=(config.doc_length,), dtype=tf.int32,
    #                                   name="input_mask")
    # esegment_ids = tf.keras.layers.Input(shape=(config.doc_length,), dtype=tf.int32,
    #                                     name="segment_ids")
    #bert_layer = hub.KerasLayer(BERT_MODEL_URL, trainable=False)

    vocab_of_BERT = TFBertModel.from_pretrained('bert-base-uncased',
                                                trainable=False)
    embedding_matrix = vocab_of_BERT.get_weights()[0]
    # trainable_vars = vocab_of_BERT.variables
    # # Remove unused layers
    # trainable_vars = [var for var in trainable_vars if not "/cls/" in var.name]

    # # Select how many layers to fine tune
    # trainable_vars = []

    # # Add to trainable weights
    # for var in trainable_vars:
    #     vocab_of_BERT.trainable_weights.append(var)

    # for var in vocab_of_BERT.variables:
    #     if var not in vocab_of_BERT.trainable_weights:
    #         vocab_of_BERT.non_trainable_weights.append(var)
    #_, dsequence_output = vocab_of_BERT([dinput_word_ids, dinput_mask, dsegment_ids])
    #_, esequence_output = vocab_of_BERT([einput_word_ids, einput_mask, esegment_ids])
    #dec_model = tf.keras.models.Model(inputs=[dinput_word_ids, dinput_mask, dsegment_ids], outputs=dsequence_output)
    #enc_model = tf.keras.models.Model(inputs=[einput_word_ids, einput_mask, esegment_ids], outputs=esequence_output)
    log.info(f"Embedding matrix shape '{embedding_matrix.shape}'")
    return (embedding_matrix, vocab_of_BERT)
Example #11
def get_bert_imdb_model():
    from transformers import TFBertModel
    max_len = 512

    ## BERT encoder
    # encoder = TFBertModel.from_pretrained("bert-base-uncased")

    ## QA Model
    input_ids = layers.Input(shape=(max_len, ), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len, ), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len, ), dtype=tf.int32)

    # inputs = {'input_ids': input_ids, 'token_type_ids': token_type_ids, 'attention_mask': attention_mask}

    inputs = [input_ids, token_type_ids, attention_mask]

    # name = "distilbert-base-uncased"
    name = "bert-base-cased"

    # m = TFAutoModelForSequenceClassification.from_pretrained(name)

    # bert = m.layers[0]

    bert = TFBertModel.from_pretrained(name)

    bert.trainable = True

    bert_outputs = bert(input_ids=input_ids,
                        token_type_ids=token_type_ids,
                        attention_mask=attention_mask)

    last_hidden_states = bert_outputs.last_hidden_state
    avg = layers.GlobalAveragePooling1D()(last_hidden_states)
    avg = tf.keras.layers.Dense(128, activation='relu')(avg)
    output = layers.Dense(2, activation="softmax")(avg)
    model = tf.keras.Model(inputs=inputs, outputs=output)

    loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)
    optimizer = tf.keras.optimizers.Adam(lr=5e-5)
    # optimizer = 'adam'
    model.compile(optimizer=optimizer, loss=[loss], metrics=['accuracy'])
    return model
Example #12
    def bert(self, batch_size=256, max_seq_len=100):
        def preprocess(docs, max_size):

            tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
            docs = [" ".join(x.split()[:max_size]) for x in docs]
            max_size = max(max_size, max([len(x.split()) for x in docs]))

            return tokenizer, max_size

        texts = self.df["text"].values.tolist()
        tokenizer, max_len = preprocess(texts, max_seq_len)

        model = TFBertModel.from_pretrained('bert-base-uncased',
                                            output_hidden_states=True)
        X = prepareBertInput(tokenizer, texts, max_len)

        bert_embed = np.zeros((len(texts), 768))
        for start_idx in range(0, len(texts), batch_size):
            end_idx = min(start_idx + batch_size, len(texts))
            batch = texts[start_idx:end_idx]
            input_ids = [
                X[0][start_idx:end_idx], X[1][start_idx:end_idx],
                X[2][start_idx:end_idx]
            ]
            outputs = model(input_ids)
            layer4 = outputs[-1][-4][:, 1:, :]
            layer3 = outputs[-1][-3][:, 1:, :]
            layer2 = outputs[-1][-2][:, 1:, :]
            layer1 = outputs[-1][-1][:, 1:, :]
            avg = tf.keras.layers.Average()
            avg_embedding = avg([layer4, layer3, layer2, layer1])
            embedding = tf.math.reduce_mean(avg_embedding, axis=1)
            bert_embed[start_idx:end_idx, :] = embedding

        feature_length = bert_embed.shape[-1]
        bert_vecs = pd.DataFrame(
            bert_embed,
            index=self.df.index,
            columns=["BERT.{}".format(i) for i in range(feature_length)])
        self.df = pd.concat((self.df, bert_vecs), axis=1, sort=False)
        return
Example #13
def build_features():
    tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
    bert = TFBertModel.from_pretrained('bert-base-uncased')
    data = get_data()
    data_train, data_val_model, data_val_interpretation, data_test = get_train_val_test_splits(
        data)
    original_train_dataset = glue_convert_examples_to_features(
        utils.data_to_tf_dataset(data_train),
        tokenizer,
        max_length=utils.MAX_SEQ_LENGTH,
        task='cola')
    original_valid_dataset = glue_convert_examples_to_features(
        utils.data_to_tf_dataset(data_val_model),
        tokenizer,
        max_length=utils.MAX_SEQ_LENGTH,
        task='cola')
    f_train, f_val = make_features(original_train_dataset,
                                   bert), make_features(
                                       original_valid_dataset, bert)
    np.save("train_features.npy", f_train)
    np.save("valid_features.npy", f_val)
Example #14
    def __init__(self, label_size):
        super(BilstmCRF, self).__init__()
        self.embedding = TFBertModel.from_pretrained(
            '../resource/ernie_torch/',
            from_pt=True,
            output_hidden_states=True)
        # freeze the embedding (BERT) layer so it is not trained
        self.embedding.trainable = False

        # CNN
        self.conv1 = tf.keras.layers.Conv1D(filters=192,
                                            kernel_size=1,
                                            padding="same",
                                            activation='relu')
        self.conv2 = tf.keras.layers.Conv1D(filters=192,
                                            kernel_size=2,
                                            padding="same",
                                            activation='relu')
        self.conv3 = tf.keras.layers.Conv1D(filters=192,
                                            kernel_size=3,
                                            padding="same",
                                            activation='relu')
        self.conv4 = tf.keras.layers.Conv1D(filters=192,
                                            kernel_size=4,
                                            padding="same",
                                            activation='relu')

        self.dropout = tf.keras.layers.Dropout(0.4)

        # bidirectional LSTM
        self.bilstm = tf.keras.layers.Bidirectional(
            tf.keras.layers.LSTM(256, return_sequences=True))

        # CRF layer transition parameters
        self.transition = tf.Variable(
            tf.initializers.GlorotNormal()(shape=(label_size, label_size)))

        self.dense = tf.keras.layers.Dense(label_size, name='dense_out')
        self.layer_norm1 = tf.keras.layers.LayerNormalization(epsilon=1e-5)
        self.layer_norm2 = tf.keras.layers.LayerNormalization(epsilon=1e-5)
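During training, the transition variable above would typically feed a CRF log-likelihood loss; a minimal sketch (an assumption, not shown in this example) with tensorflow_addons, where logits, labels and seq_lens are the dense outputs, gold tag ids and true sequence lengths:

import tensorflow_addons as tfa

# Negative mean log-likelihood of the gold tag sequences under the CRF.
log_likelihood, self.transition = tfa.text.crf_log_likelihood(
    logits, labels, seq_lens, transition_params=self.transition)
loss = -tf.reduce_mean(log_likelihood)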
Example #15
 def __init__(self,
              intent_size,
              slot_size,
              lr=1e-4,
              dropout_rate=0.2,
              units=300):
     super().__init__()
     self.bert = TFBertModel.from_pretrained('bert-base-uncased',
                                             trainable=True)
     self.inp_dropout = Dropout(dropout_rate)
     self.intent_dropout = Dropout(dropout_rate)
     self.fc_intent = Dense(units, activation='relu')
     self.trans_params = self.add_weight(shape=(slot_size, slot_size))
     self.out_linear_intent = Dense(intent_size)
     self.out_linear_slot = Dense(slot_size)
     self.optimizer = Adam(lr)
     self.slots_accuracy = tf.keras.metrics.Accuracy()
     self.intent_accuracy = tf.keras.metrics.Accuracy()
     self.decay_lr = tf.optimizers.schedules.ExponentialDecay(
         lr, 1000, 0.95)
     self.logger = logging.getLogger('tensorflow')
     self.logger.setLevel(logging.INFO)
Example #16
def create_model():
    id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    attn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    config = BertConfig()
    config.output_hidden_states = True
    bert_model = TFBertModel.from_pretrained(
        'https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-tf_model.h5',
        config=config)
    _, _, hidden_states = bert_model(id,
                                     attention_mask=mask,
                                     token_type_ids=attn)
    h12 = tf.reshape(hidden_states[-1][:, 0], (-1, 1, 768))
    h11 = tf.reshape(hidden_states[-2][:, 0], (-1, 1, 768))
    h10 = tf.reshape(hidden_states[-3][:, 0], (-1, 1, 768))
    h09 = tf.reshape(hidden_states[-4][:, 0], (-1, 1, 768))
    concat_hidden = tf.keras.layers.Concatenate(axis=2)([h12, h11, h10, h09])
    x = tf.keras.layers.GlobalAveragePooling1D()(concat_hidden)
    x = tf.keras.layers.Dropout(0.2)(x)
    x = tf.keras.layers.Dense(MAX_TARGET_LEN, activation='sigmoid')(x)
    model = tf.keras.models.Model(inputs=[id, mask, attn], outputs=x)
    return model
Example #17
 def __init__(self, params, name="model", **kwargs):
     super(NERwithHFBERT, self).__init__(params, name=name, **kwargs)
     self._tag_string_mapper = get_sm(self._params.tags_fn_)
     self.tag_vocab_size = self._tag_string_mapper.size() + 2
     self._tracked_layers = dict()
     if self.pretrained_bert is None:
         if self._params.use_hf_electra_model_:
             self.pretrained_bert = TFElectraModel(ElectraConfig.from_pretrained(params.pretrained_hf_model_,cache_dir=params.hf_cache_dir_))
         else:
             self.pretrained_bert = TFBertModel(BertConfig.from_pretrained(params.pretrained_hf_model_,cache_dir=params.hf_cache_dir_))
     self._dropout = tf.keras.layers.Dropout(self._params.dropout_last)
     if self._params.bet_tagging_:
         # print(self.tag_vocab_size-1)
         # half of the classes is used plus O-Class, sos, eos
         self._layer_cls = tf.keras.layers.Dense(
             int(self._tag_string_mapper.size() // 2 + 3), activation=tf.keras.activations.softmax, name="layer_cls"
         )
         self._layer_start = tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, name="layer_start")
         self._layer_end = tf.keras.layers.Dense(1, activation=tf.keras.activations.sigmoid, name="layer_end")
     elif self._params.use_crf:
         self._last_layer = tf.keras.layers.Dense(self.tag_vocab_size, name="last_layer")
         self._trans_params = tf.keras.layers.Embedding(
             self.tag_vocab_size, self.tag_vocab_size, name="trans_params"
         )
         # ,embeddings_initializer=tf.keras.initializers.Constant(1))
         if self._params.crf_with_ner_rule:
             self._penalty_factor = tf.keras.layers.Embedding(1, 1, name="penalty_factor")
             # ,embeddings_initializer=tf.keras.initializers.Constant(1))
             self._penalty_absolute = tf.keras.layers.Embedding(1, 1, name="penalty_absolute")
             # ,embeddings_initializer=tf.keras.initializers.Constant(1))
          elif self._params.crf_with_ner_forb_trans:
             self._penalty_factor = tf.constant(0.0, name="penalty_factor", dtype=tf.float32)
             self._penalty_absolute = tf.constant(-100000.0, name="penalty_absolute", dtype=tf.float32)
         self.init_crf_with_ner_rule((self.tag_vocab_size - 3) // 2)
     else:
         self._last_layer = tf.keras.layers.Dense(
             self.tag_vocab_size, activation=tf.keras.activations.softmax, name="last_layer"
         )
Example #18
    def __init__(self,
                 pretrained_model_name_or_path='bert-base-uncased',
                 reduce_output='cls_pooled',
                 trainable=True,
                 num_tokens=None,
                 **kwargs):
        super().__init__()
        try:
            from transformers import TFBertModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]')
            sys.exit(-1)

        self.transformer = TFBertModel.from_pretrained(
            pretrained_model_name_or_path)
        self.reduce_output = reduce_output
        if not self.reduce_output == 'cls_pooled':
            self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
Example #19
def create_model():
    ## BERT encoder
    encoder = TFBertModel.from_pretrained("bert-base-uncased")
    
    # QA Model
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)
    output = encoder(
        input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask
    )[1]
    
    output = layers.Dense(1, use_bias=True)(output)
    output = layers.Activation(keras.activations.sigmoid)(output)


    model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=output,
    )

    model.compile()
    return model
Example #20
    def bert_large_uncased_for_squad2(self, max_seq_length):
        input_ids = Input((max_seq_length, ), dtype=tf.int32, name='input_ids')
        input_masks = Input((max_seq_length, ),
                            dtype=tf.int32,
                            name='input_masks')
        input_tokens = Input((max_seq_length, ),
                             dtype=tf.int32,
                             name='input_tokens')

        #Load model from huggingface
        config = BertConfig.from_pretrained("bert-large-uncased",
                                            output_hidden_states=True)
        bert_layer = TFBertModel.from_pretrained(self.named_model,
                                                 config=config)
        if self.weights_file is not None:
            bert_layer.load_weights(self.weights_file)
        _, _, embeddings = bert_layer(
            [input_ids, input_masks,
             input_tokens])  #1 for pooled outputs, 0 for sequence

        model = Model(inputs=[input_ids, input_masks, input_tokens],
                      outputs=embeddings)
        return model
Example #21
    def __build_model__(self, learningrate=None, keep_prob=0.5, verbose=1):
        inputs = keras.layers.Input(shape=(self.sentence_length, ),
                                    dtype=tf.int32)
        bert = TFBertModel.from_pretrained('bert-base-uncased',
                                           trainable=False)(inputs)[0]
        average_pooling = keras.layers.GlobalAveragePooling1D()(bert)
        if self.hidden_layer > 0:
            hidden = keras.layers.Dense(self.hidden_layer,
                                        activation=tf.nn.relu)(average_pooling)
        else:
            hidden = average_pooling
        dropout = keras.layers.Dropout(1 - keep_prob)(hidden)
        outputs = keras.layers.Dense(1, activation=tf.nn.sigmoid)(dropout)
        model = keras.models.Model(inputs, outputs)

        model.compile(optimizer='adam',
                      loss='binary_crossentropy',
                      metrics=['accuracy'])

        if verbose:
            model.summary()

        return model
Example #22
 def __init__(self, config):
     super(MyModel, self).__init__()
     self.bert_model_config = BertConfig.from_pretrained(
         config.bert_model_config_path)
     self.bert_model = TFBertModel.from_pretrained(
         config.bert_model_weights_path, config=self.bert_model_config)
     self.convs = [
         tf.keras.layers.Conv2D(filters=config.num_filters,
                                kernel_size=(k, config.hidden_size),
                                padding='valid',
                                kernel_initializer='normal',
                                activation='relu')
         for k in config.filter_sizes
     ]
     self.pools = [
         tf.keras.layers.MaxPool2D(pool_size=(config.max_len - k + 1, 1),
                                   strides=(1, 1),
                                   padding='valid')
         for k in config.filter_sizes
     ]
     self.flatten = tf.keras.layers.Flatten()
     self.dropout = tf.keras.layers.Dropout(config.dropout)
     self.fc = Dense(config.num_classes, activation='softmax')
Example #23
def create_model():
    q_id = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_mask = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)
    q_atn = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ), dtype=tf.int32)

    config = BertConfig.from_pretrained(BERT_PATH)
    config.output_hidden_states = False
    bert_model = TFBertModel.from_pretrained(BERT_PATH,
                                             from_pt=True,
                                             config=config)
    q_embedding = bert_model(q_id, attention_mask=q_mask,
                             token_type_ids=q_atn)[0]
    q = tf.keras.layers.GlobalAveragePooling1D()(q_embedding)
    a = tf.keras.layers.GlobalMaxPooling1D()(q_embedding)
    t = q_embedding[:, -1]
    e = q_embedding[:, 0]
    x = tf.keras.layers.Concatenate()([q, a, t, e])

    x = tf.keras.layers.Dropout(0.5)(x)
    x = tf.keras.layers.Dense(1, activation='sigmoid')(x)
    model = tf.keras.models.Model(inputs=[q_id, q_mask, q_atn], outputs=x)

    return model
Example #24
    def bert_large_uncased_for_squad2(self, max_seq_length):
        input_ids = Input((max_seq_length, ), dtype=tf.int32, name='input_ids')
        input_masks = Input((max_seq_length, ),
                            dtype=tf.int32,
                            name='input_masks')

        #Load model from huggingface
        bert_layer = TFBertModel.from_pretrained(self.named_model)

        outputs = bert_layer([input_ids, input_masks
                              ])[0]  #1 for pooled outputs, 0 for sequence

        #Dense layer with 2 nodes; one for start span and one for end span
        logits = Dense(2)(outputs)

        #Split the outputs into start and end logits
        start_logits, end_logits = tf.split(logits, 2, axis=-1)
        start_logits = K.squeeze(start_logits, axis=-1)
        end_logits = K.squeeze(end_logits, axis=-1)

        model = Model(inputs=[input_ids, input_masks],
                      outputs=[start_logits, end_logits])
        return model
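Turning the two heads into an answer span is not shown here. A hedged decoding sketch (ids, masks and tokens are assumed inputs for a single padded question-context pair, not part of the original example):

import numpy as np

start_logits, end_logits = model.predict([ids, masks])
start = int(np.argmax(start_logits[0]))
end = int(np.argmax(end_logits[0]))
answer_tokens = tokens[start:end + 1]  # tokens: the tokenized input sequence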
Example #25
def initialize_hugface_model(hugging_face_model):
    # if hugging_face_model == "xlnet":
    #     tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
    #     model = TFXLNetModel.from_pretrained('xlnet-base-cased')
    # elif hugging_face_model == "roberta":
    #     tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    #     model = TFRobertaModel.from_pretrained('roberta-base')
    # elif hugging_face_model == "ernie":
    #     tokenizer = AutoTokenizer.from_pretrained("nghuyong/ernie-2.0-en")
    #     model = TFAutoModel.from_pretrained("nghuyong/ernie-2.0-en")

    #FAST TOKENIZERS
    if hugging_face_model == "distilbert":
        tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
        model = TFDistilBertModel.from_pretrained("distilbert-base-uncased")
    elif hugging_face_model == "bert":
        tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')
        model = TFBertModel.from_pretrained('bert-base-cased')


    else:
        raise ValueError('Invalid embedding type')
    return tokenizer, model
Example #26
def create_model(num_tags):
    ## BERT encoder
    encoder = TFBertModel.from_pretrained("bert-base-uncased")
    encoder.save_pretrained(save_path)

    ## NER Model
    input_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    token_type_ids = layers.Input(shape=(max_len,), dtype=tf.int32)
    attention_mask = layers.Input(shape=(max_len,), dtype=tf.int32)
    embedding = encoder(
        input_ids, token_type_ids=token_type_ids, attention_mask=attention_mask
    )[0]
    embedding = layers.Dropout(0.3)(embedding)
    # TODO: remove the + 1
    tag_logits = layers.Dense(num_tags, activation='softmax')(embedding)

    model = keras.Model(
        inputs=[input_ids, token_type_ids, attention_mask],
        outputs=[tag_logits],
    )
    optimizer = keras.optimizers.Adam(lr=3e-5)
    model.compile(optimizer=optimizer, loss=masked_ce_loss, metrics=['accuracy'])
    return model
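masked_ce_loss is referenced but not defined in this example; a common pattern (an assumption, not the original implementation) is to exclude padded positions when averaging the cross-entropy:

def masked_ce_loss(real, pred):
    # Assumed convention: label id 0 marks padding and is ignored in the loss.
    loss_fn = keras.losses.SparseCategoricalCrossentropy(
        reduction=tf.keras.losses.Reduction.NONE)
    loss = loss_fn(real, pred)
    mask = tf.cast(tf.not_equal(real, 0), loss.dtype)
    return tf.reduce_sum(loss * mask) / tf.reduce_sum(mask)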
Example #27
def create_classification_model(model_config):
    classes, max_sentence_len, dropout = model_config["classes"], model_config[
        "max_sentence_len"], model_config["dropout"]
    model_pretrained_name = model_config["pretrained_name"]
    tokens_config = model_config["tokens"]

    pad_token_id = tokens_config["pad"]["id"]

    model: tf.keras.Model
    if model_config["model_config_name"] == "bert":
        model = TFBertModel.from_pretrained(model_pretrained_name)
    elif model_config["model_config_name"] == "xlm_roberta":
        model = TFXLMRobertaModel.from_pretrained(model_pretrained_name)
    else:
        raise ValueError()

    subword_ids = tf.keras.layers.Input(shape=(max_sentence_len, ),
                                        dtype=tf.int32,
                                        name="input_ids")

    attention_masks = tf.keras.layers.Lambda(
        lambda x: tf.cast(x != pad_token_id, tf.int32))(subword_ids)

    subword_embeddings = model([subword_ids, attention_masks])[0]

    layer = subword_embeddings
    layer = tf.keras.layers.Flatten()(layer)
    dropout = tf.keras.layers.Dropout(rate=dropout)(layer)
    output = tf.keras.layers.Dense(units=classes,
                                   activation="softmax")(dropout)

    model = tf.keras.models.Model(inputs=subword_ids, outputs=output)

    if "weights_file" in model_config:
        model.load_weights(model_config["weights_file"])

    return model
Example #28
    def embedding(self, text):
        tokens, masks, seg = self.text_transform.text_to_int(text)
        input_ids = tf.keras.layers.Input(shape=(self.max_len, ),
                                          dtype=tf.int32,
                                          name="input_ids")
        attention_mask = tf.keras.layers.Input(shape=(self.max_len, ),
                                               dtype=tf.int32,
                                               name="attention_mask")
        token_type_ids = tf.keras.layers.Input(shape=(self.max_len, ),
                                               dtype=tf.int32,
                                               name="token_type_ids")
        bert_layer = TFBertModel.from_pretrained(self.pre_trained_model)

        outputs = bert_layer({
            'input_ids': input_ids,
            'token_type_ids': token_type_ids,
            'attention_mask': attention_mask
        })

        model = tf.keras.Model(inputs={
            'input_ids': input_ids,
            'token_type_ids': token_type_ids,
            'attention_mask': attention_mask
        },
                               outputs=outputs)

        input_token = tf.constant(tokens)[None, :]
        token_input = tf.constant(seg)[None, :]
        mask_input = tf.constant(masks)[None, :]

        input = {
            'input_ids': input_token,
            'token_type_ids': token_input,
            'attention_mask': mask_input
        }
        output = model(input)
        return (np.asarray(tf.squeeze(output[0])))
Example #29
    def __post_init__(self):
        bert_model_name = [
            "hfl/chinese-bert-wwm",
            "hfl/chinese-bert-wwm-ext",
            "hfl/chinese-roberta-wwm-ext",
            "chinese-roberta-wwm-ext-large",
        ]

        self.tokenizer = BertTokenizer.from_pretrained(bert_model_name[0])
        self.bert_model = TFBertModel.from_pretrained(bert_model_name[0], from_pt=True)

        test_X_path = self.model_data_path + "test_X.pkl"
        test_mapping_path = self.model_data_path + "test_mapping.pkl"
        id2tag_path = self.model_data_path + "id2tag.pkl"

        test_X, self.test_mapping = GeneralDataPreprocessor.loadTestArrays(
            test_X_path, test_mapping_path
        )

        with open(id2tag_path, "rb") as f:
            self.id2tag = pickle.load(f)

        ckpt = tf.train.Checkpoint(optimizer=self.optimizer, model=self.model)
        ckpt.restore(tf.train.latest_checkpoint(self.checkpoint_path))
def get_model(output_model, dropout=0.2, output_layer_name="output"):
    input_word_ids = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ),
                                           dtype=tf.int32,
                                           name='input_word_ids')
    input_masks = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ),
                                        dtype=tf.int32,
                                        name='input_masks')
    input_segments = tf.keras.layers.Input((MAX_SEQUENCE_LENGTH, ),
                                           dtype=tf.int32,
                                           name='input_segments')

    config = BertConfig()
    bert_layer = TFBertModel.from_pretrained(BERT_PATH, config=config)
    hidden_layer, _ = bert_layer([input_word_ids, input_masks, input_segments])

    hidden_layer_cls = tf.reshape(hidden_layer[:, 0], (-1, 768))

    hidden_layer_dpout = tf.keras.layers.Dropout(dropout)(hidden_layer_cls)
    output_layer = output_model.get_layer(output_layer_name)(
        hidden_layer_dpout)
    model = tf.keras.models.Model(
        inputs=[input_word_ids, input_masks, input_segments],
        outputs=output_layer)
    return model