Example #1
    def __init__(
            self,
            pretrained_model_name_or_path='xlnet-base-cased',
            reduce_output='sum',
            trainable=True,
            num_tokens=None,
            **kwargs
    ):
        super(XLNetEncoder, self).__init__()
        try:
            from transformers import TFXLNetModel
        except ModuleNotFoundError:
            logger.error(
                ' transformers is not installed. '
                'In order to install all text feature dependencies run '
                'pip install ludwig[text]'
            )
            sys.exit(-1)

        self.transformer = TFXLNetModel.from_pretrained(
            pretrained_model_name_or_path
        )
        self.reduce_output = reduce_output
        self.reduce_sequence = SequenceReducer(reduce_mode=reduce_output)
        self.transformer.trainable = trainable
        self.transformer.resize_token_embeddings(num_tokens)
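The snippet ends with the constructor; the following is a minimal, hypothetical sketch (not Ludwig's actual call()) of how the fields initialized above could fit together in a forward pass:

# Hypothetical helper for illustration only; the name and signature are assumptions.
def encode(encoder, token_ids, training=False):
    # token_ids: int32 tensor of shape [batch_size, sequence_length]
    hidden = encoder.transformer(token_ids, training=training)[0]  # [batch, seq, hidden]
    # Collapse the sequence axis with the configured reducer ('sum' by default).
    return encoder.reduce_sequence(hidden)  # [batch, hidden]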
Example #2
 def __init__(self, dropout=0.1):
     super().__init__()
     self.xlnet = TFXLNetModel.from_pretrained('xlnet-base-cased',
                                               trainable=True)
     self.drop = tf.keras.layers.Dropout(dropout)
     self.fc = tf.keras.layers.Dense(300, tf.nn.swish)
     self.out = tf.keras.layers.Dense(2)
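Only the constructor is shown; a minimal sketch of a matching call() method, assuming the last-token hidden state serves as the sequence summary (XLNet places its <cls> token at the end):

# Hypothetical call() body for the class above.
def call(self, inputs, training=False):
    # inputs: int32 token ids of shape [batch_size, seq_len]
    hidden = self.xlnet(inputs, training=training)[0]  # [batch, seq_len, hidden]
    summary = hidden[:, -1, :]                         # last position holds <cls>
    x = self.drop(summary, training=training)
    x = self.fc(x)
    return self.out(x)                                 # [batch, 2] logits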
Example #3
 def __init__(self, dropout_rate=0.2, units=300):
     super().__init__()
     self.xlnet = TFXLNetModel.from_pretrained('xlnet-base-cased',  # only cased XLNet checkpoints are published
                                               trainable=True)
     self.drop1 = tf.keras.layers.Dropout(dropout_rate)
     self.drop2 = tf.keras.layers.Dropout(dropout_rate)
     self.fc = tf.keras.layers.Dense(units, tf.nn.swish)
     self.out = tf.keras.layers.Dense(3)
Example #4
 def _test_TFXLNET(self, size, large=False):
     from transformers import XLNetTokenizer, TFXLNetModel
     tokenizer = XLNetTokenizer.from_pretrained(size)
     model = TFXLNetModel.from_pretrained(size)
     input_dict = tokenizer("Hello, my dog is cute", return_tensors="tf")
     spec, input_dict = self.spec_and_pad(input_dict)
     outputs = ["last_hidden_state"]
     self.run_test(model, input_dict, input_signature=spec, outputs=outputs, large=large)
Example #5
    def __init__(self, input_shape=600, num_classes=3, linear_layers=1):
        self.lin_layers = []
        self.tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
        tokenized_sentence = keras.Input(shape=(128,), name='word_inputs', dtype='int32')
        xlnet = TFXLNetModel.from_pretrained('xlnet-large-cased')
        xlnet_encodings = xlnet(tokenized_sentence)[0]
        last_hidden = tf.squeeze(xlnet_encodings[:, -1:, :], axis=1)
        last_hidden = keras.layers.Dropout(0.1)(last_hidden)
        self.classifier = keras.layers.Dense(units=num_classes,
                                             activation='softmax',
                                             kernel_initializer='random_normal',
                                             bias_initializer='zeros')(last_hidden)

        self.model = keras.Model(inputs=[tokenized_sentence], outputs=[self.classifier])
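A hypothetical usage of the wrapper above; the class name XLNetSentenceClassifier and the input text are assumptions:

import numpy as np

clf = XLNetSentenceClassifier()  # assumed name of the class defined above
enc = clf.tokenizer('The plot was thin but the acting was great.',
                    max_length=128, padding='max_length',
                    truncation=True, return_tensors='np')
probs = clf.model.predict(enc['input_ids'])  # shape (1, 3), softmax scores
print(probs.argmax(axis=-1))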
Example #6
def get_transformer(bert_model_type, output_hidden_states=False):
    config = get_bert_config(bert_model_type, output_hidden_states)
    if bert_model_type in [
            'bert-base-uncased', 'bert-base-cased', 'bert-large-uncased',
            'bert-large-uncased-whole-word-masking',
            'bert-large-uncased-whole-word-masking-finetuned-squad'
    ]:
        return TFBertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config)
    elif bert_model_type in [
            'prod-bert-base-uncased', 'tune_bert-base-uncased_nsp'
    ]:
        return TFBertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config,
                                           from_pt=True)
    elif bert_model_type in [
            'roberta-base', 'roberta-large', 'roberta-large-mnli',
            'distilroberta-base'
    ]:
        return TFRobertaModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                              config=config)
    elif bert_model_type in ['prod-roberta-base-cased']:
        return TFRobertaModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                              config=config,
                                              from_pt=True)
    elif bert_model_type in ['xlnet-base-cased']:
        return TFXLNetModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                            config=config)
    elif bert_model_type in [
            'albert-base-v1', 'albert-large-v1', 'albert-xlarge-v1',
            'albert-xxlarge-v1'
    ]:
        return TFAlbertModel.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                             config=config)
    elif bert_model_type in ['gpt2', 'gpt2-medium']:
        return TFGPT2Model.from_pretrained(BERT_MODEL_FILE[bert_model_type],
                                           config=config)
    elif bert_model_type in ['transfo-xl']:
        return TFTransfoXLModel.from_pretrained(
            BERT_MODEL_FILE[bert_model_type], config=config)
    elif bert_model_type in [
            'distilbert-base-uncased',
            'distilbert-base-uncased-distilled-squad'
    ]:
        return TFDistilBertModel.from_pretrained(
            BERT_MODEL_FILE[bert_model_type], config=config)
    else:
        raise ValueError(
            f'`bert_model_type` not understood: {bert_model_type}')
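A hypothetical call of get_transformer, assuming BERT_MODEL_FILE maps 'xlnet-base-cased' to the corresponding Hugging Face checkpoint:

from transformers import XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
inputs = tokenizer('A short example sentence.', return_tensors='tf')
transformer = get_transformer('xlnet-base-cased')  # returns a TFXLNetModel here
last_hidden = transformer(inputs['input_ids'])[0]  # [1, seq_len, 768]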
Example #7
def get_xlnet():
    ids = keras.layers.Input(shape=(None, ), dtype=tf.int32, name='ids')
    att = keras.layers.Input(shape=(None, ), dtype=tf.int32, name='att')
    tok_type_ids = keras.layers.Input(shape=(None, ),
                                      dtype=tf.int32,
                                      name='tti')

    config = XLNetConfig.from_pretrained(Config.XLNet.config)
    xlnet_model = TFXLNetModel.from_pretrained(Config.XLNet.model,
                                               config=config)

    x = xlnet_model(ids, attention_mask=att, token_type_ids=tok_type_ids)

    x1 = keras.layers.Dropout(0.15)(x[0])
    x1 = keras.layers.Conv1D(768, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(64, 2, padding='same')(x1)
    x1 = keras.layers.LeakyReLU()(x1)
    x1 = keras.layers.LayerNormalization()(x1)
    x1 = keras.layers.Conv1D(32, 2, padding='same')(x1)
    x1 = keras.layers.Conv1D(1, 1)(x1)
    x1 = keras.layers.Flatten()(x1)
    x1 = keras.layers.Activation('softmax', dtype='float32', name='sts')(x1)

    x2 = keras.layers.Dropout(0.15)(x[0])
    x2 = keras.layers.Conv1D(768, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(64, 2, padding='same')(x2)
    x2 = keras.layers.LeakyReLU()(x2)
    x2 = keras.layers.LayerNormalization()(x2)
    x2 = keras.layers.Conv1D(32, 2, padding='same')(x2)
    x2 = keras.layers.Conv1D(1, 1)(x2)
    x2 = keras.layers.Flatten()(x2)
    x2 = keras.layers.Activation('softmax', dtype='float32', name='ets')(x2)

    model = keras.models.Model(inputs=[ids, att, tok_type_ids],
                               outputs=[x1, x2])

    optimizer = keras.optimizers.Adam(learning_rate=6e-5)
    if Config.Train.use_amp:
        optimizer = keras.mixed_precision.experimental.LossScaleOptimizer(
            optimizer, 'dynamic')
    loss = keras.losses.CategoricalCrossentropy(
        label_smoothing=Config.Train.label_smoothing)
    model.compile(loss=loss, optimizer=optimizer)

    return model
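A hypothetical usage of get_xlnet for span prediction; the tokenizer, the padded length of 96, and the example text are assumptions, and Config.XLNet.model is expected to point at an XLNet checkpoint:

from transformers import XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
enc = tokenizer('what a wonderful day', 'wonderful day',
                padding='max_length', max_length=96, return_tensors='np')

model = get_xlnet()
start_probs, end_probs = model.predict({'ids': enc['input_ids'],
                                        'att': enc['attention_mask'],
                                        'tti': enc['token_type_ids']})
start, end = int(start_probs.argmax(-1)[0]), int(end_probs.argmax(-1)[0])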
Example #8
 def __init__(self, intent_size, slot_size, lr=1e-4, dropout_rate=0.2, units=300):
     super().__init__()
     self.xlnet = TFXLNetModel.from_pretrained('xlnet-base-cased',  # only cased XLNet checkpoints are published
                                               trainable=True)
     self.inp_dropout = Dropout(dropout_rate)
     self.intent_dropout = Dropout(dropout_rate)
     self.fc_intent = Dense(units, activation='relu')
     self.trans_params = self.add_weight(shape=(slot_size, slot_size))
     self.out_linear_intent = Dense(intent_size)
     self.out_linear_slot = Dense(slot_size)
     self.optimizer = Adam(lr)
     self.slots_accuracy = tf.keras.metrics.Accuracy()
     self.intent_accuracy = tf.keras.metrics.Accuracy()
     self.decay_lr = tf.optimizers.schedules.ExponentialDecay(lr, 1000, 0.95)
     self.logger = logging.getLogger('tensorflow')
     self.logger.setLevel(logging.INFO)
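The constructor above omits the forward pass; a minimal sketch of what it might look like, assuming the per-token hidden states feed the slot head and the last position feeds the intent head (the learned trans_params would then enter a CRF objective such as tfa.text.crf_log_likelihood, not shown here):

# Hypothetical call() body for the class above.
def call(self, inputs, training=False):
    hidden = self.xlnet(inputs, training=training)[0]  # [batch, seq, hidden]
    hidden = self.inp_dropout(hidden, training=training)
    slot_logits = self.out_linear_slot(hidden)         # per-token slot scores
    pooled = hidden[:, -1, :]                          # XLNet summary token is last
    pooled = self.intent_dropout(self.fc_intent(pooled), training=training)
    intent_logits = self.out_linear_intent(pooled)
    return intent_logits, slot_logits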
Example #9
 def create_xlnet(self, mname):
     word_inputs = tf.keras.Input(shape=(120, ),
                                  name='word_inputs',
                                  dtype='int32')
     xlnet = TFXLNetModel.from_pretrained(mname)
     xlnet_encodings = xlnet(word_inputs)[0]
     doc_encoding = tf.squeeze(xlnet_encodings[:, -1:, :], axis=1)
     doc_encoding = tf.keras.layers.Dropout(.1)(doc_encoding)
     outputs = tf.keras.layers.Dense(1,
                                     activation='sigmoid',
                                     name='outputs')(doc_encoding)
     model = tf.keras.Model(inputs=[word_inputs], outputs=[outputs])
     model.compile(optimizer=tf.keras.optimizers.Adam(lr=2e-5),
                   loss='binary_crossentropy',
                   metrics=[
                       'accuracy',
                       tf.keras.metrics.Precision(),
                       tf.keras.metrics.Recall()
                   ])
     return model
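A hypothetical end-to-end usage of create_xlnet; the builder instance, the toy data, and the padding length of 120 (matching the Input shape above) are assumptions:

import numpy as np
from transformers import XLNetTokenizer

tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
texts = ['great film, would watch again', 'a complete waste of time']
labels = np.array([1, 0], dtype='float32')

enc = tokenizer(texts, max_length=120, padding='max_length',
                truncation=True, return_tensors='np')

model = builder.create_xlnet('xlnet-base-cased')  # `builder`: instance of the class above
model.fit(enc['input_ids'], labels, epochs=1, batch_size=2)
preds = model.predict(enc['input_ids'])           # sigmoid scores, shape (2, 1)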
Example #10
 def __init__(self, MODELPATH, MODEL=None):
     self.special_token_set = {
         'roberta': (['<s>', '</s>'], 'be'),
         'bert': (['[CLS]', '[SEP]'], 'be'),
         'xlnet': (['<sep>', '<cls>'], 'e')
     }
     self.tokenizer = None
     self.model = None
     self.modeltype = None
     self.add_prefix_space = None
     if not MODEL:
         MODEL = MODELPATH.split('/')[-1]
     print(MODEL, MODELPATH)
     if MODEL.startswith('roberta'):
         self.modeltype = 'roberta'
         self.tokenizer = RobertaTokenizer.from_pretrained(
             MODELPATH, add_special_tokens=False)
         self.model = TFRobertaModel.from_pretrained(MODELPATH,
                                                     output_attentions=True)
         self.add_prefix_space = True
     if MODEL.startswith('bert'):
         self.modeltype = 'bert'
         self.tokenizer = BertTokenizer.from_pretrained(
             MODELPATH, add_special_tokens=False)
         self.model = TFBertModel.from_pretrained(MODELPATH,
                                                  output_attentions=True)
         self.add_prefix_space = False
     if MODEL.startswith('xlnet'):
         self.modeltype = 'xlnet'
         self.tokenizer = XLNetTokenizer.from_pretrained(
             MODELPATH, add_special_tokens=False)
         self.model = TFXLNetModel.from_pretrained(MODELPATH,
                                                   output_attentions=True)
         self.add_prefix_space = False
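A hypothetical usage of the wrapper above (the class name AttentionProbe is assumed); with output_attentions=True the per-layer attention maps come back alongside the hidden states:

probe = AttentionProbe('xlnet-base-cased')  # MODEL is derived from the path
enc = probe.tokenizer('a small test sentence', return_tensors='tf')
outputs = probe.model(enc['input_ids'])
# Depending on the transformers version, attentions are either the .attentions
# attribute of the returned output object or the last element of the tuple.
attentions = getattr(outputs, 'attentions', None) or outputs[-1]
print(len(attentions), attentions[0].shape)  # n_layers, [batch, heads, seq, seq]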
Example #11
def classifier(model, emb_mean, emb_std, embeddings_index):
    train = pd.read_csv('./input/TIL_NLP_train_dataset.csv')
    test = pd.read_csv('./input/TIL_NLP_test_dataset.csv')
    global EMBEDDING_FILE
    print('running classifier')
    print(train.head(6))
    train = shuffle(train)
    print(train.head(6))

    max_features = 4620
    maxlen = 200
    embed_size = 100

    X_train = train["word_representation"].fillna("fillna").values
    y_train = train[[
        "outwear", "top", "trousers", "women dresses", "women skirts"
    ]].values
    X_test = test["word_representation"].fillna("fillna").values
    print('preprocessing start')

    tokenizer = text.Tokenizer(num_words=max_features)
    tokenizer.fit_on_texts(list(X_train) + list(X_test))
    X_train = tokenizer.texts_to_sequences(X_train)

    X_test = tokenizer.texts_to_sequences(X_test)
    x_train = sequence.pad_sequences(X_train, maxlen=maxlen)
    x_test = sequence.pad_sequences(X_test, maxlen=maxlen)

    del X_train, X_test, train, test
    gc.collect()
    word_index = tokenizer.word_index
    nb_words = min(max_features, len(word_index))
    embedding_matrix = np.random.normal(emb_mean, emb_std,
                                        (nb_words, embed_size))
    for word, i in word_index.items():
        if i >= max_features: continue
        embedding_vector = embeddings_index.get(word)
        if embedding_vector is not None:
            embedding_matrix[i - 1] = embedding_vector

    print('preprocessing done')

    # session_conf = tf.ConfigProto(intra_op_parallelism_threads=4, inter_op_parallelism_threads=4)
    # K.set_session(tf.Session(graph=tf.get_default_graph(), config=session_conf))

    #model
    #wrote out all the blocks instead of looping for simplicity

    filter_nr = 64
    filter_size = 3
    max_pool_size = 3
    max_pool_strides = 2
    dense_nr = 256
    spatial_dropout = 0.2
    dense_dropout = 0.5
    train_embed = False
    conv_kern_reg = regularizers.l2(0.00001)
    conv_bias_reg = regularizers.l2(0.00001)

    comment = Input(shape=(maxlen, ))
    emb_comment = Embedding(max_features,
                            embed_size,
                            weights=[embedding_matrix],
                            trainable=train_embed)(comment)
    emb_comment = SpatialDropout1D(spatial_dropout)(emb_comment)

    block1 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(emb_comment)
    block1 = BatchNormalization()(block1)
    block1 = PReLU()(block1)
    block1 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block1)
    block1 = BatchNormalization()(block1)
    block1 = PReLU()(block1)

    #we pass embedded comment through conv1d with filter size 1 because it needs to have the same shape as block output
    #if you choose filter_nr = embed_size (300 in this case) you don't have to do this part and can add emb_comment directly to block1_output
    resize_emb = Conv1D(filter_nr,
                        kernel_size=1,
                        padding='same',
                        activation='linear',
                        kernel_regularizer=conv_kern_reg,
                        bias_regularizer=conv_bias_reg)(emb_comment)
    resize_emb = PReLU()(resize_emb)

    block1_output = add([block1, resize_emb])
    block1_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block1_output)

    block2 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block1_output)
    block2 = BatchNormalization()(block2)
    block2 = PReLU()(block2)
    block2 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block2)
    block2 = BatchNormalization()(block2)
    block2 = PReLU()(block2)

    block2_output = add([block2, block1_output])
    block2_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block2_output)

    block3 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block2_output)
    block3 = BatchNormalization()(block3)
    block3 = PReLU()(block3)
    block3 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block3)
    block3 = BatchNormalization()(block3)
    block3 = PReLU()(block3)

    block3_output = add([block3, block2_output])
    block3_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block3_output)

    block4 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block3_output)
    block4 = BatchNormalization()(block4)
    block4 = PReLU()(block4)
    block4 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block4)
    block4 = BatchNormalization()(block4)
    block4 = PReLU()(block4)

    block4_output = add([block4, block3_output])
    block4_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block4_output)

    block5 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block4_output)
    block5 = BatchNormalization()(block5)
    block5 = PReLU()(block5)
    block5 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block5)
    block5 = BatchNormalization()(block5)
    block5 = PReLU()(block5)

    block5_output = add([block5, block4_output])
    block5_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block5_output)

    block6 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block5_output)
    block6 = BatchNormalization()(block6)
    block6 = PReLU()(block6)
    block6 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block6)
    block6 = BatchNormalization()(block6)
    block6 = PReLU()(block6)

    block6_output = add([block6, block5_output])
    block6_output = MaxPooling1D(pool_size=max_pool_size,
                                 strides=max_pool_strides)(block6_output)

    block7 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block6_output)
    block7 = BatchNormalization()(block7)
    block7 = PReLU()(block7)
    block7 = Conv1D(filter_nr,
                    kernel_size=filter_size,
                    padding='same',
                    activation='linear',
                    kernel_regularizer=conv_kern_reg,
                    bias_regularizer=conv_bias_reg)(block7)
    block7 = BatchNormalization()(block7)
    block7 = PReLU()(block7)

    block7_output = add([block7, block6_output])
    output = GlobalMaxPooling1D()(block7_output)

    output = Dense(dense_nr, activation='linear')(output)
    output = BatchNormalization()(output)
    output = PReLU()(output)
    output = Dropout(dense_dropout)(output)
    output = Dense(5, activation='sigmoid')(output)

    from transformers import XLNetTokenizer, TFXLNetModel

    tokenizer = XLNetTokenizer.from_pretrained('xlnet-large-cased')
    model = TFXLNetModel.from_pretrained('xlnet-large-cased')
    input_ids = tf.constant(
        tokenizer.encode("Hello, my dog is cute",
                         add_special_tokens=True))[None, :]  # Batch size 1
    print(input_ids, type(input_ids))
    outputs = model(input_ids)
    last_hidden_states = outputs[
        0]  # The last hidden-state is the first element of the output tuple

    #model = Model(comment, output)
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizers.Adam(),
                  metrics=['accuracy'])

    batch_size = 128
    epochs = 4

    Xtrain, Xval, ytrain, yval = train_test_split(x_train,
                                                  y_train,
                                                  train_size=0.95,
                                                  random_state=233)

    lr = callbacks.LearningRateScheduler(schedule)
    ra_val = RocAucEvaluation(validation_data=(Xval, yval), interval=1)
    #model.fit(Xtrain, ytrain, batch_size=batch_size, epochs=epochs, validation_data=(Xval, yval), callbacks = [lr, ra_val] ,verbose=1)

    y_pred = model.predict(Xval)
    y_pred = [[1 if i > 0.5 else 0 for i in r] for r in y_pred]
    y_val = yval.tolist()
    accuracy = sum([y_pred[i] == y_val[i]
                    for i in range(len(y_pred))]) / len(y_pred) * 100

    print(Xval)
    print(y_pred)
    print(yval.tolist())
    print(accuracy)
    """
    submission = pd.read_csv('../input/jigsaw-toxic-comment-classification-challenge/sample_submission.csv')
    submission[["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]] = y_pred
    submission.to_csv('dpcnn_test_preds.csv', index=False)
    """
    return model
Example #12
def xlnet_process(DATA_PATH_TRAIN=None,
                  DATA_PATH_TEST=None,
                  phase='train',
                  splits=5,
                  maxlen=70):

    tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")

    if phase in ['train', 'both']:
        text, is_humor, _, _, offensiveness = load_data(DATA_PATH_TRAIN)
        text_sarc, irony = load_data_sarcasm('../data/sarcasm2018_data.csv')

        textt = convert_lines(text, maxlen, tokenizer)
        text_sarc = convert_lines(text_sarc, maxlen, tokenizer)

        text = np.concatenate([textt, text_sarc])
        offensiveness = np.concatenate(
            [offensiveness, np.zeros_like(irony) - 1])
        irony = np.concatenate(
            [np.zeros_like(is_humor) - 1, irony])
        perm = np.random.permutation(len(text))
        text, offensiveness, irony = text[perm], offensiveness[perm], irony[perm]
        print(irony.shape, offensiveness.shape, text.shape)

        data_val = get_splits_for_val(is_humor, splits)
        allindex = [i for i in range(len(offensiveness))]

    if phase in ['encode', 'both']:
        text_dev = pd.read_csv(DATA_PATH_TEST).to_numpy()[:, 1]
        text_dev = convert_lines(text_dev, maxlen, tokenizer)

    encode_train = None
    encode_dev = None

    if phase in ['train', 'both']:
        for i in range(splits):

            train_index = np.array(list(set(allindex) - set(data_val[i])))
            test_index = np.array(list(data_val[i]))
            train_index = list(train_index[np.random.permutation(
                len(train_index))])
            test_index = list(test_index[np.random.permutation(
                len(test_index))])

            model = TFXLNetModel.from_pretrained("xlnet-base-cased",
                                                 output_hidden_states=True,
                                                 return_dict=True)
            XLNOffensive = TransOffensive_build(text[0].shape, featt[0].shape,
                                                model, maxlen)
            coef_learning = set_lt_multipliers_xlnet(0.9, XLNOffensive)

            opt = NadamW(learning_rate=1e-5,
                         decay=2e-6,
                         lr_multipliers=coef_learning,
                         init_verbose=0)

            XLNOffensive.compile(optimizer=opt,
                                 loss=Minkowski_masked_loss,
                                 metrics=masked_root_mean_squared_error)
            filepath = f'../data/xlnet_weights{i+1}.h5'

            checkpointer = K.callbacks.ModelCheckpoint(filepath,
                                                       verbose=1,
                                                       monitor='val_f1',
                                                       mode='max',
                                                       save_best_only=True,
                                                       save_weights_only=True)

            XLNOffensive.fit(text[train_index],
                             [irony[train_index], offensiveness[train_index]],
                             validation_data=(text[test_index], [
                                 irony[test_index], offensiveness[test_index]
                             ]),
                             batch_size=32,
                             epochs=12,
                             callbacks=[checkpointer],
                             verbose=1)

    if phase in ['encode', 'both']:

        model = TFXLNetModel.from_pretrained("xlnet-base-cased",
                                             output_hidden_states=True,
                                             return_dict=True)
        XLNOffensive = TransOffensive_build(text[0].shape, featt[0].shape,
                                            model, maxlen)

        for i in range(splits):
            XLNOffensive.load_weights(f'../data/xlnet_weights{i+1}.h5',
                                      by_name=True)

            if i == 0:
                Embedder = K.models.Model(
                    inputs=XLNOffensive.input,
                    outputs=XLNOffensive.get_layer('encoder_layer').output)

                encode_dev = Embedder.predict(text_dev)
                if phase == 'both':
                    encode_train = Embedder.predict(textt)
            else:
                Embedder = K.models.Model(
                    inputs=XLNOffensive.input,
                    outputs=XLNOffensive.get_layer('encoder_layer').output)

                encode_dev = np.concatenate(
                    [encode_dev, Embedder.predict([text_dev])], axis=1)
                if phase == 'both':
                    encode_train = np.concatenate(
                        [encode_train, Embedder.predict([textt])], axis=1)

    if encode_train is not None:
        np.save('../data/xlnet_train_encode', encode_train)
    if encode_dev is not None:
        np.save('../data/xlnet_dev_encode', encode_dev)
Example #13
def convert_sentences(sents, max_seq_len=256):
    shape = (len(sents), max_seq_len)
    input_ids = np.zeros(shape, dtype='int32')
    for ii, sent in tqdm(enumerate(sents), desc="Converting sentences"):
        idlist = tokenizer.encode(sent)[:max_seq_len]
        input_ids[ii, :len(idlist)] = idlist
    return input_ids


(train_text, train_label), (test_text,
                            test_label) = GetImdbData(max_seq_len, 3000)
train_inputs, test_inputs = map(lambda x: convert_sentences(x, max_seq_len),
                                [train_text, test_text])

xlnet = TFXLNetModel.from_pretrained(model_name)

#xlnet.trainable = False
xlnet.transformer._layers[0].trainable = False
for x in xlnet.transformer._layers[1][:10]:
    x.trainable = False

input_word_ids = tf.keras.layers.Input(shape=(max_seq_len, ), dtype=tf.int32)
seq_output = xlnet(input_word_ids)[0]


class MyMasking(tf.keras.layers.Layer):
    def call(self, x):
        return x[0]

    def compute_mask(self, input, input_mask=None):