Example #1
0
def build_model():
    """Load the pretrained XLNet backbone and attach a two-class softmax head.

    Returns:
        A compiled ``keras.models.Model`` mapping the XLNet inputs to
        two-class probabilities (sparse categorical cross-entropy loss).
    """
    # Pretrained XLNet encoder: bi-directional attention, inference phase.
    backbone = load_trained_model_from_checkpoint(
        config_path=paths.config,
        checkpoint_path=paths.model,
        batch_size=BATCH_SIZE,
        memory_len=MEMORY_LEN,
        target_len=TEXT_LEN,
        in_train_phase=False,
        attention_type=ATTENTION_TYPE_BI)

    # Classification head: last-position feature -> dense -> batch-norm -> softmax.
    features = Extract(index=-1, name='Extract')(backbone.output)
    hidden = keras.layers.Dense(units=768, name='Dense')(features)
    normalized = keras.layers.BatchNormalization(name='Normal')(hidden)
    probabilities = keras.layers.Dense(units=2, activation='softmax',
                                       name='Softmax')(normalized)

    classifier = keras.models.Model(inputs=backbone.inputs, outputs=probabilities)
    classifier.compile(
        optimizer=Adam(learning_rate=LEARNING_RATE),
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )
    return classifier
Example #2
0
    def test_load_empty_memory(self):
        """Load a checkpoint with zero-length memory input, round-trip it
        through save/load with the custom-object registry, and check the
        predictions against a pre-computed fixture array.
        """
        current_path = os.path.dirname(os.path.abspath(__file__))
        checkpoint_path = os.path.join(current_path, 'test_checkpoint_empty')
        model = load_trained_model_from_checkpoint(
            config_path=os.path.join(checkpoint_path, 'xlnet_config.json'),
            checkpoint_path=os.path.join(checkpoint_path, 'xlnet_model.ckpt'),
            batch_size=2,
            memory_len=5,
            target_len=5,
            in_train_phase=False,
            mask_index=0,
            attention_type=ATTENTION_TYPE_UNI,
        )
        # Save to a random temp path and reload with the library's custom layers.
        model_path = os.path.join(tempfile.gettempdir(), 'test_xlnet_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path, custom_objects=get_custom_objects())
        model.summary()

        def _load_numpy(name):
            # Fixture arrays are stored next to the checkpoint as .npy files.
            return np.load(os.path.join(checkpoint_path, name + '.npy'))

        input_ids = _load_numpy('input_ids')
        seg_ids = _load_numpy('seg_ids')
        tune_output = _load_numpy('empty_output')

        # Third input is the memory length — zero for both samples (empty memory).
        inputs = [input_ids, seg_ids, np.zeros((2, 1))]
        output = model.predict_on_batch(inputs)
        self.assertTrue(np.allclose(tune_output, output, atol=1e-6))
Example #3
0
def build_xlnet(args):
    """Load a pretrained XLNet checkpoint and attach a softmax classifier.

    :param args: namespace providing ``config_path``, ``model_path``,
        ``batch_size``, ``maxlen``, ``nclass`` and ``lr``.
    :return: compiled ``keras.models.Model`` ready for training.
    """
    # Load pretrained model (inference phase, bi-directional attention, no memory).
    model = load_trained_model_from_checkpoint(
        config_path=args.config_path,
        checkpoint_path=args.model_path,
        batch_size=args.batch_size,
        memory_len=0,
        target_len=args.maxlen,
        in_train_phase=False,
        attention_type=ATTENTION_TYPE_BI,
    )

    # Build classification model: last-position feature -> softmax over classes.
    last = model.output
    extract = Extract(index=-1, name='Extract')(last)
    output = keras.layers.Dense(units=args.nclass,
                                activation='softmax',
                                name='Softmax')(extract)
    model = keras.models.Model(inputs=model.inputs, outputs=output)
    # Print the architecture once. The original also called
    # print(model.summary()) — Model.summary() prints itself and returns
    # None, so that line just emitted a spurious "None"; it was removed.
    model.summary()

    # Fit model
    model.compile(
        optimizer=RAdam(args.lr),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model
Example #4
0
 def _test_pretrained_tune(self):
     """Load a full pretrained XLNet (uni-directional) and plot its graph.

     NOTE(review): ``checkpoint_path`` is empty, so the config/checkpoint
     filenames resolve against the working directory — confirm before use.
     """
     ckpt_dir = ''
     xlnet = load_trained_model_from_checkpoint(
         config_path=os.path.join(ckpt_dir, 'xlnet_config.json'),
         checkpoint_path=os.path.join(ckpt_dir, 'xlnet_model.ckpt'),
         batch_size=8,
         memory_len=512,
         target_len=128,
         in_train_phase=False,
         attention_type=ATTENTION_TYPE_UNI,
     )
     # Render the architecture next to this test file.
     here = os.path.dirname(os.path.abspath(__file__))
     image_path = os.path.join(here, 'xlnet_large_tune.jpg')
     keras.utils.vis_utils.plot_model(xlnet, image_path, show_shapes=True)
Example #5
0
    def test_load_training(self):
        """Round-trip a training-phase checkpoint through save/load and
        verify both the outputs and the updated memory states against
        pre-computed fixture arrays.
        """
        current_path = os.path.dirname(os.path.abspath(__file__))
        checkpoint_path = os.path.join(current_path, 'test_checkpoint_pre')
        # in_train_phase=True attaches the pre-training head.
        model = load_trained_model_from_checkpoint(
            config_path=os.path.join(checkpoint_path, 'xlnet_config.json'),
            checkpoint_path=os.path.join(checkpoint_path, 'xlnet_model.ckpt'),
            batch_size=2,
            memory_len=5,
            target_len=5,
            in_train_phase=True,
            mask_index=0,
            attention_type=ATTENTION_TYPE_UNI,
        )
        # Register custom layers globally so keras can deserialize the model.
        set_custom_objects()
        model_path = os.path.join(tempfile.gettempdir(),
                                  'test_xlnet_%f.h5' % np.random.random())
        model.save(model_path)
        model = keras.models.load_model(model_path)
        model.summary()

        def _load_numpy(name):
            # Fixture arrays are stored next to the checkpoint as .npy files.
            return np.load(os.path.join(checkpoint_path, name + '.npy'))

        input_ids = _load_numpy('input_ids')
        seg_ids = _load_numpy('seg_ids')
        input_mask = _load_numpy('input_mask')
        mems_0 = _load_numpy('mems_0')
        mems_1 = _load_numpy('mems_1')
        pre_output = _load_numpy('pre_output')
        pre_new_mems_0 = _load_numpy('pre_new_mems_0')
        pre_new_mems_1 = _load_numpy('pre_new_mems_1')

        # Third input is the memory length (5) for each of the 2 samples.
        inputs = [input_ids, seg_ids, np.ones((2, 1)) * 5, input_mask]
        # Seed both memory layers with the fixture states before predicting.
        self._update_memory(model, 'Memory-0', mems_0)
        self._update_memory(model, 'Memory-1', mems_1)
        output = model.predict_on_batch(inputs)
        # Prediction must have advanced both memories to the expected states.
        self.assertTrue(
            np.allclose(pre_new_mems_0,
                        self._get_memory(model, 'Memory-0', 5),
                        atol=1e-6))
        self.assertTrue(
            np.allclose(pre_new_mems_1,
                        self._get_memory(model, 'Memory-1', 5),
                        atol=1e-6))
        self.assertTrue(np.allclose(pre_output, output, atol=1e-6))
def build_xlnet_model(X):
    """Embed each encoded sequence with a pretrained XLNet model.

    :param X: iterable of already-encoded token-id sequences.
    :return: ``np.ndarray`` of per-sequence XLNet outputs for training.
    """
    xlnet = load_trained_model_from_checkpoint(
        config_path=config_path,
        checkpoint_path=checkpoint_path,
        batch_size=16,
        memory_len=0,
        target_len=100,
        in_train_phase=False,
        attention_type=ATTENTION_TYPE_UNI,
    )
    # memory_len=0, so the memory-length input is always zero.
    # NOTE(review): the model is built with batch_size=16 but fed batches of
    # one sequence below — confirm this is intended.
    memory_length_input = np.zeros((1, 1))

    def _embed(line):
        # Predict one sequence as a batch of size 1 and drop the batch axis.
        token_input = np.expand_dims(np.array(line), axis=0)
        segment_input = np.zeros_like(token_input)
        return xlnet.predict_on_batch(
            [token_input, segment_input, memory_length_input])[0]

    # Comprehension replaces the original manual append loop (same order,
    # same per-line computation).
    return np.array([_embed(line) for line in X])
Example #7
0
# -*- coding: utf-8 -*-
# Load a pretrained cased XLNet-base checkpoint and plot its architecture.

import os
from keras_xlnet import Tokenizer, load_trained_model_from_checkpoint, ATTENTION_TYPE_BI

from keras.utils import plot_model

# Directory containing the pretrained checkpoint files.
checkpoint_path = './xlnet_cased_L-12_H-768_A-12'

# SentencePiece tokenizer shipped with the checkpoint.
tokenizer = Tokenizer(os.path.join(checkpoint_path, 'spiece.model'))
# Inference-phase model with bi-directional attention.
model = load_trained_model_from_checkpoint(
    config_path=os.path.join(checkpoint_path, 'xlnet_config.json'),
    checkpoint_path=os.path.join(checkpoint_path, 'xlnet_model.ckpt'),
    batch_size=16,
    memory_len=512,
    target_len=128,
    in_train_phase=False,
    attention_type=ATTENTION_TYPE_BI,
)
model.summary()

# Render the model graph to an image file.
plot_model(model, to_file="xlnet.png", show_shapes=True)
Example #8
0
    segments = np.zeros_like(tokens)
    segments[:, -1] = 1
    lengths = np.zeros_like(tokens[:, :1])
    return DataSequence([tokens, segments, lengths], classes)


current_path = os.path.dirname(os.path.abspath(__file__))
train_seq = generate_sequence(train_path)
dev_seq = generate_sequence(dev_path)

# Load pretrained model (inference phase, bi-directional attention, no memory)
model = load_trained_model_from_checkpoint(
    config_path=paths.config,
    checkpoint_path=paths.model,
    batch_size=BATCH_SIZE,
    memory_len=0,
    target_len=SEQ_LEN,
    in_train_phase=False,
    attention_type=ATTENTION_TYPE_BI,
)

# Build classification model:
# last-position feature -> tanh dense -> dropout -> two-class softmax
last = Extract(index=-1, name='Extract')(model.output)
dense = keras.layers.Dense(units=768, activation='tanh', name='Dense')(last)
dropout = keras.layers.Dropout(rate=0.1, name='Dropout')(dense)
output = keras.layers.Dense(units=2, activation='softmax',
                            name='Softmax')(dropout)
model = keras.models.Model(inputs=model.inputs, outputs=output)
model.summary()

# Fit model
Example #9
0
    def build(self):
        """Load a pretrained XLNet checkpoint and wrap the selected hidden
        layers into ``self.model`` as a non-masking embedding extractor.
        """
        from keras_xlnet import load_trained_model_from_checkpoint, set_custom_objects
        from keras_xlnet import Tokenizer, ATTENTION_TYPE_BI, ATTENTION_TYPE_UNI

        self.embedding_type = 'xlnet'
        self.checkpoint_path = os.path.join(self.corpus_path,
                                            'xlnet_model.ckpt')
        self.config_path = os.path.join(self.corpus_path, 'xlnet_config.json')
        self.spiece_model = os.path.join(self.corpus_path, 'spiece.model')

        self.attention_type = self.xlnet_embed.get('attention_type',
                                                   'bi')  # or 'uni'
        self.attention_type = ATTENTION_TYPE_BI if self.attention_type == 'bi' else ATTENTION_TYPE_UNI
        self.memory_len = self.xlnet_embed.get('memory_len', 0)
        self.target_len = self.xlnet_embed.get('target_len', 5)
        print('load xlnet model start!')
        # Load the pretrained model
        model = load_trained_model_from_checkpoint(
            checkpoint_path=self.checkpoint_path,
            attention_type=self.attention_type,
            in_train_phase=self.trainable,
            config_path=self.config_path,
            memory_len=self.memory_len,
            target_len=self.target_len,
            batch_size=self.batch_size,
            mask_index=0)
        # Register custom layers for (de)serialization.
        set_custom_objects()
        # Load the SentencePiece tokenizer
        self.tokenizer = Tokenizer(self.spiece_model)
        # Inspect layers when debugging
        self.model_layers = model.layers
        len_layers = self.model_layers.__len__()
        print(len_layers)

        layer_real = [i for i in range(25)] + [-i for i in range(25)]
        # Sanity-check the requested indexes; out-of-range ones become -2
        self.layer_indexes = [
            i if i in layer_real else -2 for i in self.layer_indexes
        ]

        len_couche = int((len_layers - 6) / 10)
        # Original note: 246 layers total — 10 layers per transformer block
        # (MultiHeadAttention, Dropout, Add, LayerNormalization, ...), after
        # 9 input/embedding layers; 24 blocks in all.
        layer_dict = []
        layer_0 = 7
        for i in range(len_couche):
            layer_0 = layer_0 + 10
            layer_dict.append(layer_0)
        layer_dict.append(247)
        # Probe for get_output_at (kept for reference)
        # def get_number(index):
        #     try:
        #        model_node = model.get_output_at(node_index=index)
        #        gg = 0
        #     except:
        #         print('node index wrong!')
        #         print(index)
        # list_index = [i for i in range(25)] + [-i for i in range(25)]
        # for li in list_index:
        #     get_number(li)

        # No indexes requested: use the model output itself
        if len(self.layer_indexes) == 0:
            encoder_layer = model.output
        # A single index: take that layer, or fall back to the last mapped one
        elif len(self.layer_indexes) == 1:
            if self.layer_indexes[0] in layer_real:
                encoder_layer = model.get_layer(
                    index=layer_dict[self.layer_indexes[0]]).get_output_at(
                        node_index=0)
            else:
                encoder_layer = model.get_layer(
                    index=layer_dict[-1]).get_output_at(node_index=0)
        # Otherwise gather every requested layer and sum them (768 dims each)
        else:
            # layer_indexes must be [0, 1, 2,3,......24]
            all_layers = [
                model.get_layer(index=layer_dict[lay]).get_output_at(
                    node_index=0) if lay in layer_real else model.get_layer(
                        index=layer_dict[-1]).get_output_at(
                            node_index=0)  # invalid index: default to the last layer
                for lay in self.layer_indexes
            ]
            print(self.layer_indexes)
            print(all_layers)
            all_layers_select = []
            for all_layers_one in all_layers:
                all_layers_select.append(all_layers_one)
            encoder_layer = Add()(all_layers_select)
            print(encoder_layer.shape)

            # def xlnet_concat(x):
            #     x_concat = K.concatenate(x, axis=1)
            #     return x_concat
            # encoder_layer = Lambda(xlnet_concat, name='xlnet_concat')(all_layers)

        self.output = NonMaskingLayer()(encoder_layer)
        self.input = model.inputs
        self.model = Model(self.input, self.output)
        print("load KerasXlnetEmbedding end")
        model.summary(132)

        self.embedding_size = self.model.output_shape[-1]
        self.vocab_size = len(self.tokenizer.sp)
Example #10
0
# Example sentence (Chinese): "This apple is delicious".
text = "这个苹果很好吃"
tokens = tokenizer.encode(text)
print(np.array(tokens).shape)

# Add a batch axis of size 1.
token_input = np.expand_dims(np.array(tokens), axis=0)
print(token_input.shape)
segment_input = np.zeros_like(token_input)
print(segment_input.shape)
# memory_len=0 below, so the memory-length input stays zero.
memory_length_input = np.zeros((1, 1))

# Load pre-trained model
model = load_trained_model_from_checkpoint(
    config_path=config_path,
    checkpoint_path=model_path,
    batch_size=1,
    memory_len=0,
    target_len=14,
    in_train_phase=False,
    attention_type=ATTENTION_TYPE_UNI,
)

# Predict and print the first 5 feature dimensions of every position.
results = model.predict_on_batch(
    [token_input, segment_input, memory_length_input])
print('# Uni-directional')
for i in range(len(tokens)):
    print(results[0, i, :5])
"""
Official outputs of [0, i, :5]:

  '_All': [ 1.3914602   0.47409844 -0.18970338 -1.9293687  -0.97905093]
Example #11
0
    def build(self):
        """Load a pretrained XLNet checkpoint and expose the selected
        ``FeedForward-Normal-N`` layer outputs as ``self.model``.
        """
        from keras_xlnet import load_trained_model_from_checkpoint, set_custom_objects
        from keras_xlnet import Tokenizer, ATTENTION_TYPE_BI, ATTENTION_TYPE_UNI

        self.embedding_type = 'xlnet'
        self.checkpoint_path = os.path.join(self.corpus_path, 'xlnet_model.ckpt')
        self.config_path = os.path.join(self.corpus_path, 'xlnet_config.json')
        self.spiece_model = os.path.join(self.corpus_path, 'spiece.model')

        self.attention_type = self.xlnet_embed.get('attention_type', 'bi')  # or 'uni'
        self.attention_type = ATTENTION_TYPE_BI if self.attention_type == 'bi' else ATTENTION_TYPE_UNI
        self.memory_len = self.xlnet_embed.get('memory_len', 0)
        self.target_len = self.xlnet_embed.get('target_len', 5)
        print('load xlnet model start!')
        # Load the pretrained model
        model = load_trained_model_from_checkpoint(checkpoint_path=self.checkpoint_path,
                                                   attention_type=self.attention_type,
                                                   in_train_phase=self.trainable,
                                                   config_path=self.config_path,
                                                   memory_len=self.memory_len,
                                                   target_len=self.target_len,
                                                   batch_size=self.batch_size,
                                                   mask_index=0)
        # Register custom layers for (de)serialization.
        set_custom_objects()
        self.build_config(self.config_path)
        # Load the SentencePiece tokenizer
        self.tokenizer = Tokenizer(self.spiece_model)
        # # Inspect layers when debugging
        # self.model_layers = model.layers
        # len_layers = self.model_layers.__len__()
        # print(len_layers)
        num_hidden_layers = self.configs.get("n_layer", 12)

        layer_real = [i for i in range(num_hidden_layers)] + [-i for i in range(num_hidden_layers)]
        # Sanity-check the requested indexes; out-of-range ones become -2
        self.layer_indexes = [i if i in layer_real else -2 for i in self.layer_indexes]
        output_layer = "FeedForward-Normal-{0}"
        layer_dict = [model.get_layer(output_layer.format(i + 1)).get_output_at(node_index=0)
                          for i in range(num_hidden_layers)]

        # No indexes requested: use the model output itself
        if len(self.layer_indexes) == 0:
            encoder_layer = model.output
        # A single index: take that layer, or fall back to the last one
        elif len(self.layer_indexes) == 1:
            if self.layer_indexes[0] in layer_real:
                encoder_layer = layer_dict[self.layer_indexes[0]]
            else:
                encoder_layer = layer_dict[-1]
        # Otherwise gather every requested layer and sum them (768 dims each)
        else:
            # layer_indexes must be [0, 1, 2,3,......24]
            all_layers = [layer_dict[lay] if lay in layer_real
                          else layer_dict[-1] # invalid index: default to the last layer
                          for lay in self.layer_indexes]
            print(self.layer_indexes)
            print(all_layers)
            all_layers_select = []
            for all_layers_one in all_layers:
                all_layers_select.append(all_layers_one)
            encoder_layer = Add()(all_layers_select)
            print(encoder_layer.shape)

            # def xlnet_concat(x):
            #     x_concat = K.concatenate(x, axis=1)
            #     return x_concat
            # encoder_layer = Lambda(xlnet_concat, name='xlnet_concat')(all_layers)

        self.output = NonMaskingLayer()(encoder_layer)
        self.input = model.inputs
        self.model = Model(self.input, self.output)
        print("load KerasXlnetEmbedding end")
        model.summary(132)

        self.embedding_size = self.model.output_shape[-1]
        self.vocab_size = len(self.tokenizer.sp)
    def __init__(self):
        """Build the global XLNet embedding model from ``args`` and keep the
        tokenizer plus the selected-layer output graph.
        """
        self.attention_type = ATTENTION_TYPE_BI if args.attention_type[
            0] == 'bi' else ATTENTION_TYPE_UNI
        self.memory_len, self.target_len, self.batch_size = args.memory_len, args.target_len, args.batch_size
        self.checkpoint_path, self.config_path = args.ckpt_name, args.config_name
        self.layer_indexes, self.in_train_phase = args.layer_indexes, False

        print("load KerasXlnetEmbedding start! ")
        # Shared globally so django / flask / tornado handlers can reuse them
        global graph
        graph = tf.get_default_graph()
        global model
        # Load the pretrained model
        model = load_trained_model_from_checkpoint(
            checkpoint_path=self.checkpoint_path,
            attention_type=self.attention_type,
            in_train_phase=self.in_train_phase,
            config_path=self.config_path,
            memory_len=self.memory_len,
            target_len=self.target_len,
            batch_size=self.batch_size,
            mask_index=0)
        # Load the SentencePiece tokenizer
        self.tokenizer = Tokenizer(args.spiece_model)
        # Inspect layers when debugging
        self.model_layers = model.layers
        len_layers = self.model_layers.__len__()
        print(len_layers)
        len_couche = int((len_layers - 6) / 10)
        # Original note: 246 layers total — 10 layers per transformer block
        # (MultiHeadAttention, Dropout, Add, LayerNormalization), after
        # 9 input/embedding layers; 24 blocks in all.
        layer_dict = [5]
        layer_0 = 6
        for i in range(len_couche):
            layer_0 = layer_0 + 10
            layer_dict.append(layer_0 - 2)
        # No indexes requested: use the model output itself
        if len(self.layer_indexes) == 0:
            encoder_layer = model.output
        # A single index: take that layer, or fall back to second-to-last
        elif len(self.layer_indexes) == 1:
            if self.layer_indexes[0] in [i + 1 for i in range(len_couche + 1)]:
                encoder_layer = model.get_layer(
                    index=layer_dict[self.layer_indexes[0]]).output
            else:
                encoder_layer = model.get_layer(index=layer_dict[-2]).output
        # Otherwise gather every requested layer and sum them (768 dims each)
        else:
            # layer_indexes must be [0, 1, 2,3,......24]
            all_layers = [
                model.get_layer(index=layer_dict[lay]).output if lay
                in [i + 1 for i in range(len_couche + 1)] else model.get_layer(
                    index=layer_dict[-2]).output  # invalid index: default to second-to-last layer
                for lay in self.layer_indexes
            ]
            print(self.layer_indexes)
            print(all_layers)
            all_layers_select = []
            for all_layers_one in all_layers:
                all_layers_select.append(all_layers_one)
            encoder_layer = Add()(all_layers_select)
            print(encoder_layer.shape)
        output_layer = NonMaskingLayer()(encoder_layer)
        model = Model(model.inputs, output_layer)
        print("load KerasXlnetEmbedding end")
        model.summary(132)
Example #13
0
                    yield [
                        np.array(X1),
                        np.array(X2),
                        np.array(X3),
                        np.array(X4)
                    ], np.array(Y)
                    X1, X2, X3, X4, Y = [], [], [], [], []


if sign_num == 2:
    # Load the pretrained model (training phase, no memory)
    xlnet_model = load_trained_model_from_checkpoint(
        checkpoint_path=ckpt_name,
        attention_type=ATTENTION_TYPE_BI,
        in_train_phase=True,
        config_path=config_name,
        memory_len=0,
        target_len=seq_max_len,
        batch_size=batch_size,
        mask_index=0)
    # Model structure: four inputs -> XLNet -> first position -> sigmoid
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    x3_in = Input(shape=(None, ))
    x4_in = Input(shape=(None, ))
    x = xlnet_model([x1_in, x2_in, x3_in, x4_in])
    x = Lambda(function=lambda x: x[:, 0])(x)  # keep only the first position's vector
    p = Dense(1, activation='sigmoid')(x)

    model = Model([x1_in, x2_in, x3_in, x4_in], p)
else:
Example #14
0
File: xlnet.py  Project: NEUljw/WN2WD
    def __init__(self, batch_size, gpu_name, gpu_num):
        """Load a pretrained XLNet, select the configured hidden layers, and
        build ``self.par_model`` (multi-GPU when ``gpu_num >= 2``).
        """
        set_gpu_option(gpu_name, gpu_num)
        self.attention_type = ATTENTION_TYPE_BI if args.attention_type[0] == 'bi' else ATTENTION_TYPE_UNI
        self.memory_len, self.target_len, self.batch_size = args.memory_len, args.target_len, batch_size
        self.checkpoint_path, self.config_path = args.ckpt_name, args.config_name
        self.layer_indexes, self.in_train_phase = args.layer_indexes, False

        print("##### load KerasXlnet start #####")
        self.graph = tf.get_default_graph()
        # Load the pretrained model
        self.model = load_trained_model_from_checkpoint(checkpoint_path=self.checkpoint_path,
                                                        attention_type=self.attention_type,
                                                        in_train_phase=self.in_train_phase,
                                                        config_path=self.config_path,
                                                        memory_len=self.memory_len,
                                                        target_len=self.target_len,
                                                        batch_size=self.batch_size,
                                                        mask_index=0)
        # Load the SentencePiece tokenizer
        self.tokenizer = Tokenizer(args.spiece_model)
        # Inspect layers when debugging
        self.model_layers = self.model.layers
        len_layers = self.model_layers.__len__()
        len_couche = int((len_layers - 6) / 10)
        # Original note: 126 layers total — 10 layers per transformer block
        # after 7 input/embedding layers; 12 blocks in all.
        layer_dict = [5]
        layer_0 = 6
        for i in range(len_couche):
            layer_0 = layer_0 + 10
            layer_dict.append(layer_0 - 2)

        # No indexes requested: use the model output itself
        if len(self.layer_indexes) == 0:
            encoder_layer = self.model.output
        # A single index: take that layer, or fall back to second-to-last
        elif len(self.layer_indexes) == 1:
            if self.layer_indexes[0] in [i + 1 for i in range(len_couche + 1)]:
                encoder_layer = self.model.get_layer(index=layer_dict[self.layer_indexes[0]]).output
            else:
                encoder_layer = self.model.get_layer(index=layer_dict[-2]).output

        # Otherwise gather every requested layer and sum them (768 dims each)
        else:
            # layer_indexes must be [0, 1, 2,3,......12]
            all_layers = [self.model.get_layer(index=layer_dict[lay]).output
                          if lay in [i + 1 for i in range(len_couche + 1)]
                          else self.model.get_layer(index=layer_dict[-3]).output  # invalid index: fall back to an earlier layer
                          for lay in self.layer_indexes]
            all_layers = all_layers[1:]
            all_layers_select = []
            for all_layers_one in all_layers:
                all_layers_select.append(all_layers_one)
            encoder_layer = Add()(all_layers_select)

        output_layer = NonMaskingLayer()(encoder_layer)
        model = Model(self.model.inputs, output_layer)
        if gpu_num >= 2:
            self.par_model = multi_gpu_model(model, gpus=gpu_num)
        else:
            self.par_model = model
        print("##### load KerasXlnet end #####")
Example #15
0
    def build(self):
        """Load a pretrained XLNet checkpoint and expose the selected hidden
        layers as the output of ``self.model``.
        """
        from keras_xlnet import Tokenizer, ATTENTION_TYPE_BI, ATTENTION_TYPE_UNI
        from keras_xlnet import load_trained_model_from_checkpoint

        self.embedding_type = 'xlnet'
        self.checkpoint_path = os.path.join(self.corpus_path, 'xlnet_model.ckpt')
        self.config_path = os.path.join(self.corpus_path, 'xlnet_config.json')
        self.spiece_model = os.path.join(self.corpus_path, 'spiece.model')

        self.attention_type = self.xlnet_embed.get('attention_type', 'bi')  # or 'uni'
        self.attention_type = ATTENTION_TYPE_BI if self.attention_type == 'bi' else ATTENTION_TYPE_UNI
        self.memory_len =  self.xlnet_embed.get('memory_len', 0)
        self.target_len = self.xlnet_embed.get('target_len', 5)
        print('load xlnet model start!')
        # Load the pretrained model
        model = load_trained_model_from_checkpoint(checkpoint_path=self.checkpoint_path,
                                                   attention_type=self.attention_type,
                                                   in_train_phase=self.trainable,
                                                   config_path=self.config_path,
                                                   memory_len=self.memory_len,
                                                   target_len=self.target_len,
                                                   batch_size=self.batch_size,
                                                   mask_index=0)
        # Load the SentencePiece tokenizer
        self.tokenizer = Tokenizer(self.spiece_model)
        # Inspect layers when debugging
        self.model_layers = model.layers
        len_layers = self.model_layers.__len__()
        print(len_layers)
        len_couche = int((len_layers - 6) / 10)
        # Original note: 246 layers total — 10 layers per transformer block
        # (MultiHeadAttention, Dropout, Add, LayerNormalization), after
        # 9 input/embedding layers; 24 blocks in all.
        layer_dict = [5]
        layer_0 = 6
        for i in range(len_couche):
            layer_0 = layer_0 + 10
            layer_dict.append(layer_0 - 2)
        # No indexes requested: use the model output itself
        if len(self.layer_indexes) == 0:
            encoder_layer = model.output
        # A single index: take that layer, or fall back to the last mapped one
        elif len(self.layer_indexes) == 1:
            if self.layer_indexes[0] in [i + 1 for i in range(len_couche + 1)]:
                encoder_layer = model.get_layer(index=layer_dict[self.layer_indexes[0]]).output
            else:
                encoder_layer = model.get_layer(index=layer_dict[-1]).output
        # Otherwise gather every requested layer and sum them (768 dims each)
        else:
            # layer_indexes must be [0, 1, 2,3,......24]
            all_layers = [model.get_layer(index=layer_dict[lay]).output
                          if lay in [i + 1 for i in range(len_couche + 1)]
                          else model.get_layer(index=layer_dict[-1]).output  # invalid index: default to the last layer
                          for lay in self.layer_indexes]
            print(self.layer_indexes)
            print(all_layers)
            all_layers_select = []
            for all_layers_one in all_layers:
                all_layers_select.append(all_layers_one)
            encoder_layer = Add()(all_layers_select)
            print(encoder_layer.shape)
        self.output = NonMaskingLayer()(encoder_layer)
        self.input = model.inputs
        self.model = Model(model.inputs, self.output)
        print("load KerasXlnetEmbedding end")
        model.summary(132)

        self.embedding_size = self.model.output_shape[-1]
        self.vocab_size = len(self.tokenizer.sp)
    def build(self):
        """Load a pretrained XLNet and merge the configured hidden layers
        (add / avg / max / concat) into the output of ``self.model``.
        """
        print('load XLNet model start!')
        print([
            self.target_len, self.memory_len, self.attention_type,
            self.batch_size
        ])
        # Load the pretrained model
        model = load_trained_model_from_checkpoint(
            checkpoint_path=self.checkpoint_path,
            attention_type=self.attention_type,
            in_train_phase=self.trainable,
            config_path=self.config_path,
            memory_len=self.memory_len,
            target_len=self.target_len,
            batch_size=self.batch_size,
            mask_index=0)

        self.model_layers = model.layers
        """
        # debug时候查看layers
        for i in range(len(model.layers)):
            print([i, model.layers[i]])
        base版trainable: 129层  9 + 120     trainable=False:  126  6+120
        0-8:输入 + embedding
        9-128: 每10个layer一层

        mid版trainable=True: 249层   9 + 240     trainable=False:  246  6+240
        0-8:输入 + embedding
        9-248: 每10个layer一层
        """
        len_layers = self.model_layers.__len__()
        len_couche = len_layers // 10

        # NOTE(review): index math assumes the input/embedding layers precede
        # the 10-layer transformer blocks (see the layout note above) — confirm.
        layer_0 = len_layers - len_couche * 10
        layer_dict = [layer_0 - 1]
        if self.trainable == False:
            layer_dict[0] += 1
            sub_diff = 1
        else:
            sub_diff = 2
        for i in range(len_couche):
            layer_0 += 10
            layer_dict.append(layer_0 - sub_diff)

        # No indexes requested: use the model output itself
        if len(self.layer_indexes) == 0:
            encoder_layer = model.output
        # A single index: take that layer, or fall back to the last mapped one
        elif len(self.layer_indexes) == 1:
            if abs(self.layer_indexes[0]) in [
                    i for i in range(len_couche + 1)
            ]:
                encoder_layer = model.get_layer(
                    index=layer_dict[self.layer_indexes[0]]).get_output_at(-1)
            else:
                encoder_layer = model.get_layer(
                    index=layer_dict[-1]).get_output_at(-1)
        else:
            # Gather every requested layer; invalid indexes fall back to the last
            all_layers = [
                model.get_layer(index=layer_dict[lay]).get_output_at(-1)
                if abs(lay) in [i for i in range(len_couche + 1)] else
                model.get_layer(index=layer_dict[-1]).get_output_at(-1)
                for lay in self.layer_indexes
            ]
            all_layers_select = []
            for all_layers_one in all_layers:
                all_layers_select.append(all_layers_one)

            # Merge the selected layers with the configured strategy
            if self.merge_type == "add":
                encoder_layer = Add()(all_layers_select)
            elif self.merge_type == "avg":
                encoder_layer = Average()(all_layers_select)
            elif self.merge_type == "max":
                encoder_layer = Maximum()(all_layers_select)
            elif self.merge_type == "concat":
                encoder_layer = Concatenate()(all_layers_select)
            else:
                raise RuntimeError("invalid merge type")
            print(encoder_layer)

        # Identity Lambda keeps the selected tensor as the public output.
        self.output = Lambda(lambda x: x,
                             output_shape=lambda s: s)(encoder_layer)
        self.input = model.inputs
        self.model = Model(model.inputs, self.output)

        self.embedding_size = self.model.output_shape[-1]
        self.vocab_size = len(tokenizer.sp)

        self.built = True
        # Resume fine-tuned weights when available and training is enabled.
        if os.path.exists(self.path_fineture) and self.trainable:
            self.model.load_weights(self.path_fineture)
        print("load Keras XLNet Embedding finish")