Example #1
 def load_bert_mc(self):
     b_model = get_model(token_num=21128)  # 21128 is the vocabulary size
     compile_model(b_model)
     bert_model = Model(
         inputs=b_model.input[:2],
         outputs=b_model.get_layer('Encoder-12-FeedForward-Norm').output)
     x1_in = Input(shape=(None, ))  # token ids of the concatenated question + passage
     x2_in = Input(shape=(None, ))  # segment ids of the concatenated question + passage
     s1_in = Input(shape=(None, ))  # left boundary of the answer (label)
     s2_in = Input(shape=(None, ))  # right boundary of the answer (label)
     x1, x2, s1, s2 = x1_in, x2_in, s1_in, s2_in
     x_mask = Lambda(
         lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x1)
     x = bert_model([x1, x2])
     ps1 = Dense(1, use_bias=False)(x)
     # [[0.1],[0.2],[0.3],...] -> [0.1,0.2,0.3,...]
     # subtract (1 - mask) * 1e10 so padded positions can never be picked
     ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
         [ps1, x_mask])
     # ps1 = Lambda(lambda x: x[0]*x[1])([ps1, x_mask])
     ps2 = Dense(1, use_bias=False)(x)
     ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
         [ps2, x_mask])
     # ps2 = Lambda(lambda x:x[0]*x[1])([ps2, x_mask])
     model = Model([x1_in, x2_in], [ps1, ps2])
     model.load_weights(self.model_path)
     return model
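A minimal decoding sketch (not part of the original source): the model above returns two logit vectors, one for the answer start and one for the end, over the concatenated question + passage. The helper below picks the best span from those logits; the max_answer_len cap and the variable names in the usage comment are assumptions.

import numpy as np

def decode_span(start_logits, end_logits, max_answer_len=64):
    """Pick (start, end) maximizing start_logits[s] + end_logits[e] with s <= e."""
    best, best_score = (0, 0), -np.inf
    for s in range(len(start_logits)):
        for e in range(s, min(s + max_answer_len, len(end_logits))):
            score = start_logits[s] + end_logits[e]
            if score > best_score:
                best_score, best = score, (s, e)
    return best

# Usage (assumed shapes): ps1, ps2 = model.predict([token_ids, segment_ids])
# start, end = decode_span(ps1[0], ps2[0])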
Example #2
 def test_save_load_json(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
         attention_activation='gelu',
     )
     compile_model(model)
     data = model.to_json()
     set_custom_objects()
     model = keras.models.model_from_json(data)
     model.summary()
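An assumed-equivalent alternative to calling set_custom_objects() globally is to pass keras-bert's custom layers directly to model_from_json:

from keras_bert import get_custom_objects

# Register the custom layers only for this call instead of globally.
model = keras.models.model_from_json(data, custom_objects=get_custom_objects())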
Example #3
def pretrain_model():

    df = pd.read_csv('../data/task2_trainset.csv', dtype=str)
    df_2 = pd.read_csv('../data/task2_public_testset.csv', dtype=str)
    abstract_1 = df.values[:, 2]
    abstract_2 = df_2.values[:, 2]
    token_dict = load_vocabulary(dict_path)
    token_list = list(token_dict.keys())
    tokenizer = Tokenizer(token_dict)
    X_1 = collect_inputs(abstract_1, tokenizer)
    X_2 = collect_inputs(abstract_2, tokenizer)
    X = X_1 + X_2
    print(len(X))

    model = load_trained_model_from_checkpoint(config_path,
                                               checkpoint_path,
                                               training=True,
                                               trainable=True,
                                               seq_len=512)
    compile_model(model)

    def _generator():
        while True:
            yield gen_batch_inputs(generate_input_by_batch(X),
                                   token_dict,
                                   token_list,
                                   seq_len=512,
                                   mask_rate=0.3)

    opt_filepath = sys.argv[1]
    checkpoint = ModelCheckpoint(opt_filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min',
                                 save_weights_only=True)
    reduce_lr = ReduceLROnPlateau(monitor='val_loss',
                                  factor=0.5,
                                  patience=10,
                                  verbose=1,
                                  mode='auto',
                                  min_delta=0.1,
                                  cooldown=10,
                                  min_lr=1e-10)
    es = EarlyStopping(monitor='val_loss', patience=50)
    callbacks_list = [checkpoint, es, reduce_lr]

    model.fit_generator(generator=_generator(),
                        steps_per_epoch=500,
                        epochs=5000,
                        validation_data=_generator(),
                        validation_steps=200,
                        callbacks=callbacks_list)
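generate_input_by_batch() is not shown in this example. A hypothetical stand-in, assuming X holds [tokens_a, tokens_b] sentence pairs in the format gen_batch_inputs() expects, could simply sample a random batch:

import random

def generate_input_by_batch(pairs, batch_size=8):
    # Draw a random batch of sentence pairs for one pretraining step.
    return random.sample(pairs, min(batch_size, len(pairs)))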
Example #4
 def test_save_load_json(self):
     model = get_model(
         token_num=200,
         head_num=3,
         transformer_num=2,
         attention_activation='gelu',
     )
     compile_model(model)
     data = model.to_json()
     # Save the model to a JSON file
     with open('./mybert_config.json', 'w') as file:
         file.write(data)
     set_custom_objects()
     # # Load the model directly from the JSON string
     # model = keras.models.model_from_json(data)
     # Load the model from the JSON file
     with open('./mybert_config.json', 'r') as file:
         model_json = file.read()
     model = keras.models.model_from_json(model_json)
     model.summary()
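Note that to_json() serializes only the architecture; the weights of a trained model would have to be saved and restored separately, for example (file name is illustrative):

model.save_weights('./mybert_weights.h5')   # after training
model.load_weights('./mybert_weights.h5')   # after model_from_json(...)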
Example #5
with strategy.scope():

    model = get_model(
        token_num=200000,
        #        head_num=25,
        #        transformer_num=20,
        embed_dim=48 * 2,  # 96; must be divisible by head_num (defaults to 12)
        feed_forward_dim=512,
        seq_len=100,
        pos_num=100,  # 128
        training=True,
        trainable=None,
        dropout_rate=0.1,
    )
    compile_model(model)

#    model = load_trained_model_from_checkpoint(
#        config_path,
#        checkpoint_path,
#        training=True,
#        trainable=True,
#        seq_len=SEQ_LEN,
#    )


model.summary()
# @title Download IMDB Data
import tensorflow as tf

dataset = tf.keras.utils.get_file(
Example #6
    def test_fit(self):
        current_path = os.path.dirname(os.path.abspath(__file__))
        model_path = os.path.join(current_path, 'test_bert_fit.h5')
        sentence_pairs = [
            [['all', 'work', 'and', 'no', 'play'],
             ['makes', 'jack', 'a', 'dull', 'boy']],
            [['from', 'the', 'day', 'forth'], ['my', 'arm', 'changed']],
            [['and', 'a', 'voice', 'echoed'],
             ['power', 'give', 'me', 'more', 'power']],
        ]
        token_dict = get_base_dict()
        for pairs in sentence_pairs:
            for token in pairs[0] + pairs[1]:
                if token not in token_dict:
                    token_dict[token] = len(token_dict)
        token_list = list(token_dict.keys())
        if os.path.exists(model_path):
            steps_per_epoch = 10
            model = keras.models.load_model(
                model_path,
                custom_objects=get_custom_objects(),
            )
        else:
            steps_per_epoch = 1000
            model = get_model(
                token_num=len(token_dict),
                head_num=5,
                transformer_num=12,
                embed_dim=25,
                feed_forward_dim=100,
                seq_len=20,
                pos_num=20,
                dropout_rate=0.05,
                attention_activation='gelu',
            )
            compile_model(
                model,
                learning_rate=1e-3,
                decay_steps=30000,
                warmup_steps=10000,
                weight_decay=1e-3,
            )
        model.summary()

        def _generator():
            while True:
                yield gen_batch_inputs(
                    sentence_pairs,
                    token_dict,
                    token_list,
                    seq_len=20,
                    mask_rate=0.3,
                    swap_sentence_rate=1.0,
                )

        model.fit_generator(
            generator=_generator(),
            steps_per_epoch=steps_per_epoch,
            epochs=1,
            validation_data=_generator(),
            validation_steps=steps_per_epoch // 10,
        )
        # model.save(model_path)
        for inputs, outputs in _generator():
            predicts = model.predict(inputs)
            outputs = list(map(lambda x: np.squeeze(x, axis=-1), outputs))
            predicts = list(map(lambda x: np.argmax(x, axis=-1), predicts))
            batch_size, seq_len = inputs[-1].shape
            for i in range(batch_size):
                match, total = 0, 0
                for j in range(seq_len):
                    if inputs[-1][i][j]:
                        total += 1
                        if outputs[0][i][j] == predicts[0][i][j]:
                            match += 1
                self.assertGreater(match, total * 0.9)
            self.assertTrue(np.allclose(outputs[1], predicts[1]))
            break
Example #7
def GetModel(ucfg):
    '''ucfg: user config for the table output (keys: nnname, BS, BPE).'''

    nnname = ucfg['nnname']
    isconv = True

    if nnname == 'newmodel':
        import sys
        sys.path.append("..")
        from newmodel import tfmodel
        model, isconv = tfmodel()
        sys.path.remove("..")

    import tensorflow.keras.applications as nn
    if hasattr(nn, nnname):
        model = getattr(nn, nnname)(weights=None)

    # efficientnet: B0-B7
    elif nnname[:-2] == 'EfficientNet':
        import tfmodels.efficientnet.tfkeras as nn
        model = getattr(nn, nnname)(weights=None)

    # TF2.x Models:
    elif nnname == 'ncf':
        import tfmodels.ncf as nn
        name = 'ncfmodel'
        model = getattr(nn, name)(istrain=False)
        isconv = False

    elif nnname == 'din':
        import tfmodels.din as nn
        name = 'din'
        _, model = getattr(nn, name)(item_count=63001,
                                     cate_count=801,
                                     hidden_units=128)
        isconv = False

    # bert from bert_keras
    elif nnname == 'bert':
        isconv = False
        from keras_bert import get_base_dict, get_model, compile_model
        # Build token dictionary
        token_dict = get_base_dict()
        training = True
        if training:
            # bert base:
            # embed_dim = 768
            # headnum = 12
            # layernum = 12
            # bert large:
            embed_dim = 1024
            headnum = 16
            layernum = 24

            ff_dim = embed_dim * 4
            token_num = 30522  # WordPiece vocabulary size from the BERT paper
            model = get_model(token_num=token_num,
                              pos_num=512,
                              seq_len=512,
                              embed_dim=embed_dim,
                              transformer_num=layernum,
                              head_num=headnum,
                              feed_forward_dim=ff_dim,
                              training=training)
        else:
            # Revise lib\site-packages\keras_bert\bert.py: line164
            # "return inputs, transformed" -> "return inputs, transformed,model"
            _, _, model = get_model(token_num=len(token_dict),
                                    embed_dim=1024,
                                    head_num=16,
                                    training=training)

        compile_model(model)

    if nnname == 'mymodel':
        isconv = False

        ## ===== To add a customized model ====
        # refer to: https://keras.io/guides/sequential_model/
        from tensorflow.keras import layers
        # Define a customized model
        model = keras.Sequential()
        model.add(keras.Input(shape=(250, 250, 3)))  # 250x250 RGB images
        model.add(layers.Conv2D(32, 5, strides=2, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(3))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(3))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.Conv2D(32, 3, activation="relu"))
        model.add(layers.MaxPooling2D(2))
        # Now that we have 4x4 feature maps, time to apply global max pooling.
        model.add(layers.GlobalMaxPooling2D())
        # Finally, we add a classification layer.
        model.add(layers.Dense(10))

        ## ===== end of your codes  ======

    if True:
        g = keras.utils.model_to_dot(model, show_shapes=True)
        if nnname == 'newmodel':
            nnname = ucfg['model']
        g.write_pdf(".//outputs//tf//" + nnname + '.pdf')
    return model, isconv
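Hypothetical usage of GetModel(); only the 'nnname' key is read in the snippet above ('model' is also read for the 'newmodel' case), while BS and BPE are consumed by the caller:

ucfg = {'nnname': 'ResNet50', 'BS': 1, 'BPE': 4}
model, isconv = GetModel(ucfg)
print(model.name, 'isconv =', isconv)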
Example #8
def pretrain_model(opt_filepath, data_dir, gpu_id):
    os.environ['CUDA_VISIBLE_DEVICES'] = gpu_id
    #gpus = tf.config.experimental.list_physical_devices('GPU')
    #tf.config.experimental.set_memory_growth(gpus[0], True)

    token_dict = load_vocabulary(dict_path)
    token_list = list(token_dict.keys())
    #if not os.path.exists(os.path.join(data_dir, 'pretrain_X.npy')):
    df = pd.read_csv(os.path.join(data_dir, 'task2_trainset.csv'), dtype=str)
    df_2 = pd.read_csv(os.path.join(data_dir, 'task2_public_testset.csv'),
                       dtype=str)
    abstract_1 = df.values[:, 2]
    abstract_2 = df_2.values[:, 2]
    tokenizer = Tokenizer(token_dict)
    X_1 = collect_inputs(abstract_1, tokenizer)
    X_2 = collect_inputs(abstract_2, tokenizer)
    X = np.array(X_1 + X_2)
    #    np.save(os.path.join(data_dir, 'pretrain_X.npy'), X)
    #else:
    #    X = np.load(os.path.join(data_dir, 'pretrain_X.npy'))
    print(X.shape)

    model = load_trained_model_from_checkpoint(config_path,
                                               checkpoint_path,
                                               training=True,
                                               trainable=get_layers_name(
                                                   range(12, 25)),
                                               seq_len=512)
    compile_model(model)

    def _generator(batch_size=4):
        while True:
            idx = np.random.permutation(X.shape[0])
            for i in range(0, idx.shape[0], batch_size):
                yield gen_batch_inputs(X[i:i + batch_size],
                                       token_dict,
                                       token_list,
                                       seq_len=512,
                                       mask_rate=0.3)

    checkpoint = ModelCheckpoint(opt_filepath,
                                 monitor='val_loss',
                                 verbose=1,
                                 save_best_only=True,
                                 mode='min',
                                 save_weights_only=True)

    trainable_layer = list(range(12 * 8, 19 * 8, 8))
    batch_size = [3] * len(trainable_layer)  # one batch size per unfreezing stage
    for i, layer_i in enumerate(trainable_layer):
        for j, layer in enumerate(model.layers):
            if j >= layer_i:
                layer.trainable = True
                print(layer.name, layer.trainable)
            else:
                layer.trainable = False

        compile_model(model)
        if os.path.exists(opt_filepath):
            model.load_weights(opt_filepath)

        es = EarlyStopping(monitor='val_loss', patience=20)
        reduce_lr = ReduceLROnPlateau(factor=0.7,
                                      patience=4,
                                      verbose=1,
                                      min_lr=1e-6)
        callbacks_list = [checkpoint, es, reduce_lr]

        model.fit_generator(generator=_generator(batch_size[i]),
                            steps_per_epoch=500,
                            epochs=5000,
                            validation_data=_generator(),
                            validation_steps=200,
                            callbacks=callbacks_list)
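get_layers_name() is not defined in this snippet. A hypothetical version, assuming keras-bert accepts a list of layer-name prefixes for the trainable argument, could map transformer block indices to their encoder prefixes:

def get_layers_name(block_indices):
    # 'Encoder-12' matches every sub-layer of block 12
    # (MultiHeadSelfAttention, FeedForward and their Norm/Add layers).
    return ['Encoder-%d' % i for i in block_indices]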