Example #1
 def test_legacy(self):
     opt = AdamWarmup(
         decay_steps=10000,
         warmup_steps=5000,
         learning_rate=1e-3,
     )
     if not TF_KERAS:
         opt.lr = opt.lr  # exercise the legacy ``lr`` alias (getter and setter round-trip)
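For reference, the schedule these constructor arguments drive can be sketched in plain Python. This is a minimal sketch, assuming AdamWarmup raises the learning rate linearly from 0 to learning_rate over warmup_steps and then decays it linearly toward min_lr, clamping once decay_steps is reached; check keras_bert's source for the exact in-graph expression.

def sketch_warmup_lr(step, learning_rate=1e-3, warmup_steps=5000,
                     decay_steps=10000, min_lr=0.0):
    # assumed behaviour, not the library's exact code
    if step <= warmup_steps:
        return learning_rate * step / warmup_steps
    decayed = 1.0 - min(step, decay_steps) / float(decay_steps)
    return min_lr + (learning_rate - min_lr) * decayed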
Example #2
    def test_fit_embed(self):
        model = keras.models.Sequential()
        model.add(
            keras.layers.Embedding(
                input_shape=(None, ),
                input_dim=5,
                output_dim=16,
                mask_zero=True,
            ))
        model.add(keras.layers.Bidirectional(keras.layers.LSTM(units=8)))
        model.add(keras.layers.Dense(units=2, activation='softmax'))
        model.compile(AdamWarmup(
            decay_steps=10000,
            warmup_steps=5000,
            learning_rate=1e-3,
            min_lr=1e-4,
            amsgrad=True,
            weight_decay=1e-3,
        ),
                      loss='sparse_categorical_crossentropy')

        x = np.random.randint(0, 5, (1024, 15))
        y = (x[:, 1] > 2).astype('int32')
        model.fit(x, y, epochs=10, verbose=1)

        model_path = os.path.join(tempfile.gettempdir(),
                                  'test_warmup_%f.h5' % np.random.random())
        model.save(model_path)
        from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
        with CustomObjectScope({
                'AdamWarmup': AdamWarmup
        }):  # Workaround for incorrect global variable used in keras
            keras.models.load_model(model_path,
                                    custom_objects={'AdamWarmup': AdamWarmup})
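A usage note: tensorflow.python.keras.utils.generic_utils is a private module path that moves between TensorFlow releases. The public equivalent of the scope above, assuming a tf.keras backend, is keras.utils.custom_object_scope:

from tensorflow import keras

with keras.utils.custom_object_scope({'AdamWarmup': AdamWarmup}):
    model = keras.models.load_model(model_path)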
Example #3
    def test_fit_embed(self):
        model = keras.models.Sequential()
        model.add(
            keras.layers.Embedding(
                input_shape=(None, ),
                input_dim=5,
                output_dim=16,
                mask_zero=True,
            ))
        model.add(keras.layers.Bidirectional(keras.layers.LSTM(units=8)))
        model.add(keras.layers.Dense(units=2, activation='softmax'))
        model.compile(AdamWarmup(
            decay_steps=10000,
            warmup_steps=5000,
            lr=1e-3,
            min_lr=1e-4,
            amsgrad=True,
            weight_decay=1e-3,
        ),
                      loss='sparse_categorical_crossentropy')

        x = np.random.randint(0, 5, (1024, 15))
        y = (x[:, 1] > 2).astype('int32')
        model.fit(x, y, epochs=10)

        model_path = os.path.join(tempfile.gettempdir(),
                                  'test_warmup_%f.h5' % np.random.random())
        model.save(model_path)
        keras.models.load_model(model_path,
                                custom_objects={'AdamWarmup': AdamWarmup})
Example #4
def Graph(total_steps, warmup_steps, lr=1e-3, min_lr=1e-5):
    with graph.as_default():
        bert_model = load_trained_model_from_checkpoint(
            config_path, checkpoint_path)

        for l in bert_model.layers:
            l.trainable = True

        x_in = Input(shape=(None, ))
        c_in = Input(shape=(None, ))
        start_in = Input(shape=(None, ))
        end_in = Input(shape=(None, ))

        x, c, start, end = x_in, c_in, start_in, end_in
        x_mask = Lambda(
            lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x)

        x = bert_model([x, c])

        x_s = Attention(16, 48)([x, x, x, x_mask, x_mask])
        x_s = Lambda(lambda x: x[0] + x[1])([x, x_s])
        x_s = LayerNormalization()(x_s)
        x_s_co = Dense(768, use_bias=False)(x_s)
        x_s_out = Lambda(lambda x: x[0] + x[1])([x_s, x_s_co])
        x_s_out = LayerNormalization()(x_s_out)
        x_s_out = Lambda(lambda x: x[0] * x[1])([x_s_out, x_mask])
        ps1 = Dense(1, use_bias=False)(x_s_out)
        ps1 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
            [ps1, x_mask])

        x_e = Attention(16, 48)([x, x, x, x_mask, x_mask])
        x_e = Lambda(lambda x: x[0] + x[1])([x, x_e])
        x_e = LayerNormalization()(x_e)
        x_e_co = Dense(768, use_bias=False)(x_e)
        x_e_out = Lambda(lambda x: x[0] + x[1])([x_e, x_e_co])
        x_e_out = LayerNormalization()(x_e_out)
        x_e_out = Lambda(lambda x: x[0] * x[1])([x_e_out, x_mask])
        ps2 = Dense(1, use_bias=False)(x_e_out)
        ps2 = Lambda(lambda x: x[0][..., 0] - (1 - x[1][..., 0]) * 1e10)(
            [ps2, x_mask])

        test_model = Model([x_in, c_in], [ps1, ps2])

        train_model = Model([x_in, c_in, start_in, end_in], [ps1, ps2])

        loss1 = K.mean(
            K.categorical_crossentropy(start_in, ps1, from_logits=True))
        ps2 -= (1 - K.cumsum(start, 1)) * 1e10
        loss2 = K.mean(
            K.categorical_crossentropy(end_in, ps2, from_logits=True))
        loss = loss1 + loss2

        train_model.add_loss(loss)
        train_model.compile(
            optimizer=AdamWarmup(total_steps, warmup_steps, lr, min_lr))
        train_model.summary()

        return train_model, test_model
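A usage note: because the combined loss is attached with train_model.add_loss(loss), compile() receives only an optimizer; Keras trains against the added tensor, so no target arrays are needed when fitting this model.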
Example #5
    def train(self):
        x_trn, y_trn = self.train_data['text'][:].values, self.train_data['label'][:].values
        x_val, y_val = self.dev_data['text'][:].values, self.dev_data['label'][:].values
        x_test, y_test = self.test_data['text'][:].values, self.test_data['label'][:].values
        folds, batch_size, steps, max_len = 5, 16, 30, 300
        y_vals_vote = np.zeros(len(y_val))
        best_score, patient = 0, 0  # initialize the early-stopping counter before the loop
        model = self.create_model()
        total_steps, warmup_steps = calc_train_steps(num_example=x_trn.shape[0],
                                                     batch_size=batch_size, epochs=steps, warmup_proportion=0.2)
        adamwarmup = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=1e-6)
        losses = [self.distil_loss(), self.distil_loss(soft=True, T=self.T)]
        if self.Mode == 'patient':
            losses.extend([self.distil_loss(soft=True, T=self.T)])
        elif self.Mode == 'patient.full':
            losses.extend([self.distil_loss(soft=True, T=self.T),
                           self.distil_loss(soft=True, T=self.T),
                           self.distil_loss(soft=True, T=self.T)])
        model.compile(loss=losses, optimizer=adamwarmup)

        x1_val_tok, x2_val_tok = sentence2token(x_val, max_len=max_len)
        knowledge = self.teacher
        logit, feature10, feature11, feature12 = np.array(knowledge['logit']), \
            np.array(knowledge['layer_10']), np.array(knowledge['layer_11']), np.array(knowledge['layer_12'])
        for epoch in range(steps):
            # ==========train=========== #
            generator = batch_iter(x_trn, y_trn, logit, feature10, feature11, feature12,
                                   max_len=max_len, batch_size=batch_size)
            for x1_tok, x2_tok, log, feat10, feat11, feat12, lab in generator:
                outputs = [np.eye(2)[lab], log]
                if self.Mode == 'patient':
                    outputs.extend([feat12])
                elif self.Mode == 'patient.full':
                    outputs.extend([feat10, feat11, feat12])
                model.train_on_batch(
                    [x1_tok, x2_tok], outputs)
            # ==========eval=========== #
            y_val_pre = model.predict([x1_val_tok, x2_val_tok])[0]
            y_val_vote = np.argmax(y_val_pre, -1)  # index of the largest value is the predicted label
            f1, auc, acc, recall = score(y_val, y_val_vote)
            # ==========EarlyStop=========== #
            if f1 > best_score:
                patient = 0
                best_score = f1
                y_vals_vote = y_val_vote
                model.save_weights('models/distil_bert_model')

            print('epoch:{}, f1:{}, auc:{}, acc:{}, recall:{}, best_score:{}'.format(
                epoch, f1, auc, acc, recall, best_score))
            patient += 1
            if patient >= 5:
                break
        # ========== load the best model and predict on the test set =========== #
        model.load_weights('models/distil_bert_model')
        x1_test_tok, x2_test_tok = sentence2token(x_test, max_len=max_len)
        predict = np.argmax(model.predict([x1_test_tok, x2_test_tok])[0], -1)
        print('final dev score: ', score(y_val, y_vals_vote))
        print('final test score: ', score(y_test, predict))
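distil_loss above is project code that is not shown here. Purely as an assumption, a common form it could take is soft-target cross-entropy at temperature T, with the usual T**2 factor to keep gradient magnitudes comparable across temperatures:

import keras.backend as K

def soft_distil_loss(T=2.0):
    # hypothetical sketch; the project's actual distil_loss may differ
    def loss(y_true, y_pred):
        # y_true carries the teacher logits, y_pred the student logits
        p = K.softmax(y_true / T)
        q = K.softmax(y_pred / T)
        return K.mean(K.categorical_crossentropy(p, q)) * T ** 2
    return loss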
Example #6
 def test_fit_amsgrad(self):
     self._test_fit(
         AdamWarmup(
             decay_steps=10000,
             warmup_steps=5000,
             learning_rate=1e-3,
             min_lr=1e-4,
             amsgrad=True,
             weight_decay=1e-3,
         ))
Example #7
 def test_fit(self):
     self._test_fit(
         AdamWarmup(
             decay_steps=10000,
             warmup_steps=5000,
             lr=1e-3,
             min_lr=1e-4,
             amsgrad=False,
             weight_decay=1e-3,
         ))
Example #8
 def _get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
     total_steps, warmup_steps = calc_train_steps(
         num_example=num_example,
         batch_size=B_SIZE,
         epochs=MAX_EPOCH,
         warmup_proportion=warmup_proportion,
     )
     opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)
     if cfg.get("accum_step", None) and cfg["accum_step"] > 1:
         print("[!] using accum_step = {}".format(cfg["accum_step"]))
         from accum_optimizer import AccumOptimizer
         opt = AccumOptimizer(opt, steps_per_update=cfg["accum_step"])
     
     return opt
Example #9
    def get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
        if cfg["opt"].lower() == "nadam":
            opt = Nadam(lr=lr)
        else:
            total_steps, warmup_steps = calc_train_steps(
                num_example=num_example,
                batch_size=B_SIZE,
                epochs=MAX_EPOCH,
                warmup_proportion=warmup_proportion,
            )

            opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)

        return opt
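The step arithmetic behind calc_train_steps can be reproduced by hand. A minimal sketch, assuming one optimizer step per batch and that the helper returns (total_steps, warmup_steps) as the surrounding examples unpack it; rounding details may differ from the library:

import math

def estimated_train_steps(num_example, batch_size, epochs, warmup_proportion=0.1):
    steps_per_epoch = math.ceil(num_example / batch_size)
    total_steps = steps_per_epoch * epochs
    warmup_steps = int(total_steps * warmup_proportion)
    return total_steps, warmup_steps

# e.g. 10000 examples, batch size 32, 3 epochs -> (939, 93)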
Example #10
def create_optimizer(num_example, options):
    total_steps, warmup_steps = calc_train_steps(
        num_example=num_example,
        batch_size=options.batch_size,
        epochs=options.num_train_epochs,
        warmup_proportion=options.warmup_proportion,
    )
    optimizer = AdamWarmup(
        total_steps,
        warmup_steps,
        lr=options.learning_rate,
        epsilon=1e-6,
        weight_decay=0.01,
        weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo']
    )
    return optimizer
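A note on weight_decay_pattern, hedged since it relies on keras_bert internals: the list appears to act as substring patterns, so decoupled weight decay is applied only to variables whose names contain one of them, leaving biases and layer-norm parameters undecayed. The assumed selection rule:

PATTERNS = ('embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo')

def decays(weight_name, patterns=PATTERNS):
    # assumed rule: decay a weight iff any pattern is a substring of its name
    return any(p in weight_name for p in patterns)

print(decays('Encoder-1-MultiHeadSelfAttention/Wq:0'))  # True
print(decays('Encoder-1-LayerNorm/beta:0'))             # False: norms and biases skipped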
Example #11
    def test_fit(self):
        x = np.random.standard_normal((1000, 5))
        y = np.dot(x, np.random.standard_normal((5, 2))).argmax(axis=-1)
        model = keras.models.Sequential()
        model.add(
            keras.layers.Dense(
                units=2,
                input_shape=(5, ),
                kernel_constraint=keras.constraints.MaxNorm(1000.0),
                activation='softmax',
            ))
        model.compile(
            optimizer=AdamWarmup(
                decay_steps=10000,
                warmup_steps=5000,
                lr=1e-3,
                min_lr=1e-4,
                amsgrad=True,
                weight_decay=1e-3,
            ),
            loss='sparse_categorical_crossentropy',
        )
        model.fit(
            x,
            y,
            batch_size=10,
            epochs=110,
            callbacks=[
                keras.callbacks.EarlyStopping(monitor='loss',
                                              min_delta=1e-4,
                                              patience=3)
            ],
        )

        if not EAGER_MODE:
            model_path = os.path.join(
                tempfile.gettempdir(),
                'keras_warmup_%f.h5' % np.random.random())
            model.save(model_path)
            model = keras.models.load_model(
                model_path, custom_objects={'AdamWarmup': AdamWarmup})

        results = model.predict(x).argmax(axis=-1)
        diff = np.sum(np.abs(y - results))
        self.assertLess(diff, 100)
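A usage note: instead of passing AdamWarmup by hand at load time, keras_bert also exposes get_custom_objects(); assuming the installed version registers AdamWarmup there (Example #16 below relies on this), loading reduces to:

from keras_bert import get_custom_objects

model = keras.models.load_model(model_path, custom_objects=get_custom_objects())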
Example #12
def model_build(len_train):
    global NUM_CLASSES
    global BATCH_SIZE
    global NUM_EPOCHS
    global MIN_LR
    global LR

    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=MAXLEN,
                                                    trainable=True)

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    aux_in = Input(shape=(2, ))

    inputs = bert_model([x1_in, x2_in])
    bert = Lambda(lambda x: x[:, 0])(inputs)
    dense = concatenate([bert, aux_in])
    outputs = Dense(NUM_CLASSES, activation='softmax')(dense)
    model = Model([x1_in, x2_in, aux_in], outputs)

    decay_steps, warmup_steps = calc_train_steps(
        len_train,
        batch_size=BATCH_SIZE,
        epochs=NUM_EPOCHS,
    )

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=AdamWarmup(
                      decay_steps=decay_steps,
                      warmup_steps=warmup_steps,
                      lr=LR,
                      min_lr=MIN_LR,
                  ),
                  metrics=['sparse_categorical_accuracy'])
    del bert_model
    gc.collect()
    return model
Example #13
    def compile_model(self, data_size, loss_fn, metrics):
        inputs = self.pretrained_model.inputs[:2]
        dense = self.pretrained_model.get_layer('NSP-Dense').output
        outputs = keras.layers.Dense(units=2, activation='softmax')(dense)

        decay_steps, warmup_steps = calc_train_steps(
            data_size,
            batch_size=self.batch_size,
            epochs=self.epochs,
        )

        model = keras.models.Model(inputs, outputs)

        model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=self.lr),
            loss=loss_fn,
            metrics=[metrics],
        )
        self.model = model
        print(self.model.summary())
        return self.model
Example #14
def Graph(total_steps, warmup_steps, lr=1e-3, min_lr=1e-5):
    with graph.as_default():
        x_in = Input(shape=(None,))  # one variable-length sequence per sample (2-D once batched)
        c_in = Input(shape=(None,))
        start_in = Input(shape=(None,))
        end_in = Input(shape=(None,))

        x, c, start, end = x_in, c_in, start_in, end_in
        # expand dims to (batch, len, 1), compare with 0 to get a boolean mask,
        # then cast True/False to float 0./1.
        x_mask = Lambda(
            lambda x: K.cast(K.greater(K.expand_dims(x, 2), 0), 'float32'))(x)
        # load the pretrained model
        bert = load_trained_model_from_checkpoint(config_path, checkpoint_path)
        for l in bert.layers:
            l.trainable = True
        # 生成句向量
        x = bert([x, c])
        # start index
        ps1 = Dense(1, use_bias=False)(x)
        # apply the mask: push padded positions down by 1e10 (i.e. to a very large negative value)
        ps1 = Lambda(lambda x: x[0][..., 0] - (1-x[1][..., 0])*1e10)([ps1, x_mask])
        # end index
        ps2 = Dense(1, use_bias=False)(x)
        ps2 = Lambda(lambda x: x[0][..., 0] - (1-x[1][..., 0])*1e10)([ps2, x_mask])

        test_model = Model([x_in, c_in], [ps1, ps2])
        train_model = Model([x_in, c_in, start_in, end_in], [ps1, ps2])

        loss_1 = K.mean(K.categorical_crossentropy(start_in, ps1, from_logits=True))
        ps2 -= (1-K.cumsum(start, 1))*1e10
        loss_2 = K.mean(K.categorical_crossentropy(end_in, ps2, from_logits=True))
        loss = loss_1 + loss_2

        train_model.add_loss(loss)
        train_model.compile(optimizer=AdamWarmup(total_steps, warmup_steps, min_lr=min_lr, lr=lr))
        train_model.summary()

        return train_model, test_model
Example #15
def build(model, num, lr=0.00002):
    # @title Build Custom Model
    from tensorflow.python import keras
    from keras_bert import AdamWarmup, calc_train_steps

    inputs = model.inputs[:2]
    dense = model.get_layer('NSP-Dense').output
    outputs = keras.layers.Dense(units=len(le.classes_),
                                 activation='softmax')(dense)

    decay_steps, warmup_steps = calc_train_steps(
        num,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
    )

    model = keras.models.Model(inputs, outputs)

    for x in range(len(model.layers)):
        #print(x)
        model.layers[x].trainable = True
    '''
    model.layers[-3].trainable = True
    model.layers[-4].trainable = True
    model.layers[-5].trainable = True
    model.layers[-6].trainable = True
    model.layers[-7].trainable = True
    '''
    model.layers[-1].trainable = True
    model.layers[-2].trainable = True

    model.compile(
        AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=lr),
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )
    return model
Example #16
def main():
    args = get_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    np.random.seed(args.seed)

    if args.verbose:
        log.basicConfig(level=log.DEBUG, stream=sys.stdout)
    else:
        log.basicConfig(level=log.INFO, stream=sys.stdout)

    log.info('\n' + tabulate(sorted(vars(args).items())))
    set_logger(os.path.join(args.output_dir, args.log_file))

    pick_device()
    data = load_instances(args.dataset, args.label_col)
    classes = list(sorted(set(data[args.label_col])))
    args.n_classes = len(classes)

    token_dict = load_vocabulary(args.vocab_file)
    tokenizer = Tokenizer(token_dict)

    if args.do_train:
        folds = [i for i in args.train_dataset.split(',')]
        train_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        train_generator = TextDataFrameIterator(
            dataframe=train_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=True,
            seq_len=args.max_seq_length,
            seed=args.seed,
            do_lower_case=args.do_lower_case
        )

        folds = [i for i in args.val_dataset.split(',')]
        val_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        val_generator = TextDataFrameIterator(
            dataframe=val_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )

        total_steps, warmup_steps = calc_train_steps(
            num_example=len(train_df),
            batch_size=args.batch_size,
            epochs=args.epochs,
            warmup_proportion=args.warmup_proportion,
        )

        model = get_model(args)
        earlystop = callbacks.EarlyStopping(
            monitor='val_loss', min_delta=K.epsilon(), patience=args.earlystop,
            verbose=1, mode='auto')
        best_checkpoint = callbacks.ModelCheckpoint(
            os.path.join(args.output_dir, args.best_model),
            save_best_only=True, save_weights_only=False,
            monitor='val_loss', mode='min', verbose=1)
        csv_logger = callbacks.CSVLogger(os.path.join(args.output_dir, args.csv_logger))

        callbacks_list = [earlystop, best_checkpoint, csv_logger]
        optimizer = AdamWarmup(
            decay_steps=total_steps,
            warmup_steps=warmup_steps,
            lr=args.learning_rate,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-6,
            min_lr=1e-5,
            weight_decay=0.01,
            weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo']
        )
        model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

        cw = get_class_weights(data, args.label_col, train_generator.class_indices)
        model.fit_generator(
            train_generator,
            class_weight=cw,
            use_multiprocessing=False,
            workers=args.workers,
            callbacks=callbacks_list,
            epochs=args.epochs,
            validation_data=val_generator,
            verbose=1)

    if args.do_test:
        folds = [i for i in args.test_dataset.split(',')]
        test_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        test_generator = TextDataFrameIterator(
            dataframe=test_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )

        print('Load from %s' % os.path.join(args.output_dir, args.best_model))
        model = load_model(os.path.join(args.output_dir, args.best_model), custom_objects=get_custom_objects())
        # model.summary()
        y_score = model.predict_generator(
            test_generator,
            use_multiprocessing=False,
            workers=args.workers,
            verbose=1)

        y_pred = np.argmax(y_score, axis=1)

        pred_df = pd.DataFrame(y_score, columns=classes)
        pred_df = pred_df.assign(predictions=[classes[lbl] for lbl in y_pred])

        y_true = test_df.loc[:, args.label_col].values
        y_pred = pred_df['predictions'].values
        report = pmetrics.classification_report(y_true, y_pred, classes=classes)
        print(report.summary())
        # print('auc', pmetrics.auc(y_true, y_score, y_column=1)[0])

        result = pd.concat([test_df, pred_df], axis=1)
        result.to_csv(os.path.join(args.output_dir, args.test_predictions), index=False)

    if args.do_predict:
        test_df = load_instances(args.pred_dataset, args.label_col)
        test_generator = TextDataFrameIterator(
            dataframe=test_df,
            tokenizer=tokenizer,
            classes=None,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )

        print('Load from %s' % os.path.join(args.output_dir, args.best_model))
        model = load_model(os.path.join(args.output_dir, args.best_model), custom_objects=get_custom_objects())
        # model.summary()
        y_score = model.predict_generator(
            test_generator,
            use_multiprocessing=False,
            workers=args.workers,
            verbose=1)
        y_pred = np.argmax(y_score, axis=1)

        pred_df = pd.DataFrame(y_score, columns=classes)
        pred_df = pred_df.assign(predictions=[classes[lbl] for lbl in y_pred])
        result = pd.concat([test_df, pred_df], axis=1)
        result.to_csv(os.path.join(args.output_dir, args.pred_predictions), index=False)

    if args.do_debug:
        for dataset in [args.train_dataset, args.val_dataset, args.test_dataset]:
            folds = [i for i in dataset.split(',')]
            print('folds:', folds)
            sub_df = data[data['fold'].isin(folds)]
            generator = TextDataFrameIterator(
                dataframe=sub_df,
                tokenizer=tokenizer,
                x_col=args.text_col,
                y_col=args.label_col,
                batch_size=args.batch_size,
                shuffle=False,
                seq_len=args.max_seq_length,
            )
            for i, ([tokens, _], labels) in enumerate(generator):
                print(tokens.shape, type(tokens), labels.shape, type(labels))
                if i == 2:
                    break
Example #17
def main():
    seq_id, seq_O, seq_P, id_to_label, id_to_term = encode_seq(
        df_label=df_label, maxlen=MAX_LEN)

    class Evaluation(Callback):
        def __init__(self, val_data, interval=1):
            self.val_data = val_data
            self.interval = interval
            self.best_f1 = 0.

            self.true_vp_val = [
                (row["id"], row["OpinionTerms"], row["Polarities"],
                 row['O_start'], row['O_end']) for rowid, row in df_label[
                     df_label['id'].isin(self.val_data[0])].iterrows()
            ]

        def on_epoch_end(self, epoch, logs=None):
            if epoch % self.interval == 0:
                o_out, p_out = pred_model.predict(
                    self.val_data[1:4], batch_size=BATCH_SIZE)  # CRF probabilities
                o_pred = np.argmax(o_out, axis=2)
                p_pred = np.argmax(p_out, axis=2)

                texts = [
                    df_review[df_review['id'] == i]["Reviews"].values[0]
                    for i in self.val_data[0]
                ]

                pred_vp_val = decode_seq(self.val_data[0], o_pred, p_pred,
                                         id_to_label, texts)

                precision, recall, f1 = cal_opinion_metrics(
                    pred_vp_val, self.true_vp_val)
                if f1 > self.best_f1:
                    self.best_f1 = f1
                    self.model.save_weights(
                        f'./model_op/op_model_0924_viteb.weights')
                    print(f'best = {f1}')

    tokenizer = BertTokenizer(token_dict)

    seq_input, seq_seg = bert_text_to_seq(list(df_review["Reviews"]),
                                          tokenizer,
                                          maxlen=MAX_LEN)

    true_vp = [(row["id"], row["OpinionTerms"], row["Polarities"],
                row['O_start'], row['O_end'])
               for rowid, row in df_label.iterrows()]

    pred_vp = decode_seq(seq_id, seq_O, seq_P, id_to_label,
                         list(df_review["Reviews"]))

    cal_opinion_metrics(pred_vp, true_vp)

    seq_O = to_categorical(seq_O)

    seq_P = to_categorical(seq_P)

    df_review['pos_tag'] = df_review['Reviews'].progress_apply(pos_tag)

    with open('./data/postag2id_0922_laptop_make_up.pkl', 'rb') as f:
        postag2id = pickle.load(f)

    df_review['pos_tag'] = df_review['pos_tag'].progress_apply(
        lambda postag: [postag2id[x] for x in postag])

    seq_postag = np.array(df_review['pos_tag'].values.tolist())

    view_train, view_val = split_viewpoints(seq_id, seq_input, seq_seg, seq_O,
                                            seq_P, seq_postag)

    print(view_val[0])
    print('------------------- saving validation-set ids ---------------------')
    print('saving the final validation val ids')

    # np.save('./data/final_makeup_laptop_val_ids', view_val[0])
    print('------------------- done saving ---------------------------')
    # exit()
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)
    for l in bert_model.layers:
        l.trainable = True

    x1_in = Input(shape=(MAX_LEN, ), name='x1_in')
    x2_in = Input(shape=(MAX_LEN, ), name='x2_in')
    o_in = Input(shape=(
        MAX_LEN,
        len(id_to_term) + 1,
    ), name='o_in')
    p_in = Input(shape=(
        MAX_LEN,
        len(id_to_label) + 1,
    ), name='p_in')

    pos_tag_in = Input(shape=(MAX_LEN, ), name='pos_tag_in')
    pos_tag_emb = Embedding(len(postag2id), POS_TAG_DIM,
                            trainable=True)(pos_tag_in)

    x = bert_model([x1_in, x2_in])
    x = Concatenate()([x, pos_tag_emb])

    p_out = Dense(len(id_to_label) + 1,
                  activation='softmax')(x)  # p_out is the polarity output
    crf = CRF(len(id_to_term) + 1)
    o_out = crf(x)
    loss_seq_O = crf.loss_function(o_in, o_out)  # putting this straight into a Lambda layer corrupts the graph
    loss_seq_O = Lambda(lambda x: K.mean(x))(loss_seq_O)
    # loss_seq_O = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])), name='loss_seq_O')([o_in, o_out])

    loss_p = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])),
                    name='loss_c')([p_in, p_out])

    train_model = Model([x1_in, x2_in, pos_tag_in, o_in, p_in], [o_out, p_out])
    pred_model = Model([x1_in, x2_in, pos_tag_in], [o_out, p_out])
    train_model._losses = []
    train_model._per_input_losses = {}
    train_model.add_loss(loss_seq_O)
    train_model.add_loss(loss_p)

    print(view_train[0].shape[0])

    total_steps, warmup_steps = calc_train_steps(
        num_example=view_train[0].shape[0],
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        warmup_proportion=0.1,
    )
    # optimizer = Adam(lr=1e-5)
    optimizer = AdamWarmup(total_steps, warmup_steps, lr=5e-5, min_lr=1e-6)

    train_model.compile(optimizer=optimizer)
    train_model.metrics_tensors.append(loss_seq_O)
    train_model.metrics_names.append('loss_seq_O')
    train_model.metrics_tensors.append(loss_p)
    train_model.metrics_names.append('loss_p')
    train_model.summary()

    eval_callback = Evaluation(val_data=view_val)

    train_model.fit(view_train[1:],
                    epochs=EPOCHS,
                    shuffle=True,
                    batch_size=BATCH_SIZE,
                    callbacks=[eval_callback])
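A usage note: _losses, _per_input_losses, metrics_tensors, and metrics_names are private internals of Keras 2.2/2.3-era Model objects. Clearing the first two resets any previously registered losses before add_loss is called, and appending to the latter pair surfaces the two loss tensors as named metrics in the training logs; these attributes are absent from modern tf.keras.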
Example #18
def build_model(args):

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    K.set_session(tf.Session(config=config))
    
    print("Loading model..")
    custom_objects = get_custom_objects()
    bert_model = load_model(args.model, custom_objects=custom_objects)
    
    for layer in bert_model.layers:
        layer.trainable = False
    
    input_features = [Input(shape=(get_label_dim(args.train),)) for _ in args.features_train]

    stacked = Lambda(lambda x: K.stack(x, axis=1))([bert_model.output, *input_features])

    stacked = Permute((2, 1), name="stack_permute")(stacked)

    output_layer = TimeDistributed(Dense(1, activation="tanh", name="decision"))(stacked)
    output_layer = Flatten(name="time_distributed_flatten")(output_layer)
    output_layer = Activation("softmax")(output_layer)

    # The bert model has multiple inputs, so unpack those.
    model = Model([*bert_model.input, *input_features], output_layer)

    if args.gpus > 1:
        template_model = model
        model = multi_gpu_model(template_model, gpus=args.gpus)

    callbacks = [Metrics()]

    if args.patience > -1:
        callbacks.append(EarlyStopping(patience=args.patience, verbose=1))

    if args.checkpoint_interval > 0:
        callbacks.append(ModelCheckpoint(args.output_file + ".checkpoint-{epoch}",  period=args.checkpoint_interval))

    total_steps, warmup_steps =  calc_train_steps(num_example=get_example_count(args.train),
                                                batch_size=args.batch_size, epochs=args.epochs,
                                                warmup_proportion=0.01)

    optimizer = AdamWarmup(total_steps, warmup_steps, lr=args.lr)

    model.compile(loss=["categorical_crossentropy"], optimizer=optimizer, metrics=[])

    print(model.summary(line_length=118))
    print("Number of GPUs in use:", args.gpus)
    print("Batch size:", args.batch_size)
    print("Learning rate:", args.lr)
    print("Dropout:", args.dropout)

    model.fit_generator(data_generator(args.train, args.batch_size, seq_len=args.seq_len, features=args.features_train),
                        steps_per_epoch=ceil( get_example_count(args.train) / args.batch_size ),
                        use_multiprocessing=True, epochs=args.epochs, callbacks=callbacks,
                        validation_data=data_generator(args.dev, args.eval_batch_size, seq_len=args.seq_len, features=args.features_dev),
                        validation_steps=ceil( get_example_count(args.dev) / args.eval_batch_size ))
                        

    print("Saving model:", args.output_file)
    if args.gpus > 1:
        template_model.save(args.output_file)
    else:
        model.save(args.output_file)
Example #19
def main(argv):
    args = argparser().parse_args(argv[1:])
    bert, vocab = load_pretrained(args)
    tokenizer = Tokenizer(vocab, cased=not args.do_lower_case)
    labels, train_sents, dev_sents, test_sents = load_data(args)

    train_data = create_examples(train_sents, tokenizer, labels, args)
    dev_data = create_examples(dev_sents, tokenizer, labels, args)
    test_data = create_examples(test_sents, tokenizer, labels, args)

    output = Dense(len(labels), activation='softmax')(bert.output)
    model = Model(inputs=bert.inputs, outputs=output)
    model.summary(line_length=80)

    train_input = np.array([e.input_ids for e in train_data])
    train_in_mask = np.array([e.input_mask for e in train_data])
    train_segments = np.array([e.segment_ids for e in train_data])
    train_output = np.expand_dims(
        np.array([e.label_ids for e in train_data]), -1)
    train_head_flags = np.array([e.head_flags for e in train_data])

    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_input),
        batch_size=args.train_batch_size,
        epochs=args.num_train_epochs,
        warmup_proportion=0.1,
    )

    optimizer = AdamWarmup(
        total_steps,
        warmup_steps,
        lr=args.learning_rate,
        weight_decay=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo'],
        min_lr=0    # TODO
    )

    model.compile(
        loss='sparse_categorical_crossentropy',
        sample_weight_mode='temporal',
        optimizer=optimizer
    )

    dev_input = np.array([e.input_ids for e in dev_data])
    dev_in_mask = np.array([e.input_mask for e in dev_data])
    dev_segments = np.array([e.segment_ids for e in dev_data])
    dev_output = np.expand_dims(np.array([e.label_ids for e in dev_data]),-1)
    dev_head_flags = np.array([e.head_flags for e in dev_data])

    train_start = datetime.now()
    print('start training at', train_start)
    train_cb = EvaluationCallback(
        'train', train_input, train_segments, train_output, train_head_flags)
    dev_cb = EvaluationCallback(
        'dev', dev_input, dev_segments, dev_output, dev_head_flags)
    callbacks = [train_cb, dev_cb]
    model.fit(
        [train_input, train_segments],
        train_output,
        sample_weight=train_in_mask,
        batch_size=args.train_batch_size,
        epochs=args.num_train_epochs,
        verbose=1,
        callbacks=callbacks
    )
    train_end = datetime.now()
    print('done training', train_end, 'time', train_end-train_start)

    if args.predict is not None:
        if args.predict == 'dev':
            pred_data, pred_sents = dev_data, dev_sents
        else:
            assert args.predict == 'test'
            pred_data, pred_sents = test_data, test_sents
        pred_input = np.array([e.input_ids for e in pred_data])
        pred_segments = np.array([e.segment_ids for e in pred_data])
        pred = model.predict(
            [pred_input, pred_segments],
            verbose=1
        )
        pred_tokens = [[t for t, _ in s] for s in pred_sents]
        pred_head_flags = np.array([e.head_flags for e in pred_data])
        write_predictions(pred_tokens, pred_input, pred_head_flags,
                          pred, vocab, labels, args.output)
    print('best dev result', dev_cb.best, 'for epoch', dev_cb.best_epoch)
    return 0
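A usage note on sample_weight_mode='temporal' above: it tells Keras to expect one sample weight per timestep, so passing the token mask as sample_weight zeroes the loss on padded positions. A toy check of the weighting rule, assuming per-token weights simply scale per-token losses before averaging:

import numpy as np

loss_per_token = np.array([[0.7, 0.2, 0.9]])
mask = np.array([[1.0, 1.0, 0.0]])  # third token is padding
print((loss_per_token * mask).sum() / mask.sum())  # -> 0.45, padding ignored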
Example #20
# base_model.fit([train_sentence_indices], train_tags, validation_data=([devel_sentence_indices], devel_tags), batch_size=batch_size, epochs=50, verbose=1)

print("Loading BERT")

total_steps, warmup_steps = calc_train_steps(
    num_example=len(train_sentences),
    batch_size=batch_size,
    epochs=10,
    warmup_proportion=0.1,
)

print(total_steps, warmup_steps)

optimizer = AdamWarmup(5 * total_steps,
                       warmup_steps,
                       lr=2e-5,
                       min_lr=2e-7,
                       weight_decay=weight_decay)

# import pdb; pdb.set_trace()
bert_model = load_trained_model_from_checkpoint(config_path,
                                                checkpoint_path,
                                                training=False,
                                                trainable=True,
                                                seq_len=max_sequence_len)
# bert_model, _ = build_model_from_config(config_path, training=False, trainable=True, seq_len=max_sequence_len)
#bert_model.summary(line_length=120)

if use_crf:
    #prediction_layer = layers.Dense(768, activation='tanh')(bert_model.output)
    prediction_layer = CRF(len(tag_dict),
Example #21
    trainable=True,
    seq_len=SEQ_LEN,
)

inputs = model.inputs[:2]
dense = model.get_layer('NSP-Dense').output
outputs = keras.layers.Dense(units=1, activation='sigmoid')(dense)
model = keras.models.Model(inputs, outputs)

total_steps, warmup_steps = calc_train_steps(
    num_example=x_train[0].shape[0],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    warmup_proportion=0.1,
)
optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=LR)

model.compile(
    RAdam(LR),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

sess = K.get_session()
uninitialized_variables = set(
    [i.decode('ascii') for i in sess.run(tf.report_uninitialized_variables())])
init_op = tf.variables_initializer([
    v for v in tf.global_variables()
    if v.name.split(':')[0] in uninitialized_variables
])
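A usage note on this excerpt: the AdamWarmup instance is built but the model is then compiled with RAdam(LR), leaving it unused, and the snippet stops before the initializer is run; Example #25 below completes the same pattern with sess.run(init_op).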
Example #22
def manual_train():
    #frac = args.frac
    args = get_args()
    fold = args.fold
    EPOCHS = args.epochs

    BATCH_SIZE = 128
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()




        ##Begin to define model
        from keras_bert import load_trained_model_from_checkpoint

        model = load_trained_model_from_checkpoint(config_path, checkpoint_path, training=True, seq_len=SEQ_LEN, )


        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        inputs = model.inputs[:2]
        dense_bert = model.get_layer('NSP-Dense').output


        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )

        # New input from manual

        data = get_feature_bert_wv().add_prefix('fea_')
        manual_fea_len = len([col for col in data.columns if col.startswith('fea_')])

        logger.info(f'manual_fea_len:{manual_fea_len}')
        manual_feature = keras.Input(shape=(manual_fea_len,), name='manual_feature', dtype='float32')
        inputs = inputs + [manual_feature]


        manual_feature = keras.layers.Dense(round(num_classes*0.6),  name='manual_dense', activation='relu')(manual_feature)
        manual_feature = keras.layers.Dropout(0.5)(manual_feature)
        #manual_feature = keras.layers.Dense(round(num_classes), activation='relu')(manual_feature)

        fc_ex = keras.layers.concatenate([dense_bert, manual_feature], axis=1)
        # End input from manual

        #fc_ex = keras.layers.Dense(units=1024, activation='softmax')(fc_ex)

        outputs = keras.layers.Dense(units=num_classes, activation='softmax')(fc_ex)

        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        model.summary(line_length=120)
        ##End to define model

        input1_col = [col for col in X.columns if str(col).startswith('bert_')]
        input3_col = [col for col in X.columns if str(col).startswith('fea_')]
        #max_words = len(input1_col)
        model #= get_model(max_words)

        #get_feature_manual.cache_clear()
        Y_cat = keras.utils.to_categorical(y, num_classes=num_classes)
        #folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index
        train_idx, test_idx = split_df_by_index(X,fold)

        logger.info(f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}')
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]

        logger.info(f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} ')
        #for sn in range(5):
        input1 = train_x.loc[:, input1_col]#.astype(np.float32)
        input2 = np.zeros_like(input1)#.astype(np.int8)
        input3 = train_x.loc[:, input3_col]
        logger.info(f'NN Input1:{input1.shape}, Input2:{input2.shape}, Input3:{input3.shape}')

        logger.info(f'NN train_x:{train_x[:3]}')

        from keras_bert import get_custom_objects
        import tensorflow as tf

        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2, input3], train_y,
                            validation_data = ([
                                                val_x.loc[:, input1_col],
                                                np.zeros_like(val_x.loc[:, input1_col]),
                                                val_x.loc[:, input3_col]
                                               ],
                                               val_y),
                            epochs=EPOCHS,  shuffle=True, batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx] )]
                      #steps_per_epoch=1000, validation_steps=10
                      )



            #gen_sub(model, X_test, sn)

    return his
Example #23
    def train(self):
        x_trn, y_trn = self.train_data['text'][:].values, self.train_data[
            'label'][:].values
        x_val, y_val = self.dev_data['text'][:].values, self.dev_data[
            'label'][:].values
        x_test, y_test = self.test_data['text'][:].values, self.test_data[
            'label'][:].values
        folds, batch_size, steps, max_len = 5, 16, 30, 300
        y_vals = np.zeros((len(x_val), 2))
        y_vals_vote = np.zeros(len(x_val))
        y_test_pre = np.zeros((len(x_test), 2))
        knowledge_dict = dict()
        model = self.create_model()
        total_steps, warmup_steps = calc_train_steps(
            num_example=x_trn.shape[0],
            batch_size=batch_size,
            epochs=steps,
            warmup_proportion=0.2)
        adamwarmup = AdamWarmup(total_steps,
                                warmup_steps,
                                lr=1e-5,
                                min_lr=1e-7)
        model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=1e-5))  # note: the AdamWarmup built above is unused in this excerpt
        model.save_weights('origin')

        patient, best_score = 0, -1
        x1_trn_tok, x2_trn_tok = sentence2token(x_trn, max_len=max_len)
        x1_val_tok, x2_val_tok = sentence2token(x_val, max_len=max_len)
        for epoch in range(steps):
            # ==========train=========== #
            generator = batch_iter(x_trn,
                                   y_trn,
                                   max_len=max_len,
                                   batch_size=batch_size)
            for x1_tok, x2_tok, lab in generator:
                model.train_on_batch([x1_tok, x2_tok], np.eye(2)[lab])
            # ==========eval=========== #
            y_val_pre = model.predict([x1_val_tok, x2_val_tok])
            y_val_vote = np.argmax(y_val_pre, -1)  # index of the largest value is the predicted label
            f1, auc, acc, recall = score(y_val, y_val_vote)
            # ==========EarlyStop=========== #
            if f1 > best_score:
                patient = 0
                best_score = f1
                y_vals_vote = y_val_vote
                y_vals = y_val_pre
                model.save_weights('weight')
                # =========save knowledge==========
                knowledge_dict = self.save_knowlege(x1_trn_tok, x2_trn_tok,
                                                    model, knowledge_dict)

            print('epoch:{}, f1:{}, auc:{}, acc:{}, recall:{}, best_score:{}'.
                  format(epoch, f1, auc, acc, recall, best_score))
            patient += 1
            if patient >= 5:
                break
        # ========== load the best model and predict on the test set =========== #
        model.load_weights('weight')
        x1_test_tok, x2_test_tok = sentence2token(x_test, max_len=max_len)
        predict = np.argmax(model.predict([x1_test_tok, x2_test_tok]), -1)
        print('final dev score: ', score(y_val, y_vals_vote))
        print('final test score: ', score(y_test, predict))
        #         return y_test_vote, y_vals_vote, y_test, y_vals
        with open("teacher_knowledge.json", "w") as f:
            json.dump(knowledge_dict, f)
Example #24
def manual_train():
    #frac = args.frac
    args = get_args()
    fold = args.fold
    EPOCHS = args.epochs

    BATCH_SIZE = 32
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()

        ##Begin to define model
        from keras_bert import load_trained_model_from_checkpoint

        model_bert = load_trained_model_from_checkpoint(
            config_path,
            checkpoint_path,
            training=True,
            seq_len=SEQ_LEN,
        )

        #model_right = load_trained_model_from_checkpoint(config_path, checkpoint_path, training=True, seq_len=SEQ_LEN, )

        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        app_des = model_bert.inputs[:2]
        dense_app_des = model_bert.get_layer('NSP-Dense').output

        model_bert = keras.models.Model(inputs=app_des,
                                        outputs=dense_app_des,
                                        name='bert_output')

        inputs = [
            keras.models.Input(shape=(SEQ_LEN, ), name=f'INPUT-{name}')
            for name in range(4)
        ]

        left = model_bert(inputs[:2])
        right = model_bert(inputs[2:])

        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )

        fc_ex = keras.layers.concatenate([left, right], axis=1)
        #fc_ex = keras.layers.Subtract()([left, right])
        # End input from manual

        #outputs = keras.layers.Dense(units=8, activation='softmax')(fc_ex)

        outputs = keras.layers.Dense(units=1, activation='sigmoid')(fc_ex)

        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=LR),
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )

        model.summary(line_length=120)
        ##End to define model

        input1_col = [col for col in X.columns if str(col).startswith('bert_')]
        input3_col = [col for col in X.columns if str(col).startswith('fea_')]
        #max_words = len(input1_col)
        model  #= get_model(max_words)

        Y_cat = y

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index_no_bin
        train_idx, test_idx = split_df_by_index_no_bin(X, fold)

        logger.info(
            f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}'
        )
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]

        logger.info(
            f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} '
        )
        #for sn in range(5):
        input1 = train_x.loc[:, input1_col]  #.astype(np.float32)
        input2 = np.zeros_like(input1)  #.astype(np.int8)
        input3 = train_x.loc[:, input3_col]
        input4 = np.zeros_like(input3)
        logger.info(
            f'NN Input1:{input1.shape}, Input2:{input2.shape}, Input3:{input3.shape}'
        )

        logger.info(f'NN train_x:{train_x[:3]}')

        from keras_bert import get_custom_objects
        import tensorflow as tf

        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2, input3, input4],
                            train_y,
                            validation_data=([
                                val_x.loc[:, input1_col],
                                np.zeros_like(val_x.loc[:, input1_col]),
                                val_x.loc[:, input3_col],
                                np.zeros_like(val_x.loc[:, input3_col]),
                            ], val_y),
                            epochs=EPOCHS,
                            shuffle=True,
                            batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx])]
                            #steps_per_epoch=1000, validation_steps=10
                            )

            #gen_sub(model, X_test, sn)

    return his
Example #25
test_x, test_y = load_data(test_path)

# define the custom model
inputs = model.inputs[:2]
dense = model.get_layer('NSP-Dense').output  # grab the output of the 'NSP-Dense' layer
outputs = keras.layers.Dense(units=2, activation='softmax')(dense)  # dense layer + softmax
decay_steps, warmup_steps = calc_train_steps(  # LR decay steps and warmup steps
    train_y.shape[0],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    )

model = keras.models.Model(inputs, outputs)
model.compile(  # compile the model for training
    AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
    )

# initialize all variables
sess = K.get_session()
uninitialized_variables = set([i.decode('ascii') for i in sess.run(tf.report_uninitialized_variables())])
init_op = tf.variables_initializer(
    [v for v in tf.global_variables() if v.name.split(':')[0] in uninitialized_variables]
    )
sess.run(init_op)

# convert to a TPU model
tpu_address = 'grpc://' + os.environ['COLAB_TPU_ADDR']
strategy = tf.contrib.tpu.TPUDistributionStrategy(
Example #26
print("Number of LR decay steps: {0} \nNumber of warm-up steps: {1}".format(
    decay_steps, warmup_steps))

# Next we read the BERT model that we just loaded:
config_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_config.json')
checkpoint_file = os.path.join(BERT_PRETRAINED_DIR, 'bert_model.ckpt')
bert_model = load_trained_model_from_checkpoint(config_file,
                                                checkpoint_file,
                                                training=True,
                                                seq_len=max_seq_len)
print("Lookup model architecture with: bert_model.summary()")
print("I dare ya'")

# Initialize custom Adam optimizer with warmup:
adam_warmup = AdamWarmup(lr=learning_rate,
                         decay_steps=decay_steps,
                         warmup_steps=warmup_steps,
                         weight_decay=weight_decay)

# Picking BERT layers and building output layers:
input_layer = bert_model.input
embedding_output = bert_model.layers[-6].output
output_layer = Dense(1,
                     activation='sigmoid',
                     kernel_initializer=TruncatedNormal(stddev=0.02),
                     name='class_output')(embedding_output)
model = Model(inputs=input_layer, outputs=output_layer)
model.compile(loss='binary_crossentropy',
              optimizer=adam_warmup,
              metrics=["acc"])
model.summary()
Example #27
    model = create_cls_model(len(labels))
    train_D = DataGenerator(train_data)
    test_D = DataGenerator(test_data)

    print("begin model training...")
    # checkpoint the latest model file with the best val_acc
    filepath = "models/%s-{epoch:02d}-{val_acc:.4f}.h5" % DATA_DIR.split("/")[-1]
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    # add warmup
    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_data),
        batch_size=BATCH_SIZE,
        epochs=EPOCH,
        warmup_proportion=0.1,
    )
    optimizer = AdamWarmup(total_steps, warmup_steps, lr=5e-5, min_lr=1e-7)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy']
    )
    model.fit_generator(
        train_D.__iter__(),
        steps_per_epoch=len(train_D),
        epochs=EPOCH,
        validation_data=test_D.__iter__(),
        validation_steps=len(test_D),
        callbacks=[checkpoint]
    )
    print("finish model training!")
Example #28
def train_base():
    args = get_args()
    #frac = args.frac
    fold = args.fold
    EPOCHS = args.epochs

    BATCH_SIZE = 128
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()

        ##Begin to define model
        from keras_bert import load_trained_model_from_checkpoint

        model = load_trained_model_from_checkpoint(
            config_path,
            checkpoint_path,
            training=True,
            seq_len=SEQ_LEN,
        )
        model.summary(line_length=120)

        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        inputs = model.inputs[:2]
        dense = model.get_layer('NSP-Dense').output
        keras.models.Model(inputs, dense).summary()

        outputs = keras.layers.Dense(units=num_classes,
                                     activation='softmax')(dense)

        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )

        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=LR),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        ##End to define model

        input1_col = [col for col in X.columns if str(col).startswith('bert_')]
        input2_col = [col for col in X.columns if str(col).startswith('fea_')]
        #max_words = len(input1_col)
        model  #= get_model(max_words)

        #get_feature_manual.cache_clear()
        Y_cat = keras.utils.to_categorical(y, num_classes=num_classes)
        #folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index
        train_idx, test_idx = split_df_by_index(X, fold)

        logger.info(
            f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}'
        )
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]

        logger.info(
            f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} '
        )

        #train_x, train_y = filter_short_desc(train_x, train_y)

        input1 = train_x.loc[:, input1_col]  #.astype(np.float32)
        input2 = np.zeros_like(input1)  #.astype(np.int8)

        logger.info(f'NN train_x:{train_x[:3]}')
        min_len_ratio = get_args().min_len_ratio
        max_bin = get_args().max_bin
        logger.info(
            f'NN Input1:{input1.shape}, Input2:{input2.shape}, SEQ_LEN:{SEQ_LEN}, min_len_ratio:{min_len_ratio}, bin:{max_bin} '
        )

        from keras_bert import get_custom_objects
        import tensorflow as tf
        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2],
                            train_y,
                            validation_data=([
                                val_x.loc[:, input1_col],
                                np.zeros_like(val_x.loc[:, input1_col])
                            ], val_y),
                            epochs=EPOCHS,
                            shuffle=True,
                            batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx])]
                            #steps_per_epoch=1000, validation_steps=10
                            )

            #gen_sub(model, X_test, sn)

    return his