Example #1
 def test_calc_train_steps(self):
     total, warmup = calc_train_steps(
         num_example=1024,
         batch_size=32,
         epochs=10,
         warmup_proportion=0.1,
     )
     self.assertEqual((320, 32), (total, warmup))
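The pair asserted above follows from a simple step count: steps per epoch times epochs, with a fraction of those steps reserved for warm-up. Below is a minimal sketch of that arithmetic for illustration only; the helper name calc_train_steps_sketch is hypothetical, and the actual keras_bert implementation may handle rounding of non-divisible sizes differently.

import math

def calc_train_steps_sketch(num_example, batch_size, epochs, warmup_proportion):
    # steps per epoch, rounded up: 1024 / 32 = 32
    steps_per_epoch = math.ceil(num_example / batch_size)
    # total optimizer steps across all epochs: 32 * 10 = 320
    total_steps = steps_per_epoch * epochs
    # warm-up steps as a fraction of the total: int(320 * 0.1) = 32
    warmup_steps = int(total_steps * warmup_proportion)
    return total_steps, warmup_steps

assert calc_train_steps_sketch(1024, 32, 10, 0.1) == (320, 32)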
Example #2
    def train(self):
        x_trn, y_trn = self.train_data['text'][:].values, self.train_data['label'][:].values
        x_val, y_val = self.dev_data['text'][:].values, self.dev_data['label'][:].values
        x_test, y_test = self.test_data['text'][:].values, self.test_data['label'][:].values
        folds, batch_size, steps, max_len = 5, 16, 30, 300
        y_vals_vote = np.zeros(len(y_val))
        patient, best_score = 0, 0  # initialize the early-stopping counter before the loop
        model = self.create_model()
        total_steps, warmup_steps = calc_train_steps(num_example=x_trn.shape[0],
                                                     batch_size=batch_size, epochs=steps, warmup_proportion=0.2)
        adamwarmup = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=1e-6)
        losses = [self.distil_loss(), self.distil_loss(soft=True, T=self.T)]
        if self.Mode == 'patient':
            losses.extend([self.distil_loss(soft=True, T=self.T)])
        elif self.Mode == 'patient.full':
            losses.extend([self.distil_loss(soft=True, T=self.T),
                           self.distil_loss(soft=True, T=self.T),
                           self.distil_loss(soft=True, T=self.T)])
        model.compile(loss=losses, optimizer=adamwarmup)

        x1_val_tok, x2_val_tok = sentence2token(x_val, max_len=max_len)
        knowledge = self.teacher
        logit, feature10, feature11, feature12 = np.array(knowledge['logit']), \
            np.array(knowledge['layer_10']), np.array(knowledge['layer_11']), np.array(knowledge['layer_12'])
        for epoch in range(steps):
            # ==========train=========== #
            generator = batch_iter(x_trn, y_trn, logit, feature10, feature11, feature12,
                                   max_len=max_len, batch_size=batch_size)
            for x1_tok, x2_tok, log, feat10, feat11, feat12, lab in generator:
                outputs = [np.eye(2)[lab], log]
                if self.Mode == 'patient':
                    outputs.extend([feat12])
                elif self.Mode == 'patient.full':
                    outputs.extend([feat10, feat11, feat12])
                model.train_on_batch(
                    [x1_tok, x2_tok], outputs)
            # ==========eval=========== #
            y_val_pre = model.predict([x1_val_tok, x2_val_tok])[0]
            y_val_vote = np.argmax(y_val_pre, -1)  # use the index of the largest value as the prediction
            f1, auc, acc, recall = score(y_val, y_val_vote)
            # ==========EarlyStop=========== #
            if f1 > best_score:
                patient = 0
                best_score = f1
                y_vals_vote = y_val_vote
                model.save_weights('models/distil_bert_model')

            print('epoch:{}, f1:{}, auc:{}, acc:{}, recall:{}, best_score:{}'.format(
                epoch, f1, auc, acc, recall, best_score))
            patient += 1
            if patient >= 5:
                break
        # ==========load the best model and predict on the test set=========== #
        model.load_weights('models/distil_bert_model')
        x1_test_tok, x2_test_tok = sentence2token(x_test, max_len=max_len)
        predict = np.argmax(model.predict([x1_test_tok, x2_test_tok])[0], -1)
        print('final dev score: ', score(y_val, y_vals_vote))
        print('final test score: ', score(y_test, predict))
Example #3
    def get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
        if cfg["opt"].lower() == "nadam":
            opt = Nadam(lr=lr)
        else:
            total_steps, warmup_steps = calc_train_steps(
                num_example=num_example,
                batch_size=B_SIZE,
                epochs=MAX_EPOCH,
                warmup_proportion=warmup_proportion,
            )

            opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)

        return opt
Example #4
 def _get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
     total_steps, warmup_steps = calc_train_steps(
         num_example=num_example,
         batch_size=B_SIZE,
         epochs=MAX_EPOCH,
         warmup_proportion=warmup_proportion,
     )
     opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)
     if cfg.get("accum_step", None) and cfg["accum_step"] > 1:
         print("[!] using accum_step = {}".format(cfg["accum_step"]))
         from accum_optimizer import AccumOptimizer
         opt = AccumOptimizer(opt, steps_per_update=cfg["accum_step"])
     
     return opt
Example #5
def create_optimizer(num_example, options):
    total_steps, warmup_steps = calc_train_steps(
        num_example=num_example,
        batch_size=options.batch_size,
        epochs=options.num_train_epochs,
        warmup_proportion=options.warmup_proportion,
    )
    optimizer = AdamWarmup(
        total_steps,
        warmup_steps,
        lr=options.learning_rate,
        epsilon=1e-6,
        weight_decay=0.01,
        weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo']
    )
    return optimizer
Example #6
def model_build(len_train):
    global NUM_CLASSES
    global BATCH_SIZE
    global NUM_EPOCHS
    global MIN_LR
    global LR

    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=MAXLEN,
                                                    trainable=True)

    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    aux_in = Input(shape=(2, ))

    inputs = bert_model([x1_in, x2_in])
    bert = Lambda(lambda x: x[:, 0])(inputs)
    dense = concatenate([bert, aux_in])
    outputs = Dense(NUM_CLASSES, activation='softmax')(dense)
    model = Model([x1_in, x2_in, aux_in], outputs)

    decay_steps, warmup_steps = calc_train_steps(
        len_train,
        batch_size=BATCH_SIZE,
        epochs=NUM_EPOCHS,
    )

    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=AdamWarmup(
                      decay_steps=decay_steps,
                      warmup_steps=warmup_steps,
                      lr=LR,
                      min_lr=MIN_LR,
                  ),
                  metrics=['sparse_categorical_accuracy'])
    del bert_model
    gc.collect()
    return model
Example #7
    def compile_model(self, data_size, loss_fn, metrics):
        inputs = self.pretrained_model.inputs[:2]
        dense = self.pretrained_model.get_layer('NSP-Dense').output
        outputs = keras.layers.Dense(units=2, activation='softmax')(dense)

        decay_steps, warmup_steps = calc_train_steps(
            data_size,
            batch_size=self.batch_size,
            epochs=self.epochs,
        )

        model = keras.models.Model(inputs, outputs)

        model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=self.lr),
            loss=loss_fn,
            metrics=[metrics],
        )
        self.model = model
        print(self.model.summary())
        return self.model
Example #8
def build(model, num, lr=0.00002):
    # @title Build Custom Model
    from tensorflow.python import keras
    from keras_bert import AdamWarmup, calc_train_steps

    inputs = model.inputs[:2]
    dense = model.get_layer('NSP-Dense').output
    outputs = keras.layers.Dense(units=len(le.classes_),
                                 activation='softmax')(dense)

    decay_steps, warmup_steps = calc_train_steps(
        num,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
    )

    model = keras.models.Model(inputs, outputs)

    for x in range(len(model.layers)):
        #print(x)
        model.layers[x].trainable = True
    '''
    model.layers[-3].trainable = True
    model.layers[-4].trainable = True
    model.layers[-5].trainable = True
    model.layers[-6].trainable = True
    model.layers[-7].trainable = True
    '''
    model.layers[-1].trainable = True
    model.layers[-2].trainable = True

    model.compile(
        AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=lr),
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )
    return model
Example #9
def build_model(args):

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    #config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    K.set_session(tf.Session(config=config))
    
    print("Loading model..")
    custom_objects = get_custom_objects()
    bert_model = load_model(args.model, custom_objects=custom_objects)
    
    for layer in bert_model.layers:
        layer.trainable = False
    
    input_features = [Input(shape=(get_label_dim(args.train),)) for _ in args.features_train]

    stacked = Lambda(lambda x: K.stack(x, axis=1))([bert_model.output, *input_features])

    stacked = Permute((2, 1), name="stack_permute")(stacked)

    output_layer = TimeDistributed(Dense(1, activation="tanh", name="decision"))(stacked)
    output_layer = Flatten(name="time_distributed_flatten")(output_layer)
    output_layer = Activation("softmax")(output_layer)

    # The bert model has multiple inputs, so unpack those.
    model = Model([*bert_model.input, *input_features], output_layer)

    if args.gpus > 1:
        template_model = model
        model = multi_gpu_model(template_model, gpus=args.gpus)

    callbacks = [Metrics()]

    if args.patience > -1:
        callbacks.append(EarlyStopping(patience=args.patience, verbose=1))

    if args.checkpoint_interval > 0:
        callbacks.append(ModelCheckpoint(args.output_file + ".checkpoint-{epoch}",  period=args.checkpoint_interval))

    total_steps, warmup_steps =  calc_train_steps(num_example=get_example_count(args.train),
                                                batch_size=args.batch_size, epochs=args.epochs,
                                                warmup_proportion=0.01)

    optimizer = AdamWarmup(total_steps, warmup_steps, lr=args.lr)

    model.compile(loss=["categorical_crossentropy"], optimizer=optimizer, metrics=[])

    print(model.summary(line_length=118))
    print("Number of GPUs in use:", args.gpus)
    print("Batch size:", args.batch_size)
    print("Learning rate:", args.lr)
    print("Dropout:", args.dropout)

    model.fit_generator(data_generator(args.train, args.batch_size, seq_len=args.seq_len, features=args.features_train),
                        steps_per_epoch=ceil( get_example_count(args.train) / args.batch_size ),
                        use_multiprocessing=True, epochs=args.epochs, callbacks=callbacks,
                        validation_data=data_generator(args.dev, args.eval_batch_size, seq_len=args.seq_len, features=args.features_dev),
                        validation_steps=ceil( get_example_count(args.dev) / args.eval_batch_size ))
                        

    print("Saving model:", args.output_file)
    if args.gpus > 1:
        template_model.save(args.output_file)
    else:
        model.save(args.output_file)
Example #10
def build_model(args):

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))

    if args.load_model:
        print("Loading previously saved model..")
        if args.bert_config:
            print("Warning: --bert_config ignored when loading previous Keras model.", file=sys.stderr)
        custom_objects = get_custom_objects()
        model = load_model(args.load_model, custom_objects=custom_objects)
    
    else:
        print("Building model..")
        bert = load_trained_model_from_checkpoint(args.bert_config, args.init_checkpoint,
                                                    training=False, trainable=True,
                                                    seq_len=args.seq_len)

        transformer_output = get_encoder_component(name="Encoder-13", input_layer=bert.layers[-1].output,
                                                head_num=12, hidden_dim=3072, feed_forward_activation=gelu)

        drop_mask = Lambda(lambda x: x, name="drop_mask")(bert.output)

        slice_CLS = Lambda(lambda x: K.slice(x, [0, 0, 0], [-1, 1, -1]), name="slice_CLS")(drop_mask)
        flatten_CLS = Flatten()(slice_CLS)

        # Needed to avoid a json serialization error when saving the model.
        last_position = args.seq_len-1
        slice_SEP = Lambda(lambda x: K.slice(x, [0, last_position, 0], [-1, 1, -1]), name="slice_SEP")(drop_mask)
        flatten_SEP = Flatten()(slice_SEP)

        permute_layer = Permute((2, 1))(drop_mask)
        permute_average = GlobalAveragePooling1D()(permute_layer)
        permute_maximum =  GlobalMaxPooling1D()(permute_layer)

        concat = Concatenate()([permute_average, permute_maximum, flatten_CLS, flatten_SEP])

        output_layer = Dense(get_label_dim(args.train), activation='sigmoid', name="label_out")(flatten_CLS)

        model = Model(bert.input, output_layer)
        
        total_steps, warmup_steps =  calc_train_steps(num_example=get_example_count(args.train),
                                                    batch_size=args.batch_size, epochs=args.epochs,
                                                    warmup_proportion=0.01)

        # optimizer = AdamWarmup(total_steps, warmup_steps, lr=args.lr)
        optimizer = keras.optimizers.Adam(lr=args.lr)

        model.compile(loss=["binary_crossentropy"], optimizer=optimizer, metrics=[])

    if args.gpus > 1:
        template_model = model
        # Set cpu_merge=False for better performance on NVLink connected GPUs.
        model = multi_gpu_model(template_model, gpus=args.gpus, cpu_merge=False)
        # TODO: need to compile this model as well when doing multigpu!

    callbacks = [Metrics(model)]

    if args.patience > -1:
        callbacks.append(EarlyStopping(patience=args.patience, verbose=1))

    if args.checkpoint_interval > 0:
        callbacks.append(ModelCheckpoint(args.output_file + ".checkpoint-{epoch}",  period=args.checkpoint_interval))


    print(model.summary(line_length=118))
    print("Number of GPUs in use:", args.gpus)
    print("Batch size:", args.batch_size)
    print("Learning rate:", K.eval(model.optimizer.lr))
    # print("Dropout:", args.dropout)

    model.fit_generator(data_generator(args.train, args.batch_size, seq_len=args.seq_len),
                        steps_per_epoch=ceil( get_example_count(args.train) / args.batch_size ),
                        use_multiprocessing=True, epochs=args.epochs, callbacks=callbacks,
                        validation_data=data_generator(args.dev, args.eval_batch_size, seq_len=args.seq_len),
                        validation_steps=ceil( get_example_count(args.dev) / args.eval_batch_size ))

    print("Saving model:", args.output_file)
    if args.gpus > 1:
        template_model.save(args.output_file)
    else:
        model.save(args.output_file)
Example #11
def main(argv):
    args = argparser().parse_args(argv[1:])
    bert, vocab = load_pretrained(args)
    tokenizer = Tokenizer(vocab, cased=not args.do_lower_case)
    labels, train_sents, dev_sents, test_sents = load_data(args)

    train_data = create_examples(train_sents, tokenizer, labels, args)
    dev_data = create_examples(dev_sents, tokenizer, labels, args)
    test_data = create_examples(test_sents, tokenizer, labels, args)

    output = Dense(len(labels), activation='softmax')(bert.output)
    model = Model(inputs=bert.inputs, outputs=output)
    model.summary(line_length=80)

    train_input = np.array([e.input_ids for e in train_data])
    train_in_mask = np.array([e.input_mask for e in train_data])
    train_segments = np.array([e.segment_ids for e in train_data])
    train_output = np.expand_dims(
        np.array([e.label_ids for e in train_data]), -1)
    train_head_flags = np.array([e.head_flags for e in train_data])

    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_input),
        batch_size=args.train_batch_size,
        epochs=args.num_train_epochs,
        warmup_proportion=0.1,
    )

    optimizer = AdamWarmup(
        total_steps,
        warmup_steps,
        lr=args.learning_rate,
        weight_decay=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo'],
        min_lr=0    # TODO
    )

    model.compile(
        loss='sparse_categorical_crossentropy',
        sample_weight_mode='temporal',
        optimizer=optimizer
    )

    dev_input = np.array([e.input_ids for e in dev_data])
    dev_in_mask = np.array([e.input_mask for e in dev_data])
    dev_segments = np.array([e.segment_ids for e in dev_data])
    dev_output = np.expand_dims(np.array([e.label_ids for e in dev_data]),-1)
    dev_head_flags = np.array([e.head_flags for e in dev_data])

    train_start = datetime.now()
    print('start training at', train_start)
    train_cb = EvaluationCallback(
        'train', train_input, train_segments, train_output, train_head_flags)
    dev_cb = EvaluationCallback(
        'dev', dev_input, dev_segments, dev_output, dev_head_flags)
    callbacks = [train_cb, dev_cb]
    model.fit(
        [train_input, train_segments],
        train_output,
        sample_weight=train_in_mask,
        batch_size=args.train_batch_size,
        epochs=args.num_train_epochs,
        verbose=1,
        callbacks=callbacks
    )
    train_end = datetime.now()
    print('done training', train_end, 'time', train_end-train_start)

    if args.predict is not None:
        if args.predict == 'dev':
            pred_data, pred_sents = dev_data, dev_sents
        else:
            assert args.predict == 'test'
            pred_data, pred_sents = test_data, test_sents
        pred_input = np.array([e.input_ids for e in pred_data])
        pred_segments = np.array([e.segment_ids for e in pred_data])
        pred = model.predict(
            [pred_input, pred_segments],
            verbose=1
        )
        pred_tokens = [[t for t, _ in s] for s in pred_sents]
        pred_head_flags = np.array([e.head_flags for e in pred_data])
        write_predictions(pred_tokens, pred_input, pred_head_flags,
                          pred, vocab, labels, args.output)
    print('best dev result', dev_cb.best, 'for epoch', dev_cb.best_epoch)
    return 0
Example #12
model = load_trained_model_from_checkpoint(
    config_path,
    checkpoint_path,
    training=True,
    trainable=True,
    seq_len=SEQ_LEN,
)

inputs = model.inputs[:2]
dense = model.get_layer('NSP-Dense').output
outputs = keras.layers.Dense(units=1, activation='sigmoid')(dense)
model = keras.models.Model(inputs, outputs)

total_steps, warmup_steps = calc_train_steps(
    num_example=x_train[0].shape[0],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    warmup_proportion=0.1,
)
optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=LR)

model.compile(
    RAdam(LR),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()

sess = K.get_session()
uninitialized_variables = set(
    [i.decode('ascii') for i in sess.run(tf.report_uninitialized_variables())])
init_op = tf.variables_initializer([
    v for v in tf.global_variables()
    if v.name.split(':')[0] in uninitialized_variables
])
sess.run(init_op)
Example #13
    def train(self):
        x_trn, y_trn = self.train_data['text'][:].values, self.train_data[
            'label'][:].values
        x_val, y_val = self.dev_data['text'][:].values, self.dev_data[
            'label'][:].values
        x_test, y_test = self.test_data['text'][:].values, self.test_data[
            'label'][:].values
        folds, batch_size, steps, max_len = 5, 16, 30, 300
        y_vals = np.zeros((len(x_val), 2))
        y_vals_vote = np.zeros(len(x_val))
        y_test_pre = np.zeros((len(x_test), 2))
        knowledge_dict = dict()
        model = self.create_model()
        total_steps, warmup_steps = calc_train_steps(
            num_example=x_trn.shape[0],
            batch_size=batch_size,
            epochs=steps,
            warmup_proportion=0.2)
        adamwarmup = AdamWarmup(total_steps,
                                warmup_steps,
                                lr=1e-5,
                                min_lr=1e-7)
        model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=1e-5))
        model.save_weights('origin')

        patient, best_score = 0, -1
        x1_trn_tok, x2_trn_tok = sentence2token(x_trn, max_len=max_len)
        x1_val_tok, x2_val_tok = sentence2token(x_val, max_len=max_len)
        for epoch in range(steps):
            # ==========train=========== #
            generator = batch_iter(x_trn,
                                   y_trn,
                                   max_len=max_len,
                                   batch_size=batch_size)
            for x1_tok, x2_tok, lab in generator:
                model.train_on_batch([x1_tok, x2_tok], np.eye(2)[lab])
            # ==========eval=========== #
            y_val_pre = model.predict([x1_val_tok, x2_val_tok])
            y_val_vote = np.argmax(y_val_pre, -1)  # use the index of the largest value as the prediction
            f1, auc, acc, recall = score(y_val, y_val_vote)
            # ==========EarlyStop=========== #
            if f1 > best_score:
                patient = 0
                best_score = f1
                y_vals_vote = y_val_vote
                y_vals = y_val_pre
                model.save_weights('weight')
                # =========save knowledge==========
                knowledge_dict = self.save_knowlege(x1_trn_tok, x2_trn_tok,
                                                    model, knowledge_dict)

            print('epoch:{}, f1:{}, auc:{}, acc:{}, recall:{}, best_score:{}'.
                  format(epoch, f1, auc, acc, recall, best_score))
            patient += 1
            if patient >= 5:
                break
        # ==========load the best model and predict on the test set=========== #
        model.load_weights('weight')
        x1_test_tok, x2_test_tok = sentence2token(x_test, max_len=max_len)
        predict = np.argmax(model.predict([x1_test_tok, x2_test_tok]), -1)
        print('final dev score: ', score(y_val, y_vals_vote))
        print('final test score: ', score(y_test, predict))
        #         return y_test_vote, y_vals_vote, y_test, y_vals
        with open("teacher_knowledge.json", "w") as f:
            json.dump(knowledge_dict, f)
Example #14
def manual_train():
    #frac = args.frac
    args = get_args()
    fold = args.fold
    EPOCHS = args.epochs

    BATCH_SIZE = 32
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()

        ##Begin to define model
        from keras_bert import load_trained_model_from_checkpoint

        model_bert = load_trained_model_from_checkpoint(
            config_path,
            checkpoint_path,
            training=True,
            seq_len=SEQ_LEN,
        )

        #model_right = load_trained_model_from_checkpoint(config_path, checkpoint_path, training=True, seq_len=SEQ_LEN, )

        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        app_des = model_bert.inputs[:2]
        dense_app_des = model_bert.get_layer('NSP-Dense').output

        model_bert = keras.models.Model(inputs=app_des,
                                        outputs=dense_app_des,
                                        name='bert_output')

        inputs = [
            keras.models.Input(shape=(SEQ_LEN, ), name=f'INPUT-{name}')
            for name in range(4)
        ]

        left = model_bert(inputs[:2])
        right = model_bert(inputs[2:])

        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )

        fc_ex = keras.layers.concatenate([left, right], axis=1)
        #fc_ex = keras.layers.Subtract()([left, right])
        # End input from manual

        #outputs = keras.layers.Dense(units=8, activation='softmax')(fc_ex)

        outputs = keras.layers.Dense(units=1, activation='sigmoid')(fc_ex)

        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=LR),
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )

        model.summary(line_length=120)
        ##End to define model

        input1_col = [col for col in X.columns if str(col).startswith('bert_')]
        input3_col = [col for col in X.columns if str(col).startswith('fea_')]
        #max_words = len(input1_col)
        model  #= get_model(max_words)

        Y_cat = y

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index_no_bin
        train_idx, test_idx = split_df_by_index_no_bin(X, fold)

        logger.info(
            f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}'
        )
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]

        logger.info(
            f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} '
        )
        #for sn in range(5):
        input1 = train_x.loc[:, input1_col]  #.astype(np.float32)
        input2 = np.zeros_like(input1)  #.astype(np.int8)
        input3 = train_x.loc[:, input3_col]
        input4 = np.zeros_like(input3)
        logger.info(
            f'NN Input1:{input1.shape}, Input2:{input2.shape}, Input3:{input3.shape}'
        )

        logger.info(f'NN train_x:{train_x[:3]}')

        from keras_bert import get_custom_objects
        import tensorflow as tf

        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2, input3, input4],
                            train_y,
                            validation_data=([
                                val_x.loc[:, input1_col],
                                np.zeros_like(val_x.loc[:, input1_col]),
                                val_x.loc[:, input3_col],
                                np.zeros_like(val_x.loc[:, input3_col]),
                            ], val_y),
                            epochs=EPOCHS,
                            shuffle=True,
                            batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx])]
                            #steps_per_epoch=1000, validation_steps=10
                            )

            #gen_sub(model, X_test, sn)

    return his
Example #15
def categorical_crossentropy_with_label_smoothing(y_true, y_pred, label_smoothing=0.1):
    # NOTE: the snippet starts mid-function; this signature is an assumption, inferred from
    # the model.compile(loss=categorical_crossentropy_with_label_smoothing, ...) call below.
    num_classes = math_ops.cast(array_ops.shape(y_true)[1], y_pred.dtype)
    y_true = y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes)
    return categorical_crossentropy(y_true, y_pred)


if __name__ == '__main__':

    # model training
    train_D = DataGenerator(train_samples)
    dev_D = DataGenerator(dev_samples)
    model = SimpleMultiChoiceMRC(CONFIG_FILE_PATH, CHECKPOINT_FILE_PATH,
                                 MAX_SEQ_LENGTH, NUM_CHOICES).create_model()
    # add warmup
    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_samples),
        batch_size=BATCH_SIZE,
        epochs=EPOCH,
        warmup_proportion=WARMUP_RATION,
    )
    optimizer = AdamWarmup(total_steps, warmup_steps, lr=2e-5, min_lr=1e-8)
    filepath = "models/multi_choice_model_%s-{epoch:02d}-{val_acc:.4f}.h5" % dataset
    checkpoint = ModelCheckpoint(filepath,
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True,
                                 save_weights_only=True,
                                 mode='max')
    model.compile(loss=categorical_crossentropy_with_label_smoothing,
                  optimizer=optimizer,
                  metrics=['accuracy'])

    print("begin model training...")
Example #16
                target.append(No)
                target.append(No)
    return x1, x2, target, ids


train_x1, train_x2, train_target, _ = genete_data1(train_topic, train_text,
                                                   train_stance)
test_x1, test_x2, test_target, test_id = genete_data1(test_topic, test_text,
                                                      test_stance)

from keras.layers import *
from keras.models import Model
from keras_bert import AdamWarmup, calc_train_steps
total_steps, warmup_steps = calc_train_steps(
    num_example=len(train_x1),
    batch_size=4,
    epochs=train_epochs,
    warmup_proportion=0.1,
)

optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-3, min_lr=1e-5)
bert_model = load_trained_model_from_checkpoint(config_path,
                                                checkpoint_path,
                                                seq_len=None)
for l in bert_model.layers:
    l.trainable = True

x1_in = Input(shape=(maxlen, ))
x2_in = Input(shape=(maxlen, ))
x = bert_model([x1_in, x2_in])
x = Lambda(lambda x: x[:, 0])(x)
p = Dense(1, activation='sigmoid')(x)
Example #17
    indices = np.array(indices)
    return [indices, np.zeros_like(indices)], np.array(sentiments)
# load the data and convert it to token ids
train_path = os.path.join(os.path.dirname(dataset), 'aclImdb', 'train')
test_path = os.path.join(os.path.dirname(dataset), 'aclImdb', 'test')
train_x, train_y = load_data(train_path)
test_x, test_y = load_data(test_path)

# define the custom model
inputs = model.inputs[:2]
dense = model.get_layer('NSP-Dense').output  # take the output of the 'NSP-Dense' layer
outputs = keras.layers.Dense(units=2, activation='softmax')(dense)  # dense layer + softmax
decay_steps, warmup_steps = calc_train_steps(  # decay steps and warm-up steps
    train_y.shape[0],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    )

model = keras.models.Model(inputs, outputs)
model.compile(  # compile the model for training
    AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
    )

# initialize all variables
sess = K.get_session()
uninitialized_variables = set([i.decode('ascii') for i in sess.run(tf.report_uninitialized_variables())])
init_op = tf.variables_initializer(
    [v for v in tf.global_variables() if v.name.split(':')[0] in uninitialized_variables]
)
sess.run(init_op)
Example #18
def main():
    seq_id, seq_O, seq_P, id_to_label, id_to_term = encode_seq(
        df_label=df_label, maxlen=MAX_LEN)

    class Evaluation(Callback):
        def __init__(self, val_data, interval=1):
            self.val_data = val_data
            self.interval = interval
            self.best_f1 = 0.

            self.true_vp_val = [
                (row["id"], row["OpinionTerms"], row["Polarities"],
                 row['O_start'], row['O_end']) for rowid, row in df_label[
                     df_label['id'].isin(self.val_data[0])].iterrows()
            ]

        def on_epoch_end(self, epoch, log={}):
            if epoch % self.interval == 0:
                o_out, p_out = pred_model.predict(
                    self.val_data[1:4], batch_size=BATCH_SIZE)  # CRF probabilities
                o_pred = np.argmax(o_out, axis=2)
                p_pred = np.argmax(p_out, axis=2)

                texts = [
                    df_review[df_review['id'] == i]["Reviews"].values[0]
                    for i in self.val_data[0]
                ]

                pred_vp_val = decode_seq(self.val_data[0], o_pred, p_pred,
                                         id_to_label, texts)

                precision, recall, f1 = cal_opinion_metrics(
                    pred_vp_val, self.true_vp_val)
                if f1 > self.best_f1:
                    self.best_f1 = f1
                    self.model.save_weights(
                        f'./model_op/op_model_0924_viteb.weights')
                    print(f'best = {f1}')

    tokenizer = BertTokenizer(token_dict)

    seq_input, seq_seg = bert_text_to_seq(list(df_review["Reviews"]),
                                          tokenizer,
                                          maxlen=MAX_LEN)

    true_vp = [(row["id"], row["OpinionTerms"], row["Polarities"],
                row['O_start'], row['O_end'])
               for rowid, row in df_label.iterrows()]

    pred_vp = decode_seq(seq_id, seq_O, seq_P, id_to_label,
                         list(df_review["Reviews"]))

    cal_opinion_metrics(pred_vp, true_vp)

    seq_O = to_categorical(seq_O)

    seq_P = to_categorical(seq_P)

    df_review['pos_tag'] = df_review['Reviews'].progress_apply(pos_tag)

    with open('./data/postag2id_0922_laptop_make_up.pkl', 'rb') as f:
        postag2id = pickle.load(f)

    df_review['pos_tag'] = df_review['pos_tag'].progress_apply(
        lambda postag: [postag2id[x] for x in postag])

    seq_postag = np.array(df_review['pos_tag'].values.tolist())

    view_train, view_val = split_viewpoints(seq_id, seq_input, seq_seg, seq_O,
                                            seq_P, seq_postag)

    print(view_val[0])
    print('------------------- saving the validation-set ids ---------------------')
    print('saving the final validation-set val ids')

    # np.save('./data/final_makeup_laptop_val_ids', view_val[0])
    print('------------------- done saving ---------------------------')
    # exit()
    bert_model = load_trained_model_from_checkpoint(config_path,
                                                    checkpoint_path,
                                                    seq_len=None)
    for l in bert_model.layers:
        l.trainable = True

    x1_in = Input(shape=(MAX_LEN, ), name='x1_in')
    x2_in = Input(shape=(MAX_LEN, ), name='x2_in')
    o_in = Input(shape=(
        MAX_LEN,
        len(id_to_term) + 1,
    ), name='o_in')
    p_in = Input(shape=(
        MAX_LEN,
        len(id_to_label) + 1,
    ), name='p_in')

    pos_tag_in = Input(shape=(MAX_LEN, ), name='pos_tag_in')
    pos_tag_emb = Embedding(len(postag2id), POS_TAG_DIM,
                            trainable=True)(pos_tag_in)

    x = bert_model([x1_in, x2_in])
    x = Concatenate()([x, pos_tag_emb])

    p_out = Dense(len(id_to_label) + 1,
                  activation='softmax')(x)  # p_out is the polarity output
    crf = CRF(len(id_to_term) + 1)
    o_out = crf(x)
    loss_seq_O = crf.loss_function(o_in, o_out)  # wrapping this directly in a Lambda layer breaks the computation graph
    loss_seq_O = Lambda(lambda x: K.mean(x))(loss_seq_O)
    # loss_seq_O = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])), name='loss_seq_O')([o_in, o_out])

    loss_p = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])),
                    name='loss_c')([p_in, p_out])

    train_model = Model([x1_in, x2_in, pos_tag_in, o_in, p_in], [o_out, p_out])
    pred_model = Model([x1_in, x2_in, pos_tag_in], [o_out, p_out])
    train_model._losses = []
    train_model._per_input_losses = {}
    train_model.add_loss(loss_seq_O)
    train_model.add_loss(loss_p)

    print(view_train[0].shape[0])

    total_steps, warmup_steps = calc_train_steps(
        num_example=view_train[0].shape[0],
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        warmup_proportion=0.1,
    )
    # optimizer = Adam(lr=1e-5)
    optimizer = AdamWarmup(total_steps, warmup_steps, lr=5e-5, min_lr=1e-6)

    train_model.compile(optimizer=optimizer)
    train_model.metrics_tensors.append(loss_seq_O)
    train_model.metrics_names.append('loss_seq_O')
    train_model.metrics_tensors.append(loss_p)
    train_model.metrics_names.append('loss_p')
    train_model.summary()

    eval_callback = Evaluation(val_data=view_val)

    train_model.fit(view_train[1:],
                    epochs=EPOCHS,
                    shuffle=True,
                    batch_size=BATCH_SIZE,
                    callbacks=[eval_callback])
Example #19
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 23 20:51:40 2021
@author: xiuzhang
"""
import numpy as np
from keras_bert import AdamWarmup, calc_train_steps

# generate random numbers
train_x = np.random.standard_normal((1024, 100))
print(train_x)

# batched training setup
total_steps, warmup_steps = calc_train_steps(
    num_example=train_x.shape[0],
    batch_size=32,
    epochs=10,
    warmup_proportion=0.1,
)

optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-3, min_lr=1e-5)
print(optimizer)
Example #20
def manual_train():
    #frac = args.frac
    args = get_args()
    fold = args.fold
    EPOCHS = args.epochs

    BATCH_SIZE = 128
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()




        ##Begin to define model
        from keras_bert import load_trained_model_from_checkpoint

        model = load_trained_model_from_checkpoint(config_path, checkpoint_path, training=True, seq_len=SEQ_LEN, )


        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        inputs = model.inputs[:2]
        dense_bert = model.get_layer('NSP-Dense').output


        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )

        # New input from manual

        data = get_feature_bert_wv().add_prefix('fea_')
        manual_fea_len = len([col for col in data.columns if col.startswith('fea_')])

        logger.info(f'manual_fea_len:{manual_fea_len}')
        manual_feature = keras.Input(shape=(manual_fea_len,), name='manual_feature', dtype='float32')
        inputs = inputs + [manual_feature]


        manual_feature = keras.layers.Dense(round(num_classes*0.6),  name='manual_dense', activation='relu')(manual_feature)
        manual_feature = keras.layers.Dropout(0.5)(manual_feature)
        #manual_feature = keras.layers.Dense(round(num_classes), activation='relu')(manual_feature)

        fc_ex = keras.layers.concatenate([dense_bert, manual_feature], axis=1)
        # End input from manual

        #fc_ex = keras.layers.Dense(units=1024, activation='softmax')(fc_ex)

        outputs = keras.layers.Dense(units=num_classes, activation='softmax')(fc_ex)

        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )

        model.summary(line_length=120)
        ##End to define model

        input1_col = [col for col in X.columns if str(col).startswith('bert_')]
        input3_col = [col for col in X.columns if str(col).startswith('fea_')]
        #max_words = len(input1_col)
        model #= get_model(max_words)

        #get_feature_manual.cache_clear()
        Y_cat = keras.utils.to_categorical(y, num_classes=num_classes)
        #folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index
        train_idx, test_idx = split_df_by_index(X,fold)

        logger.info(f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}')
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]

        logger.info(f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} ')
        #for sn in range(5):
        input1 = train_x.loc[:, input1_col]#.astype(np.float32)
        input2 = np.zeros_like(input1)#.astype(np.int8)
        input3 = train_x.loc[:, input3_col]
        logger.info(f'NN Input1:{input1.shape}, Input2:{input2.shape}, Input3:{input3.shape}')

        logger.info(f'NN train_x:{train_x[:3]}')

        from keras_bert import get_custom_objects
        import tensorflow as tf

        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2, input3], train_y,
                            validation_data = ([
                                                val_x.loc[:, input1_col],
                                                np.zeros_like(val_x.loc[:, input1_col]),
                                                val_x.loc[:, input3_col]
                                               ],
                                               val_y),
                            epochs=EPOCHS,  shuffle=True, batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx] )]
                      #steps_per_epoch=1000, validation_steps=10
                      )



            #gen_sub(model, X_test, sn)

    return his
Example #21
a_model = Model([x1_in, x2_in, opinion_mask_in, lf_pos_in, rt_pos_in], a_out)
cp_model = Model([x1_in, x2_in, opinion_mask_in, lf_pos_in, rt_pos_in], c_out)

train_model = Model(
    [x1_in, x2_in, seq_a_in, opinion_mask_in, lf_pos_in, rt_pos_in, c_in],
    [a_out, c_out])

loss_c = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])),
                name='loss_p')([c_in, c_out])

train_model.add_loss(loss_A)
train_model.add_loss(loss_c)

total_steps, warmup_steps = calc_train_steps(
    num_example=train_data[0].shape[0],
    batch_size=BATCH_SIZE,
    epochs=100,
    warmup_proportion=0.05,
)

optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=1e-6)

train_model.compile(optimizer=optimizer)

train_model.metrics_tensors.append(loss_A)
train_model.metrics_names.append('loss_A')
train_model.metrics_tensors.append(loss_c)
train_model.metrics_names.append('loss_c')
train_model.summary()

eval_callback = Evaluation(val_data=val_data)
Example #22
f = keras.layers.Dense(32, activation='relu')(f)
f = keras.layers.Dropout(0.5)(f)
outpt = keras.layers.Dense(classes_dict[mode],
                           activation=activation_dict[mode])(f)
model = keras.models.Model([ind, seg, inpt2], outpt)

# model.summary()

# keras.utils.plot_model(model,'model.png')
"""#Train"""

batch_size = 5
epochs = 60

decay_steps, warmup_steps = keras_bert.calc_train_steps(4 * ln // 5,
                                                        batch_size=batch_size,
                                                        epochs=epochs)
adawarm = keras_bert.AdamWarmup(decay_steps=decay_steps,
                                warmup_steps=warmup_steps,
                                lr=1e-4)

model.compile(optimizer=adawarm, loss=loss_dict[mode], metrics=['acc'])
# model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['acc'])

es = keras.callbacks.EarlyStopping(monitor='val_loss',
                                   patience=10,
                                   restore_best_weights=True)
cp = keras.callbacks.ModelCheckpoint('best_acc_model.h5', monitor='val_acc')
csvl = keras.callbacks.CSVLogger('train_log.csv')

history = model.fit([ind_array, seg_array, param],
Example #23
#
# inp = layers.Input(shape=(max_sequence_len, ))
# emb = layers.Embedding(len(token_dict), 50, mask_zero=True)(inp)
# crf = CRF(len(tag_dict), sparse_target=True)(emb)
# base_model = models.Model(inputs=inp, outputs=crf)
# base_model.compile(optimizers.Adam(lr=0.01), crf_loss, metrics=[crf_viterbi_accuracy])
#
# base_model.summary()
#
# base_model.fit([train_sentence_indices], train_tags, validation_data=([devel_sentence_indices], devel_tags), batch_size=batch_size, epochs=50, verbose=1)

print("Loading BERT")

total_steps, warmup_steps = calc_train_steps(
    num_example=len(train_sentences),
    batch_size=batch_size,
    epochs=10,
    warmup_proportion=0.1,
)

print(total_steps, warmup_steps)

optimizer = AdamWarmup(5 * total_steps,
                       warmup_steps,
                       lr=2e-5,
                       min_lr=2e-7,
                       weight_decay=weight_decay)

# import pdb; pdb.set_trace()
bert_model = load_trained_model_from_checkpoint(config_path,
                                                checkpoint_path,
                                                training=False,
Example #24
    with open('new_data.txt', 'w', encoding='utf-8') as file:
        file.write(json.dumps(dic, ensure_ascii=False))


if __name__ == '__main__':
    batch_size = 16
    learning_rate = 1e-3
    min_learning_rate = 1e-5
    epochs = 100
    is_test = False

    train_data, dev_data, test_data, id2class, class2id = read_data()

    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_data),
        batch_size=batch_size,
        epochs=epochs,
        warmup_proportion=0.1,
    )

    model, test_model = Graph(total_steps,
                              warmup_steps,
                              lr=learning_rate,
                              min_lr=min_learning_rate)

    if is_test:
        test_model.load_weights('output/subject_model.weights')
        model.load_weights('output/subject_model.weights')
        test(test_data, class2id, test_model)
        # acc = dev(dev_data, class2id, test_model)
        # print('acc: ', acc)
    else:
Example #25
def main():
    args = get_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    np.random.seed(args.seed)

    if args.verbose:
        log.basicConfig(level=log.DEBUG, stream=sys.stdout)
    else:
        log.basicConfig(level=log.INFO, stream=sys.stdout)

    log.info('\n' + tabulate(sorted(vars(args).items())))
    set_logger(os.path.join(args.output_dir, args.log_file))

    pick_device()
    data = load_instances(args.dataset, args.label_col)
    classes = list(sorted(set(data[args.label_col])))
    args.n_classes = len(classes)

    token_dict = load_vocabulary(args.vocab_file)
    tokenizer = Tokenizer(token_dict)

    if args.do_train:
        folds = [i for i in args.train_dataset.split(',')]
        train_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        train_generator = TextDataFrameIterator(
            dataframe=train_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=True,
            seq_len=args.max_seq_length,
            seed=args.seed,
            do_lower_case=args.do_lower_case
        )

        folds = [i for i in args.val_dataset.split(',')]
        val_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        val_generator = TextDataFrameIterator(
            dataframe=val_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )

        total_steps, warmup_steps = calc_train_steps(
            num_example=len(train_df),
            batch_size=args.batch_size,
            epochs=args.epochs,
            warmup_proportion=args.warmup_proportion,
        )

        model = get_model(args)
        earlystop = callbacks.EarlyStopping(
            monitor='val_loss', min_delta=K.epsilon(), patience=args.earlystop,
            verbose=1, mode='auto')
        best_checkpoint = callbacks.ModelCheckpoint(
            os.path.join(args.output_dir, args.best_model),
            save_best_only=True, save_weights_only=False,
            monitor='val_loss', mode='min', verbose=1)
        csv_logger = callbacks.CSVLogger(os.path.join(args.output_dir, args.csv_logger))

        callbacks_list = [earlystop, best_checkpoint, csv_logger]
        optimizer = AdamWarmup(
            decay_steps=total_steps,
            warmup_steps=warmup_steps,
            lr=args.learning_rate,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-6,
            min_lr=1e-5,
            weight_decay=0.01,
            weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo']
        )
        model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])

        cw = get_class_weights(data, args.label_col, train_generator.class_indices)
        model.fit_generator(
            train_generator,
            class_weight=cw,
            use_multiprocessing=False,
            workers=args.workers,
            callbacks=callbacks_list,
            epochs=args.epochs,
            validation_data=val_generator,
            verbose=1)

    if args.do_test:
        folds = [i for i in args.test_dataset.split(',')]
        test_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        test_generator = TextDataFrameIterator(
            dataframe=test_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )

        print('Load from %s' % os.path.join(args.output_dir, args.best_model))
        model = load_model(os.path.join(args.output_dir, args.best_model), custom_objects=get_custom_objects())
        # model.summary()
        y_score = model.predict_generator(
            test_generator,
            use_multiprocessing=False,
            workers=args.workers,
            verbose=1)

        y_pred = np.argmax(y_score, axis=1)

        pred_df = pd.DataFrame(y_score, columns=classes)
        pred_df = pred_df.assign(predictions=[classes[lbl] for lbl in y_pred])

        y_true = test_df.loc[:, args.label_col].values
        y_pred = pred_df['predictions'].values
        report = pmetrics.classification_report(y_true, y_pred, classes=classes)
        print(report.summary())
        # print('auc', pmetrics.auc(y_true, y_score, y_column=1)[0])

        result = pd.concat([test_df, pred_df], axis=1)
        result.to_csv(os.path.join(args.output_dir, args.test_predictions), index=False)

    if args.do_predict:
        test_df = load_instances(args.pred_dataset, args.label_col)
        test_generator = TextDataFrameIterator(
            dataframe=test_df,
            tokenizer=tokenizer,
            classes=None,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )

        print('Load from %s' % os.path.join(args.output_dir, args.best_model))
        model = load_model(os.path.join(args.output_dir, args.best_model), custom_objects=get_custom_objects())
        # model.summary()
        y_score = model.predict_generator(
            test_generator,
            use_multiprocessing=False,
            workers=args.workers,
            verbose=1)
        y_pred = np.argmax(y_score, axis=1)

        pred_df = pd.DataFrame(y_score, columns=classes)
        pred_df = pred_df.assign(predictions=[classes[lbl] for lbl in y_pred])
        result = pd.concat([test_df, pred_df], axis=1)
        result.to_csv(os.path.join(args.output_dir, args.pred_predictions), index=False)

    if args.do_debug:
        for dataset in [args.train_dataset, args.val_dataset, args.test_dataset]:
            folds = [i for i in dataset.split(',')]
            print('folds:', folds)
            sub_df = data[data['fold'].isin(folds)]
            generator = TextDataFrameIterator(
                dataframe=sub_df,
                tokenizer=tokenizer,
                x_col=args.text_col,
                y_col=args.label_col,
                batch_size=args.batch_size,
                shuffle=False,
                seq_len=args.max_seq_length,
            )
            for i, ([tokens, _], labels) in enumerate(generator):
                print(tokens.shape, type(tokens), labels.shape, type(labels))
                if i == 2:
                    break
Example #26
    print("finish data processing!")

    # model training
    model = create_cls_model(len(labels))
    train_D = DataGenerator(train_data)
    test_D = DataGenerator(test_data)

    print("begin model training...")
    # keep saving the model file with the best val_acc so far
    filepath = "models/%s-{epoch:02d}-{val_acc:.4f}.h5" % DATA_DIR.split("/")[-1]
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
    # add warmup
    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_data),
        batch_size=BATCH_SIZE,
        epochs=EPOCH,
        warmup_proportion=0.1,
    )
    optimizer = AdamWarmup(total_steps, warmup_steps, lr=5e-5, min_lr=1e-7)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy']
    )
    model.fit_generator(
        train_D.__iter__(),
        steps_per_epoch=len(train_D),
        epochs=EPOCH,
        validation_data=test_D.__iter__(),
        validation_steps=len(test_D),
        callbacks=[checkpoint]
Example #27
def train_base():
    args = get_args()
    #frac = args.frac
    fold = args.fold
    EPOCHS = args.epochs

    BATCH_SIZE = 128
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()

        ##Begin to define model
        from keras_bert import load_trained_model_from_checkpoint

        model = load_trained_model_from_checkpoint(
            config_path,
            checkpoint_path,
            training=True,
            seq_len=SEQ_LEN,
        )
        model.summary(line_length=120)

        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        inputs = model.inputs[:2]
        dense = model.get_layer('NSP-Dense').output
        keras.models.Model(inputs, dense).summary()

        outputs = keras.layers.Dense(units=num_classes,
                                     activation='softmax')(dense)

        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )

        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps,
                       warmup_steps=warmup_steps,
                       lr=LR),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        ##End to define model

        input1_col = [col for col in X.columns if str(col).startswith('bert_')]
        input2_col = [col for col in X.columns if str(col).startswith('fea_')]
        #max_words = len(input1_col)
        model  #= get_model(max_words)

        #get_feature_manual.cache_clear()
        Y_cat = keras.utils.to_categorical(y, num_classes=num_classes)
        #folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index
        train_idx, test_idx = split_df_by_index(X, fold)

        logger.info(
            f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}'
        )
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]

        logger.info(
            f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} '
        )

        #train_x, train_y = filter_short_desc(train_x, train_y)

        input1 = train_x.loc[:, input1_col]  #.astype(np.float32)
        input2 = np.zeros_like(input1)  #.astype(np.int8)

        logger.info(f'NN train_x:{train_x[:3]}')
        min_len_ratio = get_args().min_len_ratio
        max_bin = get_args().max_bin
        logger.info(
            f'NN Input1:{input1.shape}, Input2:{input2.shape}, SEQ_LEN:{SEQ_LEN}, min_len_ratio:{min_len_ratio}, bin:{max_bin} '
        )

        from keras_bert import get_custom_objects
        import tensorflow as tf
        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2],
                            train_y,
                            validation_data=([
                                val_x.loc[:, input1_col],
                                np.zeros_like(val_x.loc[:, input1_col])
                            ], val_y),
                            epochs=EPOCHS,
                            shuffle=True,
                            batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx])]
                            #steps_per_epoch=1000, validation_steps=10
                            )

            #gen_sub(model, X_test, sn)

    return his