def test_calc_train_steps(self):
    total, warmup = calc_train_steps(
        num_example=1024,
        batch_size=32,
        epochs=10,
        warmup_proportion=0.1,
    )
    self.assertEqual((320, 32), (total, warmup))
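The asserted values follow from simple step arithmetic. A minimal sketch re-deriving them (this is a hypothetical reconstruction of the computation, assuming ceil-division into per-epoch steps and an integer-truncated warmup fraction, which matches the numbers asserted above; it is not keras_bert's actual source):

import math

num_example, batch_size, epochs, warmup_proportion = 1024, 32, 10, 0.1
steps_per_epoch = math.ceil(num_example / batch_size)  # 1024 / 32 = 32
total_steps = steps_per_epoch * epochs                 # 32 * 10 = 320
warmup_steps = int(total_steps * warmup_proportion)    # int(0.1 * 320) = 32
print(total_steps, warmup_steps)  # 320 32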
def train(self):
    x_trn, y_trn = self.train_data['text'][:].values, self.train_data['label'][:].values
    x_val, y_val = self.dev_data['text'][:].values, self.dev_data['label'][:].values
    x_test, y_test = self.test_data['text'][:].values, self.test_data['label'][:].values
    folds, batch_size, steps, max_len = 5, 16, 30, 300
    y_vals_vote = np.zeros(len(y_val))
    patient, best_score = 0, 0  # initialize the patience counter before the loop
    model = self.create_model()
    total_steps, warmup_steps = calc_train_steps(num_example=x_trn.shape[0],
                                                 batch_size=batch_size,
                                                 epochs=steps,
                                                 warmup_proportion=0.2)
    adamwarmup = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=1e-6)
    losses = [self.distil_loss(), self.distil_loss(soft=True, T=self.T)]
    if self.Mode == 'patient':
        losses.extend([self.distil_loss(soft=True, T=self.T)])
    elif self.Mode == 'patient.full':
        losses.extend([self.distil_loss(soft=True, T=self.T),
                       self.distil_loss(soft=True, T=self.T),
                       self.distil_loss(soft=True, T=self.T)])
    model.compile(loss=losses, optimizer=adamwarmup)
    x1_val_tok, x2_val_tok = sentence2token(x_val, max_len=max_len)
    knowledge = self.teacher
    logit, feature10, feature11, feature12 = np.array(knowledge['logit']), \
        np.array(knowledge['layer_10']), np.array(knowledge['layer_11']), np.array(knowledge['layer_12'])
    for epoch in range(steps):
        # ==========train=========== #
        generator = batch_iter(x_trn, y_trn, logit, feature10, feature11, feature12,
                               max_len=max_len, batch_size=batch_size)
        for x1_tok, x2_tok, log, feat10, feat11, feat12, lab in generator:
            outputs = [np.eye(2)[lab], log]
            if self.Mode == 'patient':
                outputs.extend([feat12])
            elif self.Mode == 'patient.full':
                outputs.extend([feat10, feat11, feat12])
            model.train_on_batch([x1_tok, x2_tok], outputs)
        # ==========eval=========== #
        y_val_pre = model.predict([x1_val_tok, x2_val_tok])[0]
        y_val_vote = np.argmax(y_val_pre, -1)  # index of the max value is the prediction
        f1, auc, acc, recall = score(y_val, y_val_vote)
        # ==========EarlyStop=========== #
        if f1 > best_score:
            patient = 0
            best_score = f1
            y_vals_vote = y_val_vote
            model.save_weights('models/distil_bert_model')
        print('epoch:{}, f1:{}, auc:{}, acc:{}, recall:{}, best_score:{}'.format(
            epoch, f1, auc, acc, recall, best_score))
        patient += 1
        if patient >= 5:
            break
    # ==========load the best model and predict on the test set=========== #
    model.load_weights('models/distil_bert_model')
    x1_test_tok, x2_test_tok = sentence2token(x_test, max_len=max_len)
    predict = np.argmax(model.predict([x1_test_tok, x2_test_tok])[0], -1)
    print('final dev score: ', score(y_val, y_vals_vote))
    print('final test score: ', score(y_test, predict))
def get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
    if cfg["opt"].lower() == "nadam":
        opt = Nadam(lr=lr)
    else:
        total_steps, warmup_steps = calc_train_steps(
            num_example=num_example,
            batch_size=B_SIZE,
            epochs=MAX_EPOCH,
            warmup_proportion=warmup_proportion,
        )
        opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)
    return opt
def _get_opt(num_example, warmup_proportion=0.1, lr=2e-5, min_lr=None):
    total_steps, warmup_steps = calc_train_steps(
        num_example=num_example,
        batch_size=B_SIZE,
        epochs=MAX_EPOCH,
        warmup_proportion=warmup_proportion,
    )
    opt = AdamWarmup(total_steps, warmup_steps, lr=lr, min_lr=min_lr)
    if cfg.get("accum_step", None) and cfg["accum_step"] > 1:
        print("[!] using accum_step = {}".format(cfg["accum_step"]))
        from accum_optimizer import AccumOptimizer
        opt = AccumOptimizer(opt, steps_per_update=cfg["accum_step"])
    return opt
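Wrapping the optimizer in AccumOptimizer accumulates gradients over steps_per_update batches before applying one weight update, so the effective batch size grows by that factor. A minimal arithmetic sketch with hypothetical values for B_SIZE, MAX_EPOCH, and accum_step (note that since calc_train_steps above counts physical batches, the warmup/decay schedule may advance slower than intended once updates are accumulated):

# Hypothetical values; the cfg dict and B_SIZE/MAX_EPOCH come from the caller's config.
B_SIZE, accum_step = 32, 4
effective_batch_size = B_SIZE * accum_step              # gradients from 4 batches per update
num_example = 10000
batches_per_epoch = -(-num_example // B_SIZE)           # ceil division: 313
updates_per_epoch = batches_per_epoch // accum_step     # ~78 actual weight updates
print(effective_batch_size, batches_per_epoch, updates_per_epoch)  # 128 313 78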
def create_optimizer(num_example, options):
    total_steps, warmup_steps = calc_train_steps(
        num_example=num_example,
        batch_size=options.batch_size,
        epochs=options.num_train_epochs,
        warmup_proportion=options.warmup_proportion,
    )
    optimizer = AdamWarmup(
        total_steps,
        warmup_steps,
        lr=options.learning_rate,
        epsilon=1e-6,
        weight_decay=0.01,
        weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo'],
    )
    return optimizer
def model_build(len_train):
    global NUM_CLASSES
    global BATCH_SIZE
    global NUM_EPOCHS
    global MIN_LR
    global LR
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path,
                                                    seq_len=MAXLEN, trainable=True)
    x1_in = Input(shape=(None, ))
    x2_in = Input(shape=(None, ))
    aux_in = Input(shape=(2, ))
    inputs = bert_model([x1_in, x2_in])
    bert = Lambda(lambda x: x[:, 0])(inputs)
    dense = concatenate([bert, aux_in])
    outputs = Dense(NUM_CLASSES, activation='softmax')(dense)
    model = Model([x1_in, x2_in, aux_in], outputs)
    decay_steps, warmup_steps = calc_train_steps(
        len_train,
        batch_size=BATCH_SIZE,
        epochs=NUM_EPOCHS,
    )
    model.compile(loss='sparse_categorical_crossentropy',
                  optimizer=AdamWarmup(
                      decay_steps=decay_steps,
                      warmup_steps=warmup_steps,
                      lr=LR,
                      min_lr=MIN_LR,
                  ),
                  metrics=['sparse_categorical_accuracy'])
    del bert_model
    gc.collect()
    return model
def compile_model(self, data_size, loss_fn, metrics):
    inputs = self.pretrained_model.inputs[:2]
    dense = self.pretrained_model.get_layer('NSP-Dense').output
    outputs = keras.layers.Dense(units=2, activation='softmax')(dense)
    decay_steps, warmup_steps = calc_train_steps(
        data_size,
        batch_size=self.batch_size,
        epochs=self.epochs,
    )
    model = keras.models.Model(inputs, outputs)
    model.compile(
        AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=self.lr),
        loss=loss_fn,
        metrics=[metrics],
    )
    self.model = model
    print(self.model.summary())
    return self.model
def build(model, num, lr=0.00002):
    # @title Build Custom Model
    from tensorflow.python import keras
    from keras_bert import AdamWarmup, calc_train_steps
    inputs = model.inputs[:2]
    dense = model.get_layer('NSP-Dense').output
    outputs = keras.layers.Dense(units=len(le.classes_), activation='softmax')(dense)
    decay_steps, warmup_steps = calc_train_steps(
        num,
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
    )
    model = keras.models.Model(inputs, outputs)
    for x in range(len(model.layers)):
        # print(x)
        model.layers[x].trainable = True
    '''
    model.layers[-3].trainable = True
    model.layers[-4].trainable = True
    model.layers[-5].trainable = True
    model.layers[-6].trainable = True
    model.layers[-7].trainable = True
    '''
    model.layers[-1].trainable = True
    model.layers[-2].trainable = True
    model.compile(
        AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=lr),
        loss='sparse_categorical_crossentropy',
        metrics=['sparse_categorical_accuracy'],
    )
    return model
def build_model(args):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.graph_options.optimizer_options.global_jit_level = tf.OptimizerOptions.ON_1
    K.set_session(tf.Session(config=config))
    print("Loading model..")
    custom_objects = get_custom_objects()
    bert_model = load_model(args.model, custom_objects=custom_objects)
    for layer in bert_model.layers:
        layer.trainable = False
    input_features = [Input(shape=(get_label_dim(args.train),)) for _ in args.features_train]
    stacked = Lambda(lambda x: K.stack(x, axis=1))([bert_model.output, *input_features])
    stacked = Permute((2, 1), name="stack_permute")(stacked)
    output_layer = TimeDistributed(Dense(1, activation="tanh", name="decision"))(stacked)
    output_layer = Flatten(name="time_distributed_flatten")(output_layer)
    output_layer = Activation("softmax")(output_layer)
    # The bert model has multiple inputs, so unpack those.
    model = Model([*bert_model.input, *input_features], output_layer)
    if args.gpus > 1:
        template_model = model
        model = multi_gpu_model(template_model, gpus=args.gpus)
    callbacks = [Metrics()]
    if args.patience > -1:
        callbacks.append(EarlyStopping(patience=args.patience, verbose=1))
    if args.checkpoint_interval > 0:
        callbacks.append(ModelCheckpoint(args.output_file + ".checkpoint-{epoch}",
                                         period=args.checkpoint_interval))
    total_steps, warmup_steps = calc_train_steps(num_example=get_example_count(args.train),
                                                 batch_size=args.batch_size,
                                                 epochs=args.epochs,
                                                 warmup_proportion=0.01)
    optimizer = AdamWarmup(total_steps, warmup_steps, lr=args.lr)
    model.compile(loss=["categorical_crossentropy"], optimizer=optimizer, metrics=[])
    print(model.summary(line_length=118))
    print("Number of GPUs in use:", args.gpus)
    print("Batch size:", args.batch_size)
    print("Learning rate:", args.lr)
    print("Dropout:", args.dropout)
    model.fit_generator(data_generator(args.train, args.batch_size,
                                       seq_len=args.seq_len, features=args.features_train),
                        steps_per_epoch=ceil(get_example_count(args.train) / args.batch_size),
                        use_multiprocessing=True,
                        epochs=args.epochs,
                        callbacks=callbacks,
                        validation_data=data_generator(args.dev, args.eval_batch_size,
                                                       seq_len=args.seq_len,
                                                       features=args.features_dev),
                        validation_steps=ceil(get_example_count(args.dev) / args.eval_batch_size))
    print("Saving model:", args.output_file)
    if args.gpus > 1:
        template_model.save(args.output_file)
    else:
        model.save(args.output_file)
def build_model(args):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    K.set_session(tf.Session(config=config))
    if args.load_model:
        print("Loading previously saved model..")
        if args.bert_config:
            print("Warning: --bert_config ignored when loading previous Keras model.",
                  file=sys.stderr)
        custom_objects = get_custom_objects()
        model = load_model(args.load_model, custom_objects=custom_objects)
    else:
        print("Building model..")
        bert = load_trained_model_from_checkpoint(args.bert_config, args.init_checkpoint,
                                                  training=False, trainable=True,
                                                  seq_len=args.seq_len)
        transformer_output = get_encoder_component(name="Encoder-13",
                                                   input_layer=bert.layers[-1].output,
                                                   head_num=12,
                                                   hidden_dim=3072,
                                                   feed_forward_activation=gelu)
        drop_mask = Lambda(lambda x: x, name="drop_mask")(bert.output)
        slice_CLS = Lambda(lambda x: K.slice(x, [0, 0, 0], [-1, 1, -1]),
                           name="slice_CLS")(drop_mask)
        flatten_CLS = Flatten()(slice_CLS)
        # Needed to avoid a json serialization error when saving the model.
        last_position = args.seq_len - 1
        slice_SEP = Lambda(lambda x: K.slice(x, [0, last_position, 0], [-1, 1, -1]),
                           name="slice_SEP")(drop_mask)
        flatten_SEP = Flatten()(slice_SEP)
        permute_layer = Permute((2, 1))(drop_mask)
        permute_average = GlobalAveragePooling1D()(permute_layer)
        permute_maximum = GlobalMaxPooling1D()(permute_layer)
        concat = Concatenate()([permute_average, permute_maximum, flatten_CLS, flatten_SEP])
        output_layer = Dense(get_label_dim(args.train), activation='sigmoid',
                             name="label_out")(flatten_CLS)
        model = Model(bert.input, output_layer)
        total_steps, warmup_steps = calc_train_steps(num_example=get_example_count(args.train),
                                                     batch_size=args.batch_size,
                                                     epochs=args.epochs,
                                                     warmup_proportion=0.01)
        # optimizer = AdamWarmup(total_steps, warmup_steps, lr=args.lr)
        optimizer = keras.optimizers.Adam(lr=args.lr)
        model.compile(loss=["binary_crossentropy"], optimizer=optimizer, metrics=[])
    if args.gpus > 1:
        template_model = model
        # Set cpu_merge=False for better performance on NVLink connected GPUs.
        model = multi_gpu_model(template_model, gpus=args.gpus, cpu_merge=False)
        # TODO: need to compile this model as well when doing multigpu!
    callbacks = [Metrics(model)]
    if args.patience > -1:
        callbacks.append(EarlyStopping(patience=args.patience, verbose=1))
    if args.checkpoint_interval > 0:
        callbacks.append(ModelCheckpoint(args.output_file + ".checkpoint-{epoch}",
                                         period=args.checkpoint_interval))
    print(model.summary(line_length=118))
    print("Number of GPUs in use:", args.gpus)
    print("Batch size:", args.batch_size)
    print("Learning rate:", K.eval(model.optimizer.lr))
    # print("Dropout:", args.dropout)
    model.fit_generator(data_generator(args.train, args.batch_size, seq_len=args.seq_len),
                        steps_per_epoch=ceil(get_example_count(args.train) / args.batch_size),
                        use_multiprocessing=True,
                        epochs=args.epochs,
                        callbacks=callbacks,
                        validation_data=data_generator(args.dev, args.eval_batch_size,
                                                       seq_len=args.seq_len),
                        validation_steps=ceil(get_example_count(args.dev) / args.eval_batch_size))
    print("Saving model:", args.output_file)
    if args.gpus > 1:
        template_model.save(args.output_file)
    else:
        model.save(args.output_file)
def main(argv):
    args = argparser().parse_args(argv[1:])
    bert, vocab = load_pretrained(args)
    tokenizer = Tokenizer(vocab, cased=not args.do_lower_case)
    labels, train_sents, dev_sents, test_sents = load_data(args)
    train_data = create_examples(train_sents, tokenizer, labels, args)
    dev_data = create_examples(dev_sents, tokenizer, labels, args)
    test_data = create_examples(test_sents, tokenizer, labels, args)
    output = Dense(len(labels), activation='softmax')(bert.output)
    model = Model(inputs=bert.inputs, outputs=output)
    model.summary(line_length=80)
    train_input = np.array([e.input_ids for e in train_data])
    train_in_mask = np.array([e.input_mask for e in train_data])
    train_segments = np.array([e.segment_ids for e in train_data])
    train_output = np.expand_dims(
        np.array([e.label_ids for e in train_data]), -1)
    train_head_flags = np.array([e.head_flags for e in train_data])
    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_input),
        batch_size=args.train_batch_size,
        epochs=args.num_train_epochs,
        warmup_proportion=0.1,
    )
    optimizer = AdamWarmup(
        total_steps,
        warmup_steps,
        lr=args.learning_rate,
        weight_decay=0.01,
        beta_1=0.9,
        beta_2=0.999,
        epsilon=1e-6,
        weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo'],
        min_lr=0  # TODO
    )
    model.compile(
        loss='sparse_categorical_crossentropy',
        sample_weight_mode='temporal',
        optimizer=optimizer
    )
    dev_input = np.array([e.input_ids for e in dev_data])
    dev_in_mask = np.array([e.input_mask for e in dev_data])
    dev_segments = np.array([e.segment_ids for e in dev_data])
    dev_output = np.expand_dims(np.array([e.label_ids for e in dev_data]), -1)
    dev_head_flags = np.array([e.head_flags for e in dev_data])
    train_start = datetime.now()
    print('start training at', train_start)
    train_cb = EvaluationCallback(
        'train', train_input, train_segments, train_output, train_head_flags)
    dev_cb = EvaluationCallback(
        'dev', dev_input, dev_segments, dev_output, dev_head_flags)
    callbacks = [train_cb, dev_cb]
    model.fit(
        [train_input, train_segments],
        train_output,
        sample_weight=train_in_mask,
        batch_size=args.train_batch_size,
        epochs=args.num_train_epochs,
        verbose=1,
        callbacks=callbacks
    )
    train_end = datetime.now()
    print('done training', train_end, 'time', train_end - train_start)
    if args.predict is not None:
        if args.predict == 'dev':
            pred_data, pred_sents = dev_data, dev_sents
        else:
            assert args.predict == 'test'
            pred_data, pred_sents = test_data, test_sents
        pred_input = np.array([e.input_ids for e in pred_data])
        pred_segments = np.array([e.segment_ids for e in pred_data])
        pred = model.predict(
            [pred_input, pred_segments],
            verbose=1
        )
        pred_tokens = [[t for t, _ in s] for s in pred_sents]
        pred_head_flags = np.array([e.head_flags for e in pred_data])
        write_predictions(pred_tokens, pred_input, pred_head_flags, pred,
                          vocab, labels, args.output)
    print('best dev result', dev_cb.best, 'for epoch', dev_cb.best_epoch)
    return 0
model = load_trained_model_from_checkpoint(
    config_path,
    checkpoint_path,
    training=True,
    trainable=True,
    seq_len=SEQ_LEN,
)
inputs = model.inputs[:2]
dense = model.get_layer('NSP-Dense').output
outputs = keras.layers.Dense(units=1, activation='sigmoid')(dense)
model = keras.models.Model(inputs, outputs)
total_steps, warmup_steps = calc_train_steps(
    num_example=x_train[0].shape[0],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    warmup_proportion=0.1,
)
# Note: this AdamWarmup instance is built but never used; compile() below uses RAdam.
optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=LR)
model.compile(
    RAdam(LR),
    loss='binary_crossentropy',
    metrics=['accuracy'],
)
model.summary()
sess = K.get_session()
uninitialized_variables = set(
    [i.decode('ascii') for i in sess.run(tf.report_uninitialized_variables())])
init_op = tf.variables_initializer([
    v for v in tf.global_variables()
    if v.name.split(':')[0] in uninitialized_variables
])
def train(self):
    x_trn, y_trn = self.train_data['text'][:].values, self.train_data['label'][:].values
    x_val, y_val = self.dev_data['text'][:].values, self.dev_data['label'][:].values
    x_test, y_test = self.test_data['text'][:].values, self.test_data['label'][:].values
    folds, batch_size, steps, max_len = 5, 16, 30, 300
    y_vals = np.zeros((len(x_val), 2))
    y_vals_vote = np.zeros(len(x_val))
    y_test_pre = np.zeros((len(x_test), 2))
    knowledge_dict = dict()
    model = self.create_model()
    total_steps, warmup_steps = calc_train_steps(
        num_example=x_trn.shape[0],
        batch_size=batch_size,
        epochs=steps,
        warmup_proportion=0.2)
    # Note: this AdamWarmup instance is built but never used; compile() below uses plain Adam.
    adamwarmup = AdamWarmup(total_steps, warmup_steps, lr=1e-5, min_lr=1e-7)
    model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=1e-5))
    model.save_weights('origin')
    patient, best_score = 0, -1
    x1_trn_tok, x2_trn_tok = sentence2token(x_trn, max_len=max_len)
    x1_val_tok, x2_val_tok = sentence2token(x_val, max_len=max_len)
    for epoch in range(steps):
        # ==========train=========== #
        generator = batch_iter(x_trn, y_trn, max_len=max_len, batch_size=batch_size)
        for x1_tok, x2_tok, lab in generator:
            model.train_on_batch([x1_tok, x2_tok], np.eye(2)[lab])
        # ==========eval=========== #
        y_val_pre = model.predict([x1_val_tok, x2_val_tok])
        y_val_vote = np.argmax(y_val_pre, -1)  # index of the max value is the prediction
        f1, auc, acc, recall = score(y_val, y_val_vote)
        # ==========EarlyStop=========== #
        if f1 > best_score:
            patient = 0
            best_score = f1
            y_vals_vote = y_val_vote
            y_vals = y_val_pre
            model.save_weights('weight')
            # =========save knowledge==========
            knowledge_dict = self.save_knowlege(x1_trn_tok, x2_trn_tok, model, knowledge_dict)
        print('epoch:{}, f1:{}, auc:{}, acc:{}, recall:{}, best_score:{}'.format(
            epoch, f1, auc, acc, recall, best_score))
        patient += 1
        if patient >= 5:
            break
    # ==========load the best model and predict on the test set=========== #
    model.load_weights('weight')
    x1_test_tok, x2_test_tok = sentence2token(x_test, max_len=max_len)
    predict = np.argmax(model.predict([x1_test_tok, x2_test_tok]), -1)
    print('final dev score: ', score(y_val, y_vals_vote))
    print('final test score: ', score(y_test, predict))
    # return y_test_vote, y_vals_vote, y_test, y_vals
    with open("teacher_knowledge.json", "w") as f:
        json.dump(knowledge_dict, f)
def manual_train():
    # frac = args.frac
    args = get_args()
    fold = args.fold
    EPOCHS = args.epochs
    BATCH_SIZE = 32
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()

        ## Begin to define model
        from keras_bert import load_trained_model_from_checkpoint
        model_bert = load_trained_model_from_checkpoint(
            config_path, checkpoint_path, training=True, seq_len=SEQ_LEN, )
        # model_right = load_trained_model_from_checkpoint(config_path, checkpoint_path, training=True, seq_len=SEQ_LEN, )
        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        app_des = model_bert.inputs[:2]
        dense_app_des = model_bert.get_layer('NSP-Dense').output
        model_bert = keras.models.Model(inputs=app_des, outputs=dense_app_des,
                                        name='bert_output')
        inputs = [
            keras.models.Input(shape=(SEQ_LEN, ), name=f'INPUT-{name}')
            for name in range(4)
        ]
        left = model_bert(inputs[:2])
        right = model_bert(inputs[2:])
        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )
        fc_ex = keras.layers.concatenate([left, right], axis=1)
        # fc_ex = keras.layers.Subtract()([left, right])
        # End input from manual
        # outputs = keras.layers.Dense(units=8, activation='softmax')(fc_ex)
        outputs = keras.layers.Dense(units=1, activation='sigmoid')(fc_ex)
        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )
        model.summary(line_length=120)
        ## End to define model

    input1_col = [col for col in X.columns if str(col).startswith('bert_')]
    input3_col = [col for col in X.columns if str(col).startswith('fea_')]
    # max_words = len(input1_col)
    # model = get_model(max_words)
    Y_cat = y

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index_no_bin
        train_idx, test_idx = split_df_by_index_no_bin(X, fold)
        logger.info(
            f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}'
        )
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]
        logger.info(
            f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} '
        )
        # for sn in range(5):
        input1 = train_x.loc[:, input1_col]  # .astype(np.float32)
        input2 = np.zeros_like(input1)  # .astype(np.int8)
        input3 = train_x.loc[:, input3_col]
        input4 = np.zeros_like(input3)
        logger.info(
            f'NN Input1:{input1.shape}, Input2:{input2.shape}, Input3:{input3.shape}'
        )
        logger.info(f'NN train_x:{train_x[:3]}')
        from keras_bert import get_custom_objects
        import tensorflow as tf
        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2, input3, input4],
                            train_y,
                            validation_data=([
                                val_x.loc[:, input1_col],
                                np.zeros_like(val_x.loc[:, input1_col]),
                                val_x.loc[:, input3_col],
                                np.zeros_like(val_x.loc[:, input3_col]),
                            ], val_y),
                            epochs=EPOCHS,
                            shuffle=True,
                            batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx])]
                            # steps_per_epoch=1000, validation_steps=10
                            )
        # gen_sub(model, X_test, sn)
    return his
# Signature reconstructed from the call site below; the original def line was cut off.
def categorical_crossentropy_with_label_smoothing(y_true, y_pred, label_smoothing=0.1):
    num_classes = math_ops.cast(array_ops.shape(y_true)[1], y_pred.dtype)
    y_true = y_true * (1.0 - label_smoothing) + (label_smoothing / num_classes)
    return categorical_crossentropy(y_true, y_pred)


if __name__ == '__main__':
    # model training
    train_D = DataGenerator(train_samples)
    dev_D = DataGenerator(dev_samples)
    model = SimpleMultiChoiceMRC(CONFIG_FILE_PATH, CHECKPOINT_FILE_PATH,
                                 MAX_SEQ_LENGTH, NUM_CHOICES).create_model()
    # add warmup
    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_samples),
        batch_size=BATCH_SIZE,
        epochs=EPOCH,
        warmup_proportion=WARMUP_RATION,
    )
    optimizer = AdamWarmup(total_steps, warmup_steps, lr=2e-5, min_lr=1e-8)
    filepath = "models/multi_choice_model_%s-{epoch:02d}-{val_acc:.4f}.h5" % dataset
    checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                                 save_best_only=True, save_weights_only=True, mode='max')
    model.compile(loss=categorical_crossentropy_with_label_smoothing,
                  optimizer=optimizer,
                  metrics=['accuracy'])
    print("begin model training...")
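The smoothing arithmetic above redistributes a fraction eps of the label mass uniformly over the K classes: y' = y * (1 - eps) + eps / K. A minimal numpy check with hypothetical values, independent of the Keras code:

import numpy as np

eps, K = 0.1, 4
y_true = np.array([0., 0., 1., 0.])             # one-hot, K = 4 classes
y_smooth = y_true * (1.0 - eps) + eps / K
print(y_smooth)        # [0.025 0.025 0.925 0.025]
print(y_smooth.sum())  # 1.0 -- still a valid probability distribution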
        target.append(No)
        target.append(No)
    return x1, x2, target, ids


train_x1, train_x2, train_target, _ = genete_data1(train_topic, train_text, train_stance)
test_x1, test_x2, test_target, test_id = genete_data1(test_topic, test_text, test_stance)

from keras.layers import *
from keras.models import Model
from keras_bert import AdamWarmup, calc_train_steps

total_steps, warmup_steps = calc_train_steps(
    num_example=len(train_x1),
    batch_size=4,
    epochs=train_epochs,
    warmup_proportion=0.1,
)
optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-3, min_lr=1e-5)

bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path, seq_len=None)
for l in bert_model.layers:
    l.trainable = True

x1_in = Input(shape=(maxlen, ))
x2_in = Input(shape=(maxlen, ))
x = bert_model([x1_in, x2_in])
x = Lambda(lambda x: x[:, 0])(x)
p = Dense(1, activation='sigmoid')(x)
    indices = np.array(indices)
    return [indices, np.zeros_like(indices)], np.array(sentiments)


# Load the data and convert it to token ids
train_path = os.path.join(os.path.dirname(dataset), 'aclImdb', 'train')
test_path = os.path.join(os.path.dirname(dataset), 'aclImdb', 'test')
train_x, train_y = load_data(train_path)
test_x, test_y = load_data(test_path)

# Define the custom model
inputs = model.inputs[:2]
# bert_out_seq = model.get_
dense = model.get_layer('NSP-Dense').output  # output of the 'NSP-Dense' layer
outputs = keras.layers.Dense(units=2, activation='softmax')(dense)  # dense layer + softmax
decay_steps, warmup_steps = calc_train_steps(  # decay steps and warmup steps
    train_y.shape[0],
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
)
model = keras.models.Model(inputs, outputs)
model.compile(  # compile the model for training
    AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
    loss='sparse_categorical_crossentropy',
    metrics=['sparse_categorical_accuracy'],
)

# Initialize all variables
sess = K.get_session()
uninitialized_variables = set(
    [i.decode('ascii') for i in sess.run(tf.report_uninitialized_variables())])
init_op = tf.variables_initializer(
    [v for v in tf.global_variables() if v.name.split(':')[0] in uninitialized_variables]
)
def main():
    seq_id, seq_O, seq_P, id_to_label, id_to_term = encode_seq(
        df_label=df_label, maxlen=MAX_LEN)

    class Evaluation(Callback):
        def __init__(self, val_data, interval=1):
            self.val_data = val_data
            self.interval = interval
            self.best_f1 = 0.
            self.true_vp_val = [
                (row["id"], row["OpinionTerms"], row["Polarities"],
                 row['O_start'], row['O_end'])
                for rowid, row in df_label[
                    df_label['id'].isin(self.val_data[0])].iterrows()
            ]

        def on_epoch_end(self, epoch, log={}):
            if epoch % self.interval == 0:
                o_out, p_out = pred_model.predict(
                    self.val_data[1:4], batch_size=BATCH_SIZE)  # CRF probabilities
                o_pred = np.argmax(o_out, axis=2)
                p_pred = np.argmax(p_out, axis=2)
                texts = [
                    df_review[df_review['id'] == i]["Reviews"].values[0]
                    for i in self.val_data[0]
                ]
                pred_vp_val = decode_seq(self.val_data[0], o_pred, p_pred,
                                         id_to_label, texts)
                precision, recall, f1 = cal_opinion_metrics(
                    pred_vp_val, self.true_vp_val)
                if f1 > self.best_f1:
                    self.best_f1 = f1
                    self.model.save_weights(
                        f'./model_op/op_model_0924_viteb.weights')
                    print(f'best = {f1}')

    tokenizer = BertTokenizer(token_dict)
    seq_input, seq_seg = bert_text_to_seq(list(df_review["Reviews"]),
                                          tokenizer, maxlen=MAX_LEN)
    true_vp = [(row["id"], row["OpinionTerms"], row["Polarities"],
                row['O_start'], row['O_end'])
               for rowid, row in df_label.iterrows()]
    pred_vp = decode_seq(seq_id, seq_O, seq_P, id_to_label,
                         list(df_review["Reviews"]))
    cal_opinion_metrics(pred_vp, true_vp)
    seq_O = to_categorical(seq_O)
    seq_P = to_categorical(seq_P)
    df_review['pos_tag'] = df_review['Reviews'].progress_apply(pos_tag)
    with open('./data/postag2id_0922_laptop_make_up.pkl', 'rb') as f:
        postag2id = pickle.load(f)
    df_review['pos_tag'] = df_review['pos_tag'].progress_apply(
        lambda postag: [postag2id[x] for x in postag])
    seq_postag = np.array(df_review['pos_tag'].values.tolist())
    view_train, view_val = split_viewpoints(seq_id, seq_input, seq_seg,
                                            seq_O, seq_P, seq_postag)
    print(view_val[0])
    print('------------------- save the validation-set ids ---------------------')
    print('saving the final validation-set val ids')
    # np.save('./data/final_makeup_laptop_val_ids', view_val[0])
    print('------------------- done saving ---------------------------')
    # exit()
    bert_model = load_trained_model_from_checkpoint(config_path, checkpoint_path,
                                                    seq_len=None)
    for l in bert_model.layers:
        l.trainable = True
    x1_in = Input(shape=(MAX_LEN, ), name='x1_in')
    x2_in = Input(shape=(MAX_LEN, ), name='x2_in')
    o_in = Input(shape=(MAX_LEN, len(id_to_term) + 1, ), name='o_in')
    p_in = Input(shape=(MAX_LEN, len(id_to_label) + 1, ), name='p_in')
    pos_tag_in = Input(shape=(MAX_LEN, ), name='pos_tag_in')
    pos_tag_emb = Embedding(len(postag2id), POS_TAG_DIM, trainable=True)(pos_tag_in)
    x = bert_model([x1_in, x2_in])
    x = Concatenate()([x, pos_tag_emb])
    p_out = Dense(len(id_to_label) + 1, activation='softmax')(x)  # p_out is the polarity output
    crf = CRF(len(id_to_term) + 1)
    o_out = crf(x)
    loss_seq_O = crf.loss_function(o_in, o_out)
    # the computation graph breaks if this is added as a Lambda layer directly
    loss_seq_O = Lambda(lambda x: K.mean(x))(loss_seq_O)
    # loss_seq_O = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])), name='loss_seq_O')([o_in, o_out])
    loss_p = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])),
                    name='loss_c')([p_in, p_out])
    train_model = Model([x1_in, x2_in, pos_tag_in, o_in, p_in], [o_out, p_out])
    pred_model = Model([x1_in, x2_in, pos_tag_in], [o_out, p_out])
    train_model._losses = []
    train_model._per_input_losses = {}
    train_model.add_loss(loss_seq_O)
    train_model.add_loss(loss_p)
    print(view_train[0].shape[0])
    total_steps, warmup_steps = calc_train_steps(
        num_example=view_train[0].shape[0],
        batch_size=BATCH_SIZE,
        epochs=EPOCHS,
        warmup_proportion=0.1,
    )
    # optimizer = Adam(lr=1e-5)
    optimizer = AdamWarmup(total_steps, warmup_steps, lr=5e-5, min_lr=1e-6)
    train_model.compile(optimizer=optimizer)
    train_model.metrics_tensors.append(loss_seq_O)
    train_model.metrics_names.append('loss_seq_O')
    train_model.metrics_tensors.append(loss_p)
    train_model.metrics_names.append('loss_p')
    train_model.summary()
    eval_callback = Evaluation(val_data=view_val)
    train_model.fit(view_train[1:],
                    epochs=EPOCHS,
                    shuffle=True,
                    batch_size=BATCH_SIZE,
                    callbacks=[eval_callback])
# -*- coding: utf-8 -*-
"""
Created on Tue Nov 23 20:51:40 2021

@author: xiuzhang
"""
import numpy as np
from keras_bert import AdamWarmup, calc_train_steps

# Generate random data
train_x = np.random.standard_normal((1024, 100))
print(train_x)

# Train in batches with warmup
total_steps, warmup_steps = calc_train_steps(
    num_example=train_x.shape[0],
    batch_size=32,
    epochs=10,
    warmup_proportion=0.1,
)
optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-3, min_lr=1e-5)
print(optimizer)
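The script above only constructs the optimizer. To actually use it, pass it to model.compile like any other Keras optimizer. A minimal end-to-end sketch; the toy dense model and random labels are hypothetical, chosen only to match the shapes above:

import numpy as np
from tensorflow.python import keras
from keras_bert import AdamWarmup, calc_train_steps

# Toy data matching the shapes above; labels are made up for illustration.
train_x = np.random.standard_normal((1024, 100))
train_y = np.random.randint(0, 2, (1024,))

total_steps, warmup_steps = calc_train_steps(
    num_example=train_x.shape[0], batch_size=32, epochs=10, warmup_proportion=0.1)

# A tiny dense model, just to show AdamWarmup plugged into compile().
model = keras.models.Sequential([
    keras.layers.Dense(32, activation='relu', input_shape=(100,)),
    keras.layers.Dense(2, activation='softmax'),
])
model.compile(
    AdamWarmup(total_steps, warmup_steps, lr=1e-3, min_lr=1e-5),
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
model.fit(train_x, train_y, batch_size=32, epochs=10)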
def manual_train():
    # frac = args.frac
    args = get_args()
    fold = args.fold
    EPOCHS = args.epochs
    BATCH_SIZE = 128
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()

        ## Begin to define model
        from keras_bert import load_trained_model_from_checkpoint
        model = load_trained_model_from_checkpoint(config_path, checkpoint_path,
                                                   training=True, seq_len=SEQ_LEN, )
        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        inputs = model.inputs[:2]
        dense_bert = model.get_layer('NSP-Dense').output
        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )

        # New input from manual
        data = get_feature_bert_wv().add_prefix('fea_')
        manual_fea_len = len([col for col in data.columns if col.startswith('fea_')])
        logger.info(f'manual_fea_len:{manual_fea_len}')
        manual_feature = keras.Input(shape=(manual_fea_len,), name='manual_feature',
                                     dtype='float32')
        inputs = inputs + [manual_feature]
        manual_feature = keras.layers.Dense(round(num_classes * 0.6),
                                            name='manual_dense',
                                            activation='relu')(manual_feature)
        manual_feature = keras.layers.Dropout(0.5)(manual_feature)
        # manual_feature = keras.layers.Dense(round(num_classes), activation='relu')(manual_feature)
        fc_ex = keras.layers.concatenate([dense_bert, manual_feature], axis=1)
        # End input from manual

        # fc_ex = keras.layers.Dense(units=1024, activation='softmax')(fc_ex)
        outputs = keras.layers.Dense(units=num_classes, activation='softmax')(fc_ex)
        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        model.summary(line_length=120)
        ## End to define model

    input1_col = [col for col in X.columns if str(col).startswith('bert_')]
    input3_col = [col for col in X.columns if str(col).startswith('fea_')]
    # max_words = len(input1_col)
    # model = get_model(max_words)
    # get_feature_manual.cache_clear()
    Y_cat = keras.utils.to_categorical(y, num_classes=num_classes)
    # folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index
        train_idx, test_idx = split_df_by_index(X, fold)
        logger.info(f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}')
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]
        logger.info(f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} ')
        # for sn in range(5):
        input1 = train_x.loc[:, input1_col]  # .astype(np.float32)
        input2 = np.zeros_like(input1)  # .astype(np.int8)
        input3 = train_x.loc[:, input3_col]
        logger.info(f'NN Input1:{input1.shape}, Input2:{input2.shape}, Input3:{input3.shape}')
        logger.info(f'NN train_x:{train_x[:3]}')
        from keras_bert import get_custom_objects
        import tensorflow as tf
        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2, input3],
                            train_y,
                            validation_data=([
                                val_x.loc[:, input1_col],
                                np.zeros_like(val_x.loc[:, input1_col]),
                                val_x.loc[:, input3_col]
                            ], val_y),
                            epochs=EPOCHS,
                            shuffle=True,
                            batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx])]
                            # steps_per_epoch=1000, validation_steps=10
                            )
        # gen_sub(model, X_test, sn)
    return his
a_model = Model([x1_in, x2_in, opinion_mask_in, lf_pos_in, rt_pos_in], a_out)
cp_model = Model([x1_in, x2_in, opinion_mask_in, lf_pos_in, rt_pos_in], c_out)
train_model = Model(
    [x1_in, x2_in, seq_a_in, opinion_mask_in, lf_pos_in, rt_pos_in, c_in],
    [a_out, c_out])
loss_c = Lambda(lambda x: K.mean(categorical_crossentropy(x[0], x[1])),
                name='loss_p')([c_in, c_out])
train_model.add_loss(loss_A)
train_model.add_loss(loss_c)
total_steps, warmup_steps = calc_train_steps(
    num_example=train_data[0].shape[0],
    batch_size=BATCH_SIZE,
    epochs=100,
    warmup_proportion=0.05,
)
optimizer = AdamWarmup(total_steps, warmup_steps, lr=1e-4, min_lr=1e-6)
train_model.compile(optimizer=optimizer)
train_model.metrics_tensors.append(loss_A)
train_model.metrics_names.append('loss_A')
train_model.metrics_tensors.append(loss_c)
train_model.metrics_names.append('loss_c')
train_model.summary()
eval_callback = Evaluation(val_data=val_data)
f = keras.layers.Dense(32, activation='relu')(f)
f = keras.layers.Dropout(0.5)(f)
outpt = keras.layers.Dense(classes_dict[mode], activation=activation_dict[mode])(f)
model = keras.models.Model([ind, seg, inpt2], outpt)
# model.summary()
# keras.utils.plot_model(model,'model.png')

"""#Train"""

batch_size = 5
epochs = 60
decay_steps, warmup_steps = keras_bert.calc_train_steps(4 * ln // 5,
                                                        batch_size=batch_size,
                                                        epochs=epochs)
adawarm = keras_bert.AdamWarmup(decay_steps=decay_steps,
                                warmup_steps=warmup_steps,
                                lr=1e-4)
model.compile(optimizer=adawarm, loss=loss_dict[mode], metrics=['acc'])
# model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['acc'])
es = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10,
                                   restore_best_weights=True)
cp = keras.callbacks.ModelCheckpoint('best_acc_model.h5', monitor='val_acc')
csvl = keras.callbacks.CSVLogger('train_log.csv')
history = model.fit([ind_array, seg_array, param],
#
# inp = layers.Input(shape=(max_sequence_len, ))
# emb = layers.Embedding(len(token_dict), 50, mask_zero=True)(inp)
# crf = CRF(len(tag_dict), sparse_target=True)(emb)
# base_model = models.Model(inputs=inp, outputs=crf)
# base_model.compile(optimizers.Adam(lr=0.01), crf_loss, metrics=[crf_viterbi_accuracy])
#
# base_model.summary()
#
# base_model.fit([train_sentence_indices], train_tags, validation_data=([devel_sentence_indices], devel_tags), batch_size=batch_size, epochs=50, verbose=1)

print("Loading BERT")

total_steps, warmup_steps = calc_train_steps(
    num_example=len(train_sentences),
    batch_size=batch_size,
    epochs=10,
    warmup_proportion=0.1,
)
print(total_steps, warmup_steps)
optimizer = AdamWarmup(5 * total_steps, warmup_steps, lr=2e-5, min_lr=2e-7,
                       weight_decay=weight_decay)
# import pdb; pdb.set_trace()

bert_model = load_trained_model_from_checkpoint(config_path,
                                                checkpoint_path,
                                                training=False,
    with open('new_data.txt', 'w', encoding='utf-8') as file:
        file.write(json.dumps(dic, ensure_ascii=False))


if __name__ == '__main__':
    batch_size = 16
    learning_rate = 1e-3
    min_learning_rate = 1e-5
    epochs = 100
    is_test = False
    train_data, dev_data, test_data, id2class, class2id = read_data()
    total_steps, warmup_steps = calc_train_steps(
        num_example=len(train_data),
        batch_size=batch_size,
        epochs=epochs,
        warmup_proportion=0.1,
    )
    model, test_model = Graph(total_steps, warmup_steps,
                              lr=learning_rate, min_lr=min_learning_rate)
    if is_test:
        test_model.load_weights('output/subject_model.weights')
        model.load_weights('output/subject_model.weights')
        test(test_data, class2id, test_model)
        # acc = dev(dev_data, class2id, test_model)
        # print('acc: ', acc)
    else:
def main():
    args = get_args()

    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    np.random.seed(args.seed)

    if args.verbose:
        log.basicConfig(level=log.DEBUG, stream=sys.stdout)
    else:
        log.basicConfig(level=log.INFO, stream=sys.stdout)
    log.info('\n' + tabulate(sorted(vars(args).items())))
    set_logger(os.path.join(args.output_dir, args.log_file))

    pick_device()

    data = load_instances(args.dataset, args.label_col)
    classes = list(sorted(set(data[args.label_col])))
    args.n_classes = len(classes)

    token_dict = load_vocabulary(args.vocab_file)
    tokenizer = Tokenizer(token_dict)

    if args.do_train:
        folds = [i for i in args.train_dataset.split(',')]
        train_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        train_generator = TextDataFrameIterator(
            dataframe=train_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=True,
            seq_len=args.max_seq_length,
            seed=args.seed,
            do_lower_case=args.do_lower_case
        )
        folds = [i for i in args.val_dataset.split(',')]
        val_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        val_generator = TextDataFrameIterator(
            dataframe=val_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )
        total_steps, warmup_steps = calc_train_steps(
            num_example=len(train_df),
            batch_size=args.batch_size,
            epochs=args.epochs,
            warmup_proportion=args.warmup_proportion,
        )
        model = get_model(args)
        earlystop = callbacks.EarlyStopping(
            monitor='val_loss', min_delta=K.epsilon(),
            patience=args.earlystop, verbose=1, mode='auto')
        best_checkpoint = callbacks.ModelCheckpoint(
            os.path.join(args.output_dir, args.best_model),
            save_best_only=True, save_weights_only=False,
            monitor='val_loss', mode='min', verbose=1)
        csv_logger = callbacks.CSVLogger(os.path.join(args.output_dir, args.csv_logger))
        callbacks_list = [earlystop, best_checkpoint, csv_logger]
        optimizer = AdamWarmup(
            decay_steps=total_steps,
            warmup_steps=warmup_steps,
            lr=args.learning_rate,
            beta_1=0.9,
            beta_2=0.999,
            epsilon=1e-6,
            min_lr=1e-5,
            weight_decay=0.01,
            weight_decay_pattern=['embeddings', 'kernel', 'W1', 'W2', 'Wk', 'Wq', 'Wv', 'Wo']
        )
        model.compile(optimizer, loss='categorical_crossentropy', metrics=['accuracy'])
        cw = get_class_weights(data, args.label_col, train_generator.class_indices)
        model.fit_generator(
            train_generator,
            class_weight=cw,
            use_multiprocessing=False,
            workers=args.workers,
            callbacks=callbacks_list,
            epochs=args.epochs,
            validation_data=val_generator,
            verbose=1)

    if args.do_test:
        folds = [i for i in args.test_dataset.split(',')]
        test_df = data[data['fold'].isin(folds)].reset_index(drop=True)
        test_generator = TextDataFrameIterator(
            dataframe=test_df,
            tokenizer=tokenizer,
            classes=classes,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )
        print('Load from %s' % os.path.join(args.output_dir, args.best_model))
        model = load_model(os.path.join(args.output_dir, args.best_model),
                           custom_objects=get_custom_objects())
        # model.summary()
        y_score = model.predict_generator(
            test_generator,
            use_multiprocessing=False,
            workers=args.workers,
            verbose=1)
        y_pred = np.argmax(y_score, axis=1)
        pred_df = pd.DataFrame(y_score, columns=classes)
        pred_df = pred_df.assign(predictions=[classes[lbl] for lbl in y_pred])
        y_true = test_df.loc[:, args.label_col].values
        y_pred = pred_df['predictions'].values
        report = pmetrics.classification_report(y_true, y_pred, classes=classes)
        print(report.summary())
        # print('auc', pmetrics.auc(y_true, y_score, y_column=1)[0])
        result = pd.concat([test_df, pred_df], axis=1)
        result.to_csv(os.path.join(args.output_dir, args.test_predictions), index=False)

    if args.do_predict:
        test_df = load_instances(args.pred_dataset, args.label_col)
        test_generator = TextDataFrameIterator(
            dataframe=test_df,
            tokenizer=tokenizer,
            classes=None,
            x_col=args.text_col,
            y_col=args.label_col,
            batch_size=args.batch_size,
            shuffle=False,
            seq_len=args.max_seq_length,
            do_lower_case=args.do_lower_case
        )
        print('Load from %s' % os.path.join(args.output_dir, args.best_model))
        model = load_model(os.path.join(args.output_dir, args.best_model),
                           custom_objects=get_custom_objects())
        # model.summary()
        y_score = model.predict_generator(
            test_generator,
            use_multiprocessing=False,
            workers=args.workers,
            verbose=1)
        y_pred = np.argmax(y_score, axis=1)
        pred_df = pd.DataFrame(y_score, columns=classes)
        pred_df = pred_df.assign(predictions=[classes[lbl] for lbl in y_pred])
        result = pd.concat([test_df, pred_df], axis=1)
        result.to_csv(os.path.join(args.output_dir, args.pred_predictions), index=False)

    if args.do_debug:
        for dataset in [args.train_dataset, args.val_dataset, args.test_dataset]:
            folds = [i for i in dataset.split(',')]
            print('folds:', folds)
            sub_df = data[data['fold'].isin(folds)]
            generator = TextDataFrameIterator(
                dataframe=sub_df,
                tokenizer=tokenizer,
                x_col=args.text_col,
                y_col=args.label_col,
                batch_size=args.batch_size,
                shuffle=False,
                seq_len=args.max_seq_length,
            )
            for i, ([tokens, _], labels) in enumerate(generator):
                print(tokens.shape, type(tokens), labels.shape, type(labels))
                if i == 2:
                    break
print("finish data processing!") # 模型训练 model = create_cls_model(len(labels)) train_D = DataGenerator(train_data) test_D = DataGenerator(test_data) print("begin model training...") # 保存最新的val_acc最好的模型文件 filepath = "models/%s-{epoch:02d}-{val_acc:.4f}.h5" % DATA_DIR.split("/")[-1] checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') # add warmup total_steps, warmup_steps = calc_train_steps( num_example=len(train_data), batch_size=BATCH_SIZE, epochs=EPOCH, warmup_proportion=0.1, ) optimizer = AdamWarmup(total_steps, warmup_steps, lr=5e-5, min_lr=1e-7) model.compile( loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'] ) model.fit_generator( train_D.__iter__(), steps_per_epoch=len(train_D), epochs=EPOCH, validation_data=test_D.__iter__(), validation_steps=len(test_D), callbacks=[checkpoint]
def train_base():
    args = get_args()
    # frac = args.frac
    fold = args.fold
    EPOCHS = args.epochs
    BATCH_SIZE = 128
    LR = 1e-4

    with timed_bolck(f'Prepare train data#{BATCH_SIZE}'):
        X, y, _ = get_train_test_bert()

        ## Begin to define model
        from keras_bert import load_trained_model_from_checkpoint
        model = load_trained_model_from_checkpoint(
            config_path, checkpoint_path, training=True, seq_len=SEQ_LEN, )
        model.summary(line_length=120)
        from tensorflow.python import keras
        from keras_bert import AdamWarmup, calc_train_steps
        inputs = model.inputs[:2]
        dense = model.get_layer('NSP-Dense').output
        keras.models.Model(inputs, dense).summary()
        outputs = keras.layers.Dense(units=num_classes, activation='softmax')(dense)
        decay_steps, warmup_steps = calc_train_steps(
            y.shape[0],
            batch_size=BATCH_SIZE,
            epochs=EPOCHS,
        )
        model = keras.models.Model(inputs, outputs)
        model.compile(
            AdamWarmup(decay_steps=decay_steps, warmup_steps=warmup_steps, lr=LR),
            loss='categorical_crossentropy',
            metrics=['accuracy'],
        )
        ## End to define model

    input1_col = [col for col in X.columns if str(col).startswith('bert_')]
    input2_col = [col for col in X.columns if str(col).startswith('fea_')]
    # max_words = len(input1_col)
    # model = get_model(max_words)
    # get_feature_manual.cache_clear()
    Y_cat = keras.utils.to_categorical(y, num_classes=num_classes)
    # folds = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)

    with timed_bolck(f'Training#{fold}'):
        from core.split import split_df_by_index
        train_idx, test_idx = split_df_by_index(X, fold)
        logger.info(
            f'Shape train_x.loc[:, input1_col].iloc[:,0]: {X.loc[:, input1_col].iloc[:,0].shape}'
        )
        train_x, train_y, val_x, val_y = \
            X.iloc[train_idx], Y_cat[train_idx], X.iloc[test_idx], Y_cat[test_idx]
        logger.info(
            f'get_train_test output: train_x:{train_x.shape}, train_y:{train_y.shape}, val_x:{val_x.shape} '
        )
        # train_x, train_y = filter_short_desc(train_x, train_y)
        input1 = train_x.loc[:, input1_col]  # .astype(np.float32)
        input2 = np.zeros_like(input1)  # .astype(np.int8)
        logger.info(f'NN train_x:{train_x[:3]}')
        min_len_ratio = get_args().min_len_ratio
        max_bin = get_args().max_bin
        logger.info(
            f'NN Input1:{input1.shape}, Input2:{input2.shape}, SEQ_LEN:{SEQ_LEN}, min_len_ratio:{min_len_ratio}, bin:{max_bin} '
        )
        from keras_bert import get_custom_objects
        import tensorflow as tf
        with tf.keras.utils.custom_object_scope(get_custom_objects()):
            his = model.fit([input1, input2],
                            train_y,
                            validation_data=([
                                val_x.loc[:, input1_col],
                                np.zeros_like(val_x.loc[:, input1_col])
                            ], val_y),
                            epochs=EPOCHS,
                            shuffle=True,
                            batch_size=64,
                            callbacks=[Cal_acc(val_x, y.iloc[test_idx])]
                            # steps_per_epoch=1000, validation_steps=10
                            )
        # gen_sub(model, X_test, sn)
    return his