def __init__(self, maxLen, ocrLen, max_features, init_embedding_matrix,
             name='basicModel', num_flods=4, batch_size=64):
    """Store the hyper-parameters and build the splitter, snapshot schedule and model.

    :param maxLen: stored as ``self.maxLen`` (presumably the padded length of
        the main text input -- confirm against ``create_model``).
    :param ocrLen: stored as ``self.ocrLen`` (presumably the padded length of
        the OCR input -- confirm against ``create_model``).
    :param max_features: stored as ``self.max_features``.
    :param init_embedding_matrix: initial embedding matrix; the length of its
        first row is stored as ``self.embed_size``.
    :param name: model name, stored as ``self.name``.
    :param num_flods: number of K-fold splits (the spelling is part of the
        public signature and is kept as is).
    :param batch_size: stored as ``self.batch_size``.
    """
    # Plain hyper-parameters.
    self.name = name
    self.maxLen = maxLen
    self.ocrLen = ocrLen
    self.max_features = max_features
    self.batch_size = batch_size

    # Embedding matrix and its width (taken from the first row).
    self.embedding_matrix = init_embedding_matrix
    self.embed_size = len(init_embedding_matrix[0])

    # Shuffled K-fold splitter with a fixed seed.
    self.num_folds = num_flods
    self.kf = KFold(n_splits=self.num_folds, shuffle=True, random_state=10)

    # Snapshot schedule: epoch budget, number of snapshots, initial learning rate.
    n_snapshots = 3
    initial_lr = 5e-4
    self.snap_epoch = 12
    self.snapshot = SnapshotCallbackBuilder(self.snap_epoch, n_snapshots, initial_lr)

    # Built last so create_model() can read every attribute set above.
    self.model = self.create_model()
def __init__(self, n_folds=5, name='BasicModel', config=None):
    """Read the settings from ``config`` and build splitter, callbacks and model.

    :param n_folds: number of K-fold splits.
    :param name: model name, stored as ``self.name``.
    :param config: configuration object; the attributes read here are
        ``n_class``, ``CHAR_MAXLEN``, ``WORD_MAXLEN``, ``max_c_features``,
        ``max_w_features``, ``BATCH_SIZE``, ``char_init_embed``,
        ``word_init_embed``, ``n_gpus`` and ``wd``.  The process exits when
        it is ``None``.
    """
    if config is None:
        exit('请传入数值')

    self.name = name
    self.config = config
    self.n_class = config.n_class
    self.batch_size = config.BATCH_SIZE

    # Character-level features.
    self.char_max_len = config.CHAR_MAXLEN
    self.max_c_features = config.max_c_features
    self.char_mask_value = self.max_c_features - 2
    self.char_embedding = config.char_init_embed
    self.char_embed_size = len(self.char_embedding[0])

    # Word-level features.
    self.word_max_len = config.WORD_MAXLEN
    self.max_w_features = config.max_w_features
    self.word_mask_value = self.max_w_features - 2
    self.word_embedding = config.word_init_embed
    self.word_embed_size = len(self.word_embedding[0])

    # Shuffled K-fold splitter with a fixed seed.
    self.n_folds = n_folds
    self.kf = KFold(n_splits=n_folds, shuffle=True, random_state=10)

    # Learning-rate handling.  Earlier experiments picked init_lr / wd per
    # config.option and config.model_name; a single initial rate is used now.
    n_snapshots = 3
    self.last_val_acc = 0.
    self.init_lr = 0.001
    self.lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1,
                                         min_lr=0.000001, verbose=1)
    self.snapshot = SnapshotCallbackBuilder(NUM_EPOCHS, n_snapshots, self.init_lr)
    self.early_stop_monitor = EarlyStopping(patience=5)

    print("[INFO] training with {} GPUs...".format(config.n_gpus))
    self.wd = config.wd

    # Build the network last; wrap it for data-parallel training when more
    # than one GPU is configured.
    network = self.create_model()
    if config.n_gpus > 1:
        network = multi_gpu_model(network, gpus=config.n_gpus)
    self.model = network
class BasicDeepModel(BasicModel):
    """Base class for deep models fed with word- and char-level inputs.

    The constructor reads all settings from a ``config`` object, builds the
    K-fold splitter and the training callbacks, and calls ``create_model()``.
    ``create_model``, ``cal_acc`` and ``cal_f_alpha`` are not defined in this
    class; they are expected to come from the parent class or a subclass.
    """

    def __init__(self, n_folds=5, name='BasicModel', config=None):
        """Read the settings from ``config`` and build splitter, callbacks and model.

        :param n_folds: number of K-fold splits.
        :param name: model name; used in checkpoint, plot and result paths.
        :param config: configuration object (``n_class``, ``CHAR_MAXLEN``,
            ``WORD_MAXLEN``, ``max_c_features``, ``max_w_features``,
            ``BATCH_SIZE``, ``char_init_embed``, ``word_init_embed``,
            ``n_gpus``, ``wd`` are read here).  The process exits when it is
            ``None``.
        """
        if config is None:
            exit('请传入数值')
        self.name = name
        self.config = config
        self.n_class = config.n_class

        # Character-level features.
        self.char_max_len = config.CHAR_MAXLEN
        self.max_c_features = config.max_c_features
        # Word-level features.
        self.word_max_len = config.WORD_MAXLEN
        self.max_w_features = config.max_w_features

        self.char_mask_value = self.max_c_features - 2
        self.word_mask_value = self.max_w_features - 2
        self.batch_size = config.BATCH_SIZE

        self.char_embedding = config.char_init_embed
        self.word_embedding = config.word_init_embed
        self.char_embed_size = len(self.char_embedding[0])
        self.word_embed_size = len(self.word_embedding[0])

        self.n_folds = n_folds
        self.kf = KFold(n_splits=n_folds, shuffle=True, random_state=10)

        M = 3  # number of snapshots
        self.last_val_acc = 0.
        self.init_lr = 0.001
        self.lr_schedule = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1,
                                             min_lr=0.000001, verbose=1)

        # train_predict() options 1 and 2 read self.snap_epoch; it must match
        # the epoch budget handed to the snapshot builder below.
        self.snap_epoch = NUM_EPOCHS
        self.snapshot = SnapshotCallbackBuilder(self.snap_epoch, M, self.init_lr)
        self.early_stop_monitor = EarlyStopping(patience=5)

        print("[INFO] training with {} GPUs...".format(config.n_gpus))
        self.wd = config.wd
        self.model = self.create_model()
        if config.n_gpus > 1:
            self.model = multi_gpu_model(self.model, gpus=config.n_gpus)

    def poly_decay_attention(self, epoch):
        """Return the learning rate for ``epoch``: constant for the first five
        epochs, then a linear (power 1) decay over ``NUM_EPOCHS``."""
        if epoch < 5:
            print('epoch:{}, lr:{}, wd:{}'.format(1+epoch, self.init_lr, self.wd))
            return self.init_lr
        maxEpochs = NUM_EPOCHS
        baseLR = self.init_lr
        power = 1.0

        # Polynomial decay of the base learning rate.
        alpha = baseLR * (1 - (epoch / (float(maxEpochs)))) ** power
        print('epoch:{}, lr:{}, wd:{}'.format(1+epoch, alpha, self.wd))
        return alpha

    def poly_decay(self, epoch):
        """Return the step-decayed learning rate: halved every 12 epochs
        (counting epochs from 1)."""
        initial_lrate = self.init_lr
        drop = 0.5
        epochs_drop = 12
        lrate = initial_lrate * (drop ** ((1+epoch)//epochs_drop))
        print('epoch:{}, lr:{}, wd:{}'.format(1+epoch, lrate, self.wd))
        return lrate

    def _plot_history(self, history, fold):
        """Plot loss/accuracy curves from a history dict and save the figure
        under ``loss/``."""
        N = np.arange(0, len(history["loss"]))
        plt.style.use("ggplot")
        plt.figure()
        plt.plot(N, history["loss"], label="train_loss")
        plt.plot(N, history["val_loss"], label="test_loss")
        plt.plot(N, history["acc"], label="train_acc")
        plt.plot(N, history["val_acc"], label="test_acc")
        plt.title("model {} option {}".format(self.name, self.config.option))
        plt.xlabel("Epoch #")
        plt.ylabel("Loss/Accuracy")
        plt.legend()

        # Save the figure.
        os.makedirs('loss', exist_ok=True)
        plt.savefig('loss/{}-op{}-fold{}.png'.format(self.name, self.config.option, fold))
        plt.close()

    def plot_loss(self, H, fold):
        """Plot the curves of one ``fit`` run (``H`` exposes ``.history``)."""
        self._plot_history(H.history, fold)

    def plot_loss_option3(self, H1, H2, fold):
        """Plot the curves of two consecutive ``fit`` runs as one sequence."""
        first, second = H1.history, H2.history
        merged = {key: first[key] + second[key]
                  for key in ('loss', 'val_loss', 'acc', 'val_acc')}
        self._plot_history(merged, fold)

    def _set_embedding_trainable(self, trainable):
        """Freeze/unfreeze the embedding layer(s) chosen by ``config.main_feature``."""
        feature = self.config.main_feature
        if feature == 'all':
            self.model.get_layer('char_embedding').trainable = trainable
            self.model.get_layer('word_embedding').trainable = trainable
        elif feature == 'word':
            self.model.get_layer('word_embedding').trainable = trainable
        elif feature == 'char':
            self.model.get_layer('char_embedding').trainable = trainable
        else:
            exit('Wrong feature')

    def _compile_model(self, optimizer):
        """Compile ``self.model`` with categorical cross-entropy and accuracy."""
        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer,
                           metrics=['accuracy'])

    def _fit_fold(self, X_train, y_train, X_valid, y_valid, epochs, callbacks=None):
        """Run one ``fit`` call; the batch size is scaled by the GPU count."""
        return self.model.fit(X_train, y_train,
                              batch_size=self.batch_size * self.config.n_gpus,
                              epochs=epochs,
                              verbose=1,
                              validation_data=(X_valid, y_valid),
                              callbacks=callbacks)

    def train_predict(self, train, train_y, test, option=3):
        """Train with K-fold cross validation and pickle OOF/test predictions.

        For every fold the initial weights are restored, the model is trained
        according to ``option``, every ``.h5`` file found in the fold directory
        is loaded and its predictions are averaged.

        :param train: dict of arrays with keys ``word``, ``char``,
            ``word_left``, ``word_right``, ``char_left``, ``char_right``.
        :param train_y: one-hot labels, indexable by an index array.
        :param test: dict of test inputs (``test['word']`` gives the row count).
        :param option: training recipe, 1-6; any other value exits.
        :return: None.  Predictions are pickled under
            ``../data/result-op{config.option}/``.
        """
        name = self.name
        model_name = '../ckpt-op{}/{}'.format(self.config.option, self.name)
        os.makedirs(model_name, exist_ok=True)
        self.model.save_weights(model_name + '/init_weight.h5')

        count_kflod = 0
        predict = np.zeros((len(test['word']), self.n_class))
        oof_predict = np.zeros((len(train['word']), self.n_class))
        scores_acc = []
        scores_f1 = []
        for train_index, test_index in self.kf.split(train['word']):
            kfold_X_train = {}
            kfold_X_valid = {}
            model_prefix = model_name + '/' + str(count_kflod)
            os.makedirs(model_prefix, exist_ok=True)
            filepath = model_prefix + '/' + str(count_kflod) + 'model.h5'
            checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                         save_best_only=True, mode='min')
            y_train, y_test = train_y[train_index], train_y[test_index]

            # Every fold starts from the same initial weights.
            self.model.load_weights(model_name + '/init_weight.h5')
            for c in ['word', 'char', 'word_left', 'word_right', 'char_left', 'char_right']:
                kfold_X_train[c] = train[c][train_index]
                kfold_X_valid[c] = train[c][test_index]

            if option == 1:
                # Snapshot training.
                # NOTE(review): the original comment says the embedding is
                # frozen here, but no layer is frozen in this branch.
                self._compile_model(optimizers.Adam(lr=1e-3, clipvalue=2.0))
                self.model.summary()
                self._fit_fold(kfold_X_train, y_train, kfold_X_valid, y_test,
                               epochs=self.snap_epoch,
                               callbacks=self.snapshot.get_callbacks(model_save_place=model_prefix))
            elif option == 2:
                # Warm-up first, then unfreeze the embedding and train with snapshots.
                self._compile_model(optimizers.Adam(lr=1e-3, clipvalue=2))
                self.model.summary()
                self._fit_fold(kfold_X_train, y_train, kfold_X_valid, y_test, epochs=6)

                self._set_embedding_trainable(True)
                self._compile_model('adam')
                self._fit_fold(kfold_X_train, y_train, kfold_X_valid, y_test,
                               epochs=self.snap_epoch,
                               callbacks=self.snapshot.get_callbacks(model_save_place=model_prefix))
            elif option == 3:
                # Freeze the embedding first, then unfreeze it and keep training.
                self._set_embedding_trainable(False)
                self._compile_model(optimizers.Adam(lr=1e-3, clipvalue=2.4))
                self.model.summary()
                H1 = self._fit_fold(kfold_X_train, y_train, kfold_X_valid, y_test, epochs=2)

                self._set_embedding_trainable(True)
                print('放开embedding训练')
                callbacks = [
                    self.lr_schedule,
                    checkpoint,
                ]
                self._compile_model(optimizers.Adam(lr=1e-3, clipvalue=1.5))
                self.model.summary()
                H2 = self._fit_fold(kfold_X_train, y_train, kfold_X_valid, y_test,
                                    epochs=10, callbacks=callbacks)
                self.plot_loss_option3(H1, H2, count_kflod)
            elif option == 4:
                # SGD with step decay and early stopping.
                if self.config.n_gpus == 1:
                    self._set_embedding_trainable(True)
                opt = optimizers.SGD(lr=self.init_lr, momentum=0.9, decay=1e-6)
                self._compile_model(opt)
                self.model.summary()
                callbacks = [
                    LearningRateScheduler(self.poly_decay),
                    self.early_stop_monitor,
                ]
                H = self._fit_fold(kfold_X_train, y_train, kfold_X_valid, y_test,
                                   epochs=NUM_EPOCHS, callbacks=callbacks)
                self.plot_loss(H, count_kflod)
                self.model.save_weights(model_prefix + '/' + str(count_kflod) + 'model.h5')
            elif option == 5:
                # Adam with gradient-norm clipping (marked "currently best" by the author).
                opt = optimizers.Adam(lr=1e-3, clipnorm=1.0)
                self._compile_model(opt)
                self.model.summary()
                callbacks = [
                    checkpoint,
                    self.lr_schedule,
                ]
                H = self._fit_fold(kfold_X_train, y_train, kfold_X_valid, y_test,
                                   epochs=20, callbacks=callbacks)
                self.plot_loss(H, count_kflod)
            elif option == 6:
                # Snapshot + Adam.
                if self.config.n_gpus == 1:
                    self._set_embedding_trainable(True)
                opt = optimizers.Adam(lr=self.init_lr, decay=1e-6)
                self._compile_model(opt)
                self.model.summary()
                # The callbacks were never defined in this branch (NameError);
                # the snapshot callbacks are what "snapshot + adam" asks for.
                callbacks = self.snapshot.get_callbacks(model_save_place=model_prefix)
                H = self._fit_fold(kfold_X_train, y_train, kfold_X_valid, y_test,
                                   epochs=NUM_EPOCHS, callbacks=callbacks)
                self.plot_loss(H, count_kflod)
            else:
                exit('Wrong option')

            # Average the predictions of every weight file saved for this fold.
            evaluations = [fname for fname in os.listdir(model_prefix) if '.h5' in fname]
            print(evaluations)

            preds1 = np.zeros((test['word'].shape[0], self.n_class))
            preds2 = np.zeros((len(kfold_X_valid['word']), self.n_class))
            for weight_file in evaluations:
                self.model.load_weights(os.path.join(model_prefix, weight_file))
                preds1 += self.model.predict(test, verbose=1) / len(evaluations)
                preds2 += self.model.predict(kfold_X_valid,
                                             batch_size=64*self.config.n_gpus) / len(evaluations)

            predict += preds1 / self.n_folds
            oof_predict[test_index] = preds2

            accuracy = self.cal_acc(oof_predict[test_index], np.argmax(y_test, axis=1))
            f1 = self.cal_f_alpha(oof_predict[test_index], np.argmax(y_test, axis=1),
                                  n_out=self.n_class)
            print('the kflod cv acc is : ', str(accuracy))
            print('the kflod cv f1 is : ', str(f1))
            count_kflod += 1
            scores_acc.append(accuracy)
            scores_f1.append(f1)

        print('total acc scores is ', np.mean(scores_acc))
        print('total f1 scores is ', np.mean(scores_f1))

        os.makedirs('../data/result-op{}'.format(self.config.option), exist_ok=True)
        with open('../data/result-op{}/{}_oof_f1_{}_a{}.pkl'.format(
                self.config.option, name, str(np.mean(scores_f1)),
                str(np.mean(scores_acc))), 'wb') as f:
            pickle.dump(oof_predict, f)

        with open('../data/result-op{}/{}_pre_f1_{}_a{}.pkl'.format(
                self.config.option, name, str(np.mean(scores_f1)),
                str(np.mean(scores_acc))), 'wb') as f:
            pickle.dump(predict, f)

        print('done')

    def rerun(self, test):
        """Reload four fold checkpoints and pickle their combined test prediction.

        NOTE(review): the checkpoints are read from ``../ckpt/{name}/...``
        while ``train_predict`` writes to ``../ckpt-op{option}/{name}/...``;
        confirm which layout is intended.
        NOTE(review): ``preds1`` is already the mean over the four
        checkpoints, so the final ``/ 4`` scales the stored values down a
        second time (argmax is unaffected); kept as is.

        :param test: test input exposing ``shape`` and ``len``.
        """
        name = self.name
        evaluations = ['../ckpt/{}/{}/{}model.h5'.format(name, i, i) for i in range(4)]

        predict = np.zeros((len(test), self.n_class))
        preds1 = np.zeros((test.shape[0], self.n_class))
        for weight_file in evaluations:
            self.model.load_weights(weight_file)
            preds1 += self.model.predict(test, verbose=1) / len(evaluations)

        predict += preds1 / 4

        os.makedirs('../data/result', exist_ok=True)
        with open('../data/result/' + name + '_pre_.pkl', 'wb') as f:
            pickle.dump(predict, f)
def stacking_pseudo(train, train_y, test, results):
    """Retrain with pseudo-labelled test data and return the averaged test prediction.

    The arg-max of ``results`` is turned into one-hot pseudo labels, the test
    rows are appended to the training rows, and a fresh model is trained per
    fold (6 folds) with snapshot callbacks.  Validation during training uses
    only the genuine (non-pseudo) rows of the held-out fold.

    :param train: dict with key ``'news'`` holding the training inputs.  The
        caller's dict is not modified.
    :param train_y: one-hot training labels (3 classes).
    :param test: dict with key ``'news'`` holding the test inputs.
    :param results: earlier test predictions, one row per test sample.
    :return: array of shape ``(n_test, 3)`` averaged over snapshots and folds.
    """
    n_class = 3
    # Rows below this index are genuine training samples; rows at or above it
    # are pseudo-labelled.
    # NOTE(review): hard-coded size of the original training set -- confirm it
    # matches len(train['news']) before the concatenation.
    true_length = 48480

    # Fix the label width explicitly: without num_classes the one-hot matrix
    # is narrower than n_class whenever the highest class is never predicted.
    answer = np.argmax(results, axis=1)
    answer = np_utils.to_categorical(answer, num_classes=n_class)
    train_y = np.concatenate([train_y, answer], axis=0)

    # Work on a shallow copy so the caller's dict is not extended in place
    # (a second call would otherwise append the test rows again).
    train = dict(train)
    train['news'] = np.concatenate([train['news'], test['news']], axis=0)

    savepath = './pesudo_/'
    os.makedirs(savepath, exist_ok=True)

    count_kflod = 0
    num_folds = 6
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=10)
    predict = np.zeros((test['news'].shape[0], n_class))
    oof_predict = np.zeros((train['news'].shape[0], n_class))
    scores = []
    for train_index, test_index in kf.split(train['news']):
        y_train, y_test = train_y[train_index], train_y[test_index]
        kfold_X_train = {'news': train['news'][train_index]}
        kfold_X_valid = {'news': train['news'][test_index]}

        # Monitor only genuine samples.  The original indexed the dict itself
        # (train[i]), which raises KeyError.
        real_index = [i for i in test_index if i < true_length]
        test_watch = np.array([train['news'][i] for i in real_index])
        test_label = np.array([train_y[i] for i in real_index])

        model_prefix = savepath + 'DNN' + str(count_kflod)
        os.makedirs(model_prefix, exist_ok=True)

        M = 4  # number of snapshots
        alpha_zero = 1e-3  # initial learning rate
        snap_epoch = 16
        snapshot = SnapshotCallbackBuilder(snap_epoch, M, alpha_zero)

        res_model = get_model(train['news'])
        res_model.compile(loss='categorical_crossentropy', optimizer='adam',
                          metrics=['accuracy'])
        res_model.fit(kfold_X_train, y_train, batch_size=BATCH_SIZE, epochs=snap_epoch,
                      verbose=1,
                      validation_data=(test_watch, test_label),
                      callbacks=snapshot.get_callbacks(model_save_place=model_prefix))

        # Average the predictions of every snapshot saved for this fold.
        evaluations = [fname for fname in os.listdir(model_prefix) if '.h5' in fname]
        print(evaluations)

        preds1 = np.zeros((test['news'].shape[0], n_class))
        preds2 = np.zeros((len(kfold_X_valid['news']), n_class))
        for weight_file in evaluations:
            res_model.load_weights(os.path.join(model_prefix, weight_file))
            preds1 += res_model.predict(test, verbose=1) / len(evaluations)
            preds2 += res_model.predict(kfold_X_valid, batch_size=128) / len(evaluations)

        predict += preds1 / num_folds
        oof_predict[test_index] = preds2

        accuracy = check_accuracy(oof_predict[test_index], y_test, test_index)
        print('the kflod cv is : ', str(accuracy))
        count_kflod += 1
        scores.append(accuracy)

    print('total scores is ', np.mean(scores))
    return predict
class BasicModel:
    """Base class of all models.

    Subclasses override ``create_model``; this class provides K-fold training
    with three recipes and pickles out-of-fold and test predictions.
    """

    def __init__(self, maxLen, ocrLen, max_features, init_embedding_matrix,
                 name='basicModel', num_flods=4, batch_size=64):
        """Store the hyper-parameters and build splitter, snapshot schedule and model.

        :param maxLen: stored as ``self.maxLen``.
        :param ocrLen: stored as ``self.ocrLen``.
        :param max_features: stored as ``self.max_features``.
        :param init_embedding_matrix: initial embedding matrix; the length of
            its first row becomes ``self.embed_size``.
        :param name: model name; used in checkpoint and result paths.
        :param num_flods: number of K-fold splits (spelling kept, it is part
            of the public signature).
        :param batch_size: batch size used by ``train_predict``.
        """
        self.name = name
        self.ocrLen = ocrLen
        self.batch_size = batch_size
        self.maxLen = maxLen
        self.max_features = max_features
        self.embedding_matrix = init_embedding_matrix
        self.embed_size = len(init_embedding_matrix[0])
        self.num_folds = num_flods
        self.kf = KFold(n_splits=self.num_folds, shuffle=True, random_state=10)

        M = 3  # number of snapshots
        alpha_zero = 5e-4  # initial learning rate
        self.snap_epoch = 12
        self.snapshot = SnapshotCallbackBuilder(self.snap_epoch, M, alpha_zero)
        self.model = self.create_model()

    def create_model(self):
        """Build and return the network; the base implementation returns None."""
        pass

    def train_predict(self, train, train_y, test, option=3, true_length=48480):
        """Train with K-fold cross validation and pickle OOF/test predictions.

        :param train: dict of arrays with keys ``news`` and ``ocr``.
        :param train_y: one-hot labels (3 classes), indexable by an index array.
        :param test: dict of test inputs (``test['news']`` gives the row count).
        :param option: 1 = frozen embedding with snapshots; 2 = frozen warm-up,
            then unfrozen with snapshots; anything else = frozen warm-up, then
            unfrozen fine-tuning with a lower learning rate.
        :param true_length: rows with an index at or above this value are
            skipped when the fold accuracy is computed.
        :return: None.  Predictions are pickled under ``../data/result/``.
        """
        name = self.name
        model_name = '../ckpt/' + name
        # makedirs also creates a missing '../ckpt' parent.
        os.makedirs(model_name, exist_ok=True)
        self.model.save_weights(model_name + '/init_weight.h5')

        count_kflod = 0
        predict = np.zeros((test['news'].shape[0], 3))
        oof_predict = np.zeros((train['news'].shape[0], 3))
        scores = []
        for train_index, test_index in self.kf.split(train['news']):
            kfold_X_train = {}
            kfold_X_valid = {}
            model_prefix = model_name + '/' + str(count_kflod)
            os.makedirs(model_prefix, exist_ok=True)

            y_train, y_test = train_y[train_index], train_y[test_index]

            # Every fold starts from the same initial weights.
            self.model.load_weights(model_name + '/init_weight.h5')
            for c in ['news', 'ocr']:
                kfold_X_train[c] = train[c][train_index]
                kfold_X_valid[c] = train[c][test_index]

            if option == 1:
                # Freeze the embedding and train with snapshots.
                self.model.get_layer('embedding').trainable = False
                adam_optimizer = optimizers.Adam(lr=1e-3, clipvalue=2.0)
                self.model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
                                   metrics=['accuracy'])
                self.model.summary()
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size,
                               epochs=self.snap_epoch, verbose=1,
                               validation_data=(kfold_X_valid, y_test),
                               callbacks=self.snapshot.get_callbacks(model_save_place=model_prefix))
            elif option == 2:
                # Freeze the embedding for a warm-up, then unfreeze it and
                # train with snapshots.
                self.model.get_layer('embedding').trainable = False
                adam_optimizer = optimizers.Adam(lr=1e-3, clipvalue=2)
                self.model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
                                   metrics=['accuracy'])
                self.model.summary()
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size, epochs=4,
                               verbose=1, validation_data=(kfold_X_valid, y_test))

                self.model.get_layer('embedding').trainable = True
                self.model.compile(loss='categorical_crossentropy', optimizer='adam',
                                   metrics=['accuracy'])
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size,
                               epochs=self.snap_epoch, verbose=1,
                               validation_data=(kfold_X_valid, y_test),
                               callbacks=self.snapshot.get_callbacks(model_save_place=model_prefix))
            else:
                # Freeze the embedding for a warm-up, then unfreeze it and
                # keep training with a lower learning rate.
                self.model.get_layer('embedding').trainable = False
                adam_optimizer = optimizers.Adam(lr=1e-3, clipvalue=2.4)
                self.model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
                                   metrics=['accuracy'])
                self.model.summary()
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size, epochs=6,
                               verbose=1, validation_data=(kfold_X_valid, y_test))

                adam_optimizer = optimizers.Adam(lr=1e-4, clipvalue=1.5)
                self.model.get_layer('embedding').trainable = True
                self.model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
                                   metrics=['accuracy'])
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size, epochs=5,
                               verbose=1, validation_data=(kfold_X_valid, y_test))
                self.model.save_weights(model_prefix + '/' + str(count_kflod) + 'model.h5')

            # Average the predictions of every weight file saved for this fold.
            evaluations = [fname for fname in os.listdir(model_prefix) if '.h5' in fname]
            print(evaluations)

            preds1 = np.zeros((test['news'].shape[0], 3))
            preds2 = np.zeros((len(kfold_X_valid['news']), 3))
            for weight_file in evaluations:
                self.model.load_weights(os.path.join(model_prefix, weight_file))
                preds1 += self.model.predict(test, verbose=1) / len(evaluations)
                preds2 += self.model.predict(kfold_X_valid, batch_size=128) / len(evaluations)

            predict += preds1 / self.num_folds
            oof_predict[test_index] = preds2

            accuracy = self.check_accuracy(oof_predict[test_index], y_test, test_index,
                                           true_length)
            print('the kflod cv is : ', str(accuracy))
            count_kflod += 1
            scores.append(accuracy)

        print('total scores is ', np.mean(scores))

        # The result directory may not exist yet on a fresh checkout.
        os.makedirs('../data/result', exist_ok=True)
        with open('../data/result/' + name + '_oof_' + str(np.mean(scores)) + '.txt', 'wb') as f:
            pickle.dump(oof_predict, f)
        with open('../data/result/' + name + '_pre_' + str(np.mean(scores)) + '.txt', 'wb') as f:
            pickle.dump(predict, f)

        print('done')

    def check_accuracy(self, pred, label, test_index, true_length):
        """Return the arg-max accuracy over rows whose index is below ``true_length``.

        :param pred: predicted scores, one row per sample.
        :param label: one-hot labels aligned with ``pred``.
        :param test_index: index of each row of ``pred``; rows with an index
            at or above ``true_length`` are ignored.
        :param true_length: exclusive upper bound of the indices that count.
        :return: fraction of counted rows predicted correctly, or ``0.0`` when
            no row is counted (the original raised ZeroDivisionError).
        """
        right = 0
        total = 0
        for count, row in enumerate(pred):
            if test_index[count] >= true_length:
                continue
            total += 1
            if int(np.argmax(row)) == int(np.argmax(label[count])):
                right += 1
        if total == 0:
            return 0.0
        return right / total
class BasicModel:
    """Base class of all models.

    Subclasses override ``create_model``; this class provides K-fold training
    with three recipes and pickles out-of-fold and test predictions.
    """

    def __init__(self, maxLen, ocrLen, max_features, init_embedding_matrix,
                 name='basicModel', num_flods=4, batch_size=64):
        """Store the hyper-parameters and build splitter, snapshot schedule and model.

        :param maxLen: stored as ``self.maxLen``.
        :param ocrLen: stored as ``self.ocrLen``.
        :param max_features: stored as ``self.max_features``.
        :param init_embedding_matrix: initial embedding matrix; the length of
            its first row becomes ``self.embed_size``.
        :param name: model name; used in checkpoint and result paths.
        :param num_flods: number of K-fold splits (spelling kept, it is part
            of the public signature).
        :param batch_size: batch size used by ``train_predict``.
        """
        self.name = name
        self.ocrLen = ocrLen
        self.batch_size = batch_size
        self.maxLen = maxLen
        self.max_features = max_features
        self.embedding_matrix = init_embedding_matrix
        self.embed_size = len(init_embedding_matrix[0])
        self.num_folds = num_flods
        self.kf = KFold(n_splits=self.num_folds, shuffle=True, random_state=10)

        M = 3  # number of snapshots
        alpha_zero = 5e-4  # initial learning rate
        self.snap_epoch = 12
        self.snapshot = SnapshotCallbackBuilder(self.snap_epoch, M, alpha_zero)
        self.model = self.create_model()

    def create_model(self):
        """Build and return the network; the base implementation returns None."""
        pass

    def train_predict(self, train, train_y, test, option=3, true_length=48480):
        """Train with K-fold cross validation and pickle OOF/test predictions.

        :param train: dict of arrays with keys ``news`` and ``ocr``.
        :param train_y: one-hot labels (3 classes), indexable by an index array.
        :param test: dict of test inputs (``test['news']`` gives the row count).
        :param option: 1 = frozen embedding with snapshots; 2 = frozen warm-up,
            then unfrozen with snapshots; anything else = frozen warm-up, then
            unfrozen fine-tuning with a lower learning rate.
        :param true_length: rows with an index at or above this value are
            skipped when the fold accuracy is computed.
        :return: None.  Predictions are pickled under ``../data/result/``.
        """
        name = self.name
        model_name = '../ckpt/' + name
        # makedirs also creates a missing '../ckpt' parent.
        os.makedirs(model_name, exist_ok=True)
        self.model.save_weights(model_name + '/init_weight.h5')

        count_kflod = 0
        predict = np.zeros((test['news'].shape[0], 3))
        oof_predict = np.zeros((train['news'].shape[0], 3))
        scores = []
        for train_index, test_index in self.kf.split(train['news']):
            kfold_X_train = {}
            kfold_X_valid = {}
            model_prefix = model_name + '/' + str(count_kflod)
            os.makedirs(model_prefix, exist_ok=True)

            y_train, y_test = train_y[train_index], train_y[test_index]

            # Every fold starts from the same initial weights.
            self.model.load_weights(model_name + '/init_weight.h5')
            for c in ['news', 'ocr']:
                kfold_X_train[c] = train[c][train_index]
                kfold_X_valid[c] = train[c][test_index]

            if option == 1:
                # Freeze the embedding and train with snapshots.
                self.model.get_layer('embedding').trainable = False
                adam_optimizer = optimizers.Adam(lr=1e-3, clipvalue=2.0)
                self.model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
                                   metrics=['accuracy'])
                self.model.summary()
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size,
                               epochs=self.snap_epoch, verbose=1,
                               validation_data=(kfold_X_valid, y_test),
                               callbacks=self.snapshot.get_callbacks(model_save_place=model_prefix))
            elif option == 2:
                # Freeze the embedding for a warm-up, then unfreeze it and
                # train with snapshots.
                self.model.get_layer('embedding').trainable = False
                adam_optimizer = optimizers.Adam(lr=1e-3, clipvalue=2)
                self.model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
                                   metrics=['accuracy'])
                self.model.summary()
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size, epochs=4,
                               verbose=1, validation_data=(kfold_X_valid, y_test))

                self.model.get_layer('embedding').trainable = True
                self.model.compile(loss='categorical_crossentropy', optimizer='adam',
                                   metrics=['accuracy'])
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size,
                               epochs=self.snap_epoch, verbose=1,
                               validation_data=(kfold_X_valid, y_test),
                               callbacks=self.snapshot.get_callbacks(model_save_place=model_prefix))
            else:
                # Freeze the embedding for a warm-up, then unfreeze it and
                # keep training with a lower learning rate.
                self.model.get_layer('embedding').trainable = False
                adam_optimizer = optimizers.Adam(lr=1e-3, clipvalue=2.4)
                self.model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
                                   metrics=['accuracy'])
                self.model.summary()
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size, epochs=6,
                               verbose=1, validation_data=(kfold_X_valid, y_test))

                adam_optimizer = optimizers.Adam(lr=1e-4, clipvalue=1.5)
                self.model.get_layer('embedding').trainable = True
                self.model.compile(loss='categorical_crossentropy', optimizer=adam_optimizer,
                                   metrics=['accuracy'])
                self.model.fit(kfold_X_train, y_train, batch_size=self.batch_size, epochs=5,
                               verbose=1, validation_data=(kfold_X_valid, y_test))
                self.model.save_weights(model_prefix + '/' + str(count_kflod) + 'model.h5')

            # Average the predictions of every weight file saved for this fold.
            evaluations = [fname for fname in os.listdir(model_prefix) if '.h5' in fname]
            print(evaluations)

            preds1 = np.zeros((test['news'].shape[0], 3))
            preds2 = np.zeros((len(kfold_X_valid['news']), 3))
            for weight_file in evaluations:
                self.model.load_weights(os.path.join(model_prefix, weight_file))
                preds1 += self.model.predict(test, verbose=1) / len(evaluations)
                preds2 += self.model.predict(kfold_X_valid, batch_size=128) / len(evaluations)

            predict += preds1 / self.num_folds
            oof_predict[test_index] = preds2

            accuracy = self.check_accuracy(oof_predict[test_index], y_test, test_index,
                                           true_length)
            print('the kflod cv is : ', str(accuracy))
            count_kflod += 1
            scores.append(accuracy)

        print('total scores is ', np.mean(scores))

        # The result directory may not exist yet on a fresh checkout.
        os.makedirs('../data/result', exist_ok=True)
        with open('../data/result/' + name + '_oof_' + str(np.mean(scores)) + '.txt', 'wb') as f:
            pickle.dump(oof_predict, f)
        with open('../data/result/' + name + '_pre_' + str(np.mean(scores)) + '.txt', 'wb') as f:
            pickle.dump(predict, f)

        print('done')

    def check_accuracy(self, pred, label, test_index, true_length):
        """Return the arg-max accuracy over rows whose index is below ``true_length``.

        :param pred: predicted scores, one row per sample.
        :param label: one-hot labels aligned with ``pred``.
        :param test_index: index of each row of ``pred``; rows with an index
            at or above ``true_length`` are ignored.
        :param true_length: exclusive upper bound of the indices that count.
        :return: fraction of counted rows predicted correctly, or ``0.0`` when
            no row is counted (the original raised ZeroDivisionError).
        """
        right = 0
        total = 0
        for count, row in enumerate(pred):
            if test_index[count] >= true_length:
                continue
            total += 1
            if int(np.argmax(row)) == int(np.argmax(label[count])):
                right += 1
        if total == 0:
            return 0.0
        return right / total
def stacking_pseudo(train, train_y, test, results):
    """Retrain with pseudo-labelled test data and return the averaged test prediction.

    The arg-max of ``results`` becomes one-hot pseudo labels, the test rows
    are appended to the training rows, and a fresh model is trained per fold
    (6 folds) with snapshot callbacks.  Accuracy and F1 of each fold are
    printed, followed by their means.

    :param train: training inputs (array, concatenated with ``test`` on axis 0).
    :param train_y: one-hot training labels.
    :param test: test inputs (array).
    :param results: earlier test predictions, one row per test sample.
    :return: array of shape ``(n_test, config.n_class)`` averaged over
        snapshots and folds.
    """
    # Pseudo labels for the test rows.
    pseudo_y = np_utils.to_categorical(np.argmax(results, axis=1),
                                       num_classes=config.n_class)
    train_y = np.concatenate([train_y, pseudo_y], axis=0)
    train = np.concatenate([train, test], axis=0)

    savepath = './pesudo_{}/'.format(args.option)
    if not os.path.exists(savepath):
        os.mkdir(savepath)

    num_folds = 6
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=10)
    predict = np.zeros((test.shape[0], config.n_class))
    oof_predict = np.zeros((train.shape[0], config.n_class))
    scores = []
    f1s = []
    for count_kflod, (train_index, test_index) in enumerate(kf.split(train)):
        y_train = train_y[train_index]
        y_test = train_y[test_index]
        kfold_X_train = train[train_index]
        kfold_X_valid = train[test_index]

        model_prefix = savepath + 'DNN' + str(count_kflod)
        if not os.path.exists(model_prefix):
            os.mkdir(model_prefix)

        # Snapshot schedule: 16 epochs, 4 snapshots, initial learning rate 1e-3.
        snap_epoch = 16
        snapshot = SnapshotCallbackBuilder(snap_epoch, 4, 1e-3)

        res_model = get_model(train)
        res_model.compile(loss='categorical_crossentropy', optimizer='adam',
                          metrics=['accuracy'])
        res_model.fit(
            kfold_X_train,
            y_train,
            batch_size=BATCH_SIZE,
            epochs=snap_epoch,
            verbose=1,
            validation_data=(kfold_X_valid, y_test),
            callbacks=snapshot.get_callbacks(model_save_place=model_prefix))

        # Average the predictions of every snapshot saved for this fold.
        evaluations = [fname for fname in os.listdir(model_prefix) if '.h5' in fname]
        print(evaluations)

        n_snapshots = len(evaluations)
        preds1 = np.zeros((test.shape[0], config.n_class))
        preds2 = np.zeros((len(kfold_X_valid), config.n_class))
        for weight_file in evaluations:
            res_model.load_weights(os.path.join(model_prefix, weight_file))
            preds1 += res_model.predict(test, verbose=1) / n_snapshots
            preds2 += res_model.predict(kfold_X_valid, batch_size=128) / n_snapshots

        predict += preds1 / num_folds
        oof_predict[test_index] = preds2

        fold_truth = np.argmax(y_test, axis=1)
        accuracy = mb.cal_acc(oof_predict[test_index], fold_truth)
        f1 = mb.cal_f_alpha(oof_predict[test_index], np.argmax(y_test, axis=1),
                            n_out=config.n_class)
        print('the kflod cv is : ', str(accuracy))
        print('the kflod f1 is : ', str(f1))
        scores.append(accuracy)
        f1s.append(f1)

    print('total scores is ', np.mean(scores))
    print('total f1 is ', np.mean(f1s))
    return predict
def stacking_pseudo(train, train_y, test, results):
    """Retrain with pseudo-labelled test data; return the test prediction and mean F1.

    The arg-max of ``results`` over the last axis becomes one-hot pseudo
    labels reshaped to ``(-1, 10, 4)``, the test rows are appended to the
    training rows, and a fresh model is trained per fold (5 folds) with
    snapshot callbacks.

    :param train: training inputs (array, concatenated with ``test`` on axis 0).
    :param train_y: one-hot training labels; concatenated with the
        ``(-1, 10, 4)`` pseudo labels on axis 0.
    :param test: test inputs (array).
    :param results: earlier test predictions; arg-max is taken over the last axis.
    :return: tuple ``(predict, mean_f1)`` where ``predict`` has shape
        ``(n_test, 10, 4)`` and is averaged over snapshots and folds.
    """
    # NOTE(review): the label layout (10 positions x 4 classes) is hard-coded;
    # confirm it matches the model output before reusing this elsewhere.
    seq_len = 10
    n_class = 4

    answer = np.reshape(np.argmax(results, axis=-1), [-1])
    answer = np.reshape(np.eye(n_class)[answer], [-1, seq_len, n_class])
    train_y = np.concatenate([train_y, answer], axis=0)
    train = np.concatenate([train, test], axis=0)

    savepath = './pesudo_{}_dt{}/'.format(args.option, args.data_type)
    if not os.path.exists(savepath):
        os.mkdir(savepath)

    num_folds = 5
    kf = KFold(n_splits=num_folds, shuffle=True, random_state=10)
    predict = np.zeros((test.shape[0], seq_len, n_class))
    oof_predict = np.zeros((train.shape[0], seq_len, n_class))
    scores = []
    for fold, (train_index, test_index) in enumerate(kf.split(train)):
        print('第{}折'.format(fold))
        y_train, y_test = train_y[train_index], train_y[test_index]
        kfold_X_train, kfold_X_valid = train[train_index], train[test_index]

        model_prefix = savepath + 'DNN' + str(fold)
        if not os.path.exists(model_prefix):
            os.mkdir(model_prefix)

        M = 3  # number of snapshots
        alpha_zero = 1e-3  # initial learning rate
        snap_epoch = 30
        snapshot = SnapshotCallbackBuilder(snap_epoch, M, alpha_zero)

        res_model = get_model(train)
        res_model.compile(loss='categorical_crossentropy', optimizer='adam',
                          metrics=['accuracy'])
        res_model.summary()
        res_model.fit(
            kfold_X_train,
            y_train,
            batch_size=BATCH_SIZE,
            epochs=snap_epoch,
            verbose=1,
            validation_data=(kfold_X_valid, y_test),
            callbacks=snapshot.get_callbacks(model_save_place=model_prefix))

        # Average the predictions of every snapshot saved for this fold.
        # The loop variables are named apart from the fold index: the original
        # reused ``i`` here, so the per-fold F1 line printed a checkpoint file
        # name instead of the fold number.
        evaluations = [fname for fname in os.listdir(model_prefix) if '.h5' in fname]

        test_pred_ = np.zeros((test.shape[0], seq_len, n_class))
        oof_pred_ = np.zeros((len(kfold_X_valid), seq_len, n_class))
        for weight_file in evaluations:
            weight_path = os.path.join(model_prefix, weight_file)
            print('loading from {}'.format(weight_path))
            res_model.load_weights(weight_path)
            test_pred_ += res_model.predict(test, verbose=1, batch_size=256) / len(evaluations)
            oof_pred_ += res_model.predict(kfold_X_valid, batch_size=256) / len(evaluations)

        predict += test_pred_ / num_folds
        oof_predict[test_index] = oof_pred_

        f1 = get_f1_score(np.argmax(oof_pred_, -1), np.argmax(y_test, -1), verbose=True)
        print(fold, ' kflod cv f1 : ', str(f1))
        scores.append(f1)

    print('f1 {} -> {}'.format(scores, np.mean(scores)))
    return predict, np.mean(scores)